diff options
Diffstat (limited to 'bdb/hash')
-rw-r--r-- | bdb/hash/hash.c | 2062 | ||||
-rw-r--r-- | bdb/hash/hash.src | 266 | ||||
-rw-r--r-- | bdb/hash/hash_conv.c | 116 | ||||
-rw-r--r-- | bdb/hash/hash_dup.c | 891 | ||||
-rw-r--r-- | bdb/hash/hash_func.c | 245 | ||||
-rw-r--r-- | bdb/hash/hash_meta.c | 125 | ||||
-rw-r--r-- | bdb/hash/hash_method.c | 126 | ||||
-rw-r--r-- | bdb/hash/hash_open.c | 558 | ||||
-rw-r--r-- | bdb/hash/hash_page.c | 1862 | ||||
-rw-r--r-- | bdb/hash/hash_rec.c | 1156 | ||||
-rw-r--r-- | bdb/hash/hash_reclaim.c | 111 | ||||
-rw-r--r-- | bdb/hash/hash_stat.c | 372 | ||||
-rw-r--r-- | bdb/hash/hash_upgrade.c | 266 | ||||
-rw-r--r-- | bdb/hash/hash_verify.c | 1079 |
14 files changed, 0 insertions, 9235 deletions
diff --git a/bdb/hash/hash.c b/bdb/hash/hash.c deleted file mode 100644 index 2f972a3238d..00000000000 --- a/bdb/hash/hash.c +++ /dev/null @@ -1,2062 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2002 - * Sleepycat Software. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994 - * Margo Seltzer. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Margo Seltzer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "db_config.h" - -#ifndef lint -static const char revid[] = "$Id: hash.c,v 11.166 2002/08/06 06:11:25 bostic Exp $"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <stdlib.h> -#include <string.h> -#endif - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_shash.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/lock.h" - -static int __ham_bulk __P((DBC *, DBT *, u_int32_t)); -static int __ham_c_close __P((DBC *, db_pgno_t, int *)); -static int __ham_c_del __P((DBC *)); -static int __ham_c_destroy __P((DBC *)); -static int __ham_c_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); -static int __ham_c_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); -static int __ham_c_writelock __P((DBC *)); -static int __ham_dup_return __P((DBC *, DBT *, u_int32_t)); -static int __ham_expand_table __P((DBC *)); -static int __ham_lookup __P((DBC *, - const DBT *, u_int32_t, db_lockmode_t, db_pgno_t *)); -static int __ham_overwrite __P((DBC *, DBT *, u_int32_t)); - -/* - * __ham_quick_delete -- - * When performing a DB->del operation that does not involve secondary - * indices and is not removing an off-page duplicate tree, we can - * speed things up substantially by removing the entire duplicate - * set, if any is present, in one operation, rather than by conjuring - * up and deleting each of the items individually. 
(All are stored - * in one big HKEYDATA structure.) We don't bother to distinguish - * on-page duplicate sets from single, non-dup items; they're deleted - * in exactly the same way. - * - * This function is called by __db_delete when the appropriate - * conditions are met, and it performs the delete in the optimized way. - * - * The cursor should be set to the first item in the duplicate - * set, or to the sole key/data pair when the key does not have a - * duplicate set, before the function is called. - * - * PUBLIC: int __ham_quick_delete __P((DBC *)); - */ -int -__ham_quick_delete(dbc) - DBC *dbc; -{ - int ret, t_ret; - - if ((ret = __ham_get_meta(dbc)) != 0) - return (ret); - - /* Assert that we're not using secondary indices. */ - DB_ASSERT(!F_ISSET(dbc->dbp, DB_AM_SECONDARY)); - /* - * We should assert that we're not a primary either, but that - * would require grabbing the dbp's mutex, so we don't bother. - */ - - /* Assert that we're set, but not to an off-page duplicate. */ - DB_ASSERT(IS_INITIALIZED(dbc)); - DB_ASSERT(((HASH_CURSOR *)dbc->internal)->opd == NULL); - - ret = __ham_del_pair(dbc, 1); - - if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* ****************** CURSORS ********************************** */ -/* - * __ham_c_init -- - * Initialize the hash-specific portion of a cursor. 
- * - * PUBLIC: int __ham_c_init __P((DBC *)); - */ -int -__ham_c_init(dbc) - DBC *dbc; -{ - DB_ENV *dbenv; - HASH_CURSOR *new_curs; - int ret; - - dbenv = dbc->dbp->dbenv; - if ((ret = __os_calloc(dbenv, - 1, sizeof(struct cursor_t), &new_curs)) != 0) - return (ret); - if ((ret = __os_malloc(dbenv, - dbc->dbp->pgsize, &new_curs->split_buf)) != 0) { - __os_free(dbenv, new_curs); - return (ret); - } - - dbc->internal = (DBC_INTERNAL *) new_curs; - dbc->c_close = __db_c_close; - dbc->c_count = __db_c_count; - dbc->c_del = __db_c_del; - dbc->c_dup = __db_c_dup; - dbc->c_get = dbc->c_real_get = __db_c_get; - dbc->c_pget = __db_c_pget; - dbc->c_put = __db_c_put; - dbc->c_am_bulk = __ham_bulk; - dbc->c_am_close = __ham_c_close; - dbc->c_am_del = __ham_c_del; - dbc->c_am_destroy = __ham_c_destroy; - dbc->c_am_get = __ham_c_get; - dbc->c_am_put = __ham_c_put; - dbc->c_am_writelock = __ham_c_writelock; - - __ham_item_init(dbc); - - return (0); -} - -/* - * __ham_c_close -- - * Close down the cursor from a single use. - */ -static int -__ham_c_close(dbc, root_pgno, rmroot) - DBC *dbc; - db_pgno_t root_pgno; - int *rmroot; -{ - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - HKEYDATA *dp; - int doroot, gotmeta, ret, t_ret; - u_int32_t dirty; - - COMPQUIET(rmroot, 0); - mpf = dbc->dbp->mpf; - dirty = 0; - doroot = gotmeta = ret = 0; - hcp = (HASH_CURSOR *) dbc->internal; - - /* Check for off page dups. */ - if (dbc->internal->opd != NULL) { - if ((ret = __ham_get_meta(dbc)) != 0) - goto done; - gotmeta = 1; - if ((ret = __ham_get_cpage(dbc, DB_LOCK_READ)) != 0) - goto out; - dp = (HKEYDATA *)H_PAIRDATA(dbc->dbp, hcp->page, hcp->indx); - - /* If its not a dup we aborted before we changed it. 
*/ - if (HPAGE_PTYPE(dp) == H_OFFDUP) - memcpy(&root_pgno, - HOFFPAGE_PGNO(dp), sizeof(db_pgno_t)); - else - root_pgno = PGNO_INVALID; - - if ((ret = - hcp->opd->c_am_close(hcp->opd, root_pgno, &doroot)) != 0) - goto out; - if (doroot != 0) { - if ((ret = __ham_del_pair(dbc, 1)) != 0) - goto out; - dirty = DB_MPOOL_DIRTY; - } - } - -out: if (hcp->page != NULL && (t_ret = - mpf->put(mpf, hcp->page, dirty)) != 0 && ret == 0) - ret = t_ret; - if (gotmeta != 0 && (t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) - ret = t_ret; - -done: - __ham_item_init(dbc); - return (ret); -} - -/* - * __ham_c_destroy -- - * Cleanup the access method private part of a cursor. - */ -static int -__ham_c_destroy(dbc) - DBC *dbc; -{ - HASH_CURSOR *hcp; - - hcp = (HASH_CURSOR *)dbc->internal; - if (hcp->split_buf != NULL) - __os_free(dbc->dbp->dbenv, hcp->split_buf); - __os_free(dbc->dbp->dbenv, hcp); - - return (0); -} - -/* - * __ham_c_count -- - * Return a count of on-page duplicates. - * - * PUBLIC: int __ham_c_count __P((DBC *, db_recno_t *)); - */ -int -__ham_c_count(dbc, recnop) - DBC *dbc; - db_recno_t *recnop; -{ - DB *dbp; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - db_indx_t len; - db_recno_t recno; - int ret, t_ret; - u_int8_t *p, *pend; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - recno = 0; - - if ((ret = __ham_get_cpage(dbc, DB_LOCK_READ)) != 0) - return (ret); - - switch (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx))) { - case H_KEYDATA: - case H_OFFPAGE: - recno = 1; - break; - case H_DUPLICATE: - p = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); - pend = p + - LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); - for (; p < pend; recno++) { - /* p may be odd, so copy rather than just dereffing */ - memcpy(&len, p, sizeof(db_indx_t)); - p += 2 * sizeof(db_indx_t) + len; - } - - break; - default: - ret = __db_pgfmt(dbp->dbenv, hcp->pgno); - goto err; - } - - *recnop = recno; - -err: if ((t_ret = mpf->put(mpf, hcp->page, 0)) 
!= 0 && ret == 0) - ret = t_ret; - hcp->page = NULL; - return (ret); -} - -static int -__ham_c_del(dbc) - DBC *dbc; -{ - DB *dbp; - DBT repldbt; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - int ret, t_ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - if (F_ISSET(hcp, H_DELETED)) - return (DB_NOTFOUND); - - if ((ret = __ham_get_meta(dbc)) != 0) - goto out; - - if ((ret = __ham_get_cpage(dbc, DB_LOCK_WRITE)) != 0) - goto out; - - /* Off-page duplicates. */ - if (HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) - goto out; - - if (F_ISSET(hcp, H_ISDUP)) { /* On-page duplicate. */ - if (hcp->dup_off == 0 && - DUP_SIZE(hcp->dup_len) == LEN_HDATA(dbp, hcp->page, - hcp->hdr->dbmeta.pagesize, hcp->indx)) - ret = __ham_del_pair(dbc, 1); - else { - repldbt.flags = 0; - F_SET(&repldbt, DB_DBT_PARTIAL); - repldbt.doff = hcp->dup_off; - repldbt.dlen = DUP_SIZE(hcp->dup_len); - repldbt.size = 0; - repldbt.data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, - hcp->indx)); - if ((ret = __ham_replpair(dbc, &repldbt, 0)) == 0) { - hcp->dup_tlen -= DUP_SIZE(hcp->dup_len); - F_SET(hcp, H_DELETED); - ret = __ham_c_update(dbc, - DUP_SIZE(hcp->dup_len), 0, 1); - } - } - - } else /* Not a duplicate */ - ret = __ham_del_pair(dbc, 1); - -out: if (hcp->page != NULL) { - if ((t_ret = mpf->put(mpf, - hcp->page, ret == 0 ? DB_MPOOL_DIRTY : 0)) && ret == 0) - ret = t_ret; - hcp->page = NULL; - } - if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} - -/* - * __ham_c_dup -- - * Duplicate a hash cursor, such that the new one holds appropriate - * locks for the position of the original. 
- * - * PUBLIC: int __ham_c_dup __P((DBC *, DBC *)); - */ -int -__ham_c_dup(orig_dbc, new_dbc) - DBC *orig_dbc, *new_dbc; -{ - HASH_CURSOR *orig, *new; - - orig = (HASH_CURSOR *)orig_dbc->internal; - new = (HASH_CURSOR *)new_dbc->internal; - - new->bucket = orig->bucket; - new->lbucket = orig->lbucket; - new->dup_off = orig->dup_off; - new->dup_len = orig->dup_len; - new->dup_tlen = orig->dup_tlen; - - if (F_ISSET(orig, H_DELETED)) - F_SET(new, H_DELETED); - if (F_ISSET(orig, H_ISDUP)) - F_SET(new, H_ISDUP); - - /* - * If the old cursor held a lock and we're not in transactions, get one - * for the new one. The reason that we don't need a new lock if we're - * in a transaction is because we already hold a lock and will continue - * to do so until commit, so there is no point in reaquiring it. We - * don't know if the old lock was a read or write lock, but it doesn't - * matter. We'll get a read lock. We know that this locker already - * holds a lock of the correct type, so if we need a write lock and - * request it, we know that we'll get it. - */ - if (!LOCK_ISSET(orig->lock) || orig_dbc->txn != NULL) - return (0); - - return (__ham_lock_bucket(new_dbc, DB_LOCK_READ)); -} - -static int -__ham_c_get(dbc, key, data, flags, pgnop) - DBC *dbc; - DBT *key; - DBT *data; - u_int32_t flags; - db_pgno_t *pgnop; -{ - DB *dbp; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - db_lockmode_t lock_type; - int get_key, ret, t_ret; - - hcp = (HASH_CURSOR *)dbc->internal; - dbp = dbc->dbp; - mpf = dbp->mpf; - - /* Clear OR'd in additional bits so we can check for flag equality. 
*/ - if (F_ISSET(dbc, DBC_RMW)) - lock_type = DB_LOCK_WRITE; - else - lock_type = DB_LOCK_READ; - - if ((ret = __ham_get_meta(dbc)) != 0) - return (ret); - hcp->seek_size = 0; - - ret = 0; - get_key = 1; - switch (flags) { - case DB_PREV_NODUP: - F_SET(hcp, H_NEXT_NODUP); - /* FALLTHROUGH */ - case DB_PREV: - if (IS_INITIALIZED(dbc)) { - ret = __ham_item_prev(dbc, lock_type, pgnop); - break; - } - /* FALLTHROUGH */ - case DB_LAST: - ret = __ham_item_last(dbc, lock_type, pgnop); - break; - case DB_NEXT_NODUP: - F_SET(hcp, H_NEXT_NODUP); - /* FALLTHROUGH */ - case DB_NEXT: - if (IS_INITIALIZED(dbc)) { - ret = __ham_item_next(dbc, lock_type, pgnop); - break; - } - /* FALLTHROUGH */ - case DB_FIRST: - ret = __ham_item_first(dbc, lock_type, pgnop); - break; - case DB_NEXT_DUP: - /* cgetchk has already determined that the cursor is set. */ - F_SET(hcp, H_DUPONLY); - ret = __ham_item_next(dbc, lock_type, pgnop); - break; - case DB_SET: - case DB_SET_RANGE: - case DB_GET_BOTH: - case DB_GET_BOTH_RANGE: - ret = __ham_lookup(dbc, key, 0, lock_type, pgnop); - get_key = 0; - break; - case DB_GET_BOTHC: - F_SET(hcp, H_DUPONLY); - - ret = __ham_item_next(dbc, lock_type, pgnop); - get_key = 0; - break; - case DB_CURRENT: - /* cgetchk has already determined that the cursor is set. */ - if (F_ISSET(hcp, H_DELETED)) { - ret = DB_KEYEMPTY; - goto err; - } - - ret = __ham_item(dbc, lock_type, pgnop); - break; - } - - /* - * Must always enter this loop to do error handling and - * check for big key/data pair. - */ - for (;;) { - if (ret != 0 && ret != DB_NOTFOUND) - goto err; - else if (F_ISSET(hcp, H_OK)) { - if (*pgnop == PGNO_INVALID) - ret = __ham_dup_return(dbc, data, flags); - break; - } else if (!F_ISSET(hcp, H_NOMORE)) { - __db_err(dbp->dbenv, - "H_NOMORE returned to __ham_c_get"); - ret = EINVAL; - break; - } - - /* - * Ran out of entries in a bucket; change buckets. 
- */ - switch (flags) { - case DB_LAST: - case DB_PREV: - case DB_PREV_NODUP: - ret = mpf->put(mpf, hcp->page, 0); - hcp->page = NULL; - if (hcp->bucket == 0) { - ret = DB_NOTFOUND; - hcp->pgno = PGNO_INVALID; - goto err; - } - F_CLR(hcp, H_ISDUP); - hcp->bucket--; - hcp->indx = NDX_INVALID; - hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); - if (ret == 0) - ret = __ham_item_prev(dbc, - lock_type, pgnop); - break; - case DB_FIRST: - case DB_NEXT: - case DB_NEXT_NODUP: - ret = mpf->put(mpf, hcp->page, 0); - hcp->page = NULL; - hcp->indx = NDX_INVALID; - hcp->bucket++; - F_CLR(hcp, H_ISDUP); - hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); - if (hcp->bucket > hcp->hdr->max_bucket) { - ret = DB_NOTFOUND; - hcp->pgno = PGNO_INVALID; - goto err; - } - if (ret == 0) - ret = __ham_item_next(dbc, - lock_type, pgnop); - break; - case DB_GET_BOTH: - case DB_GET_BOTHC: - case DB_GET_BOTH_RANGE: - case DB_NEXT_DUP: - case DB_SET: - case DB_SET_RANGE: - /* Key not found. */ - ret = DB_NOTFOUND; - goto err; - case DB_CURRENT: - /* - * This should only happen if you are doing - * deletes and reading with concurrent threads - * and not doing proper locking. We return - * the same error code as we would if the - * cursor were deleted. - */ - ret = DB_KEYEMPTY; - goto err; - default: - DB_ASSERT(0); - } - } - - if (get_key == 0) - F_SET(key, DB_DBT_ISSET); - -err: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) - ret = t_ret; - - F_CLR(hcp, H_DUPONLY); - F_CLR(hcp, H_NEXT_NODUP); - - return (ret); -} - -/* - * __ham_bulk -- Return bulk data from a hash table. 
- */ -static int -__ham_bulk(dbc, data, flags) - DBC *dbc; - DBT *data; - u_int32_t flags; -{ - DB *dbp; - DB_MPOOLFILE *mpf; - HASH_CURSOR *cp; - PAGE *pg; - db_indx_t dup_len, dup_off, dup_tlen, indx, *inp; - db_lockmode_t lock_mode; - db_pgno_t pgno; - int32_t *endp, key_off, *offp, *saveoff; - u_int32_t key_size, size, space; - u_int8_t *dbuf, *dp, *hk, *np, *tmp; - int is_dup, is_key; - int need_pg, next_key, no_dup, pagesize, ret, t_ret; - - ret = 0; - key_off = 0; - dup_len = dup_off = dup_tlen = 0; - size = 0; - dbp = dbc->dbp; - pagesize = dbp->pgsize; - mpf = dbp->mpf; - cp = (HASH_CURSOR *)dbc->internal; - is_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1 : 0; - next_key = is_key && LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP; - no_dup = LF_ISSET(DB_OPFLAGS_MASK) == DB_NEXT_NODUP; - dbuf = data->data; - np = dp = dbuf; - - /* Keep track of space that is left. There is an termination entry */ - space = data->ulen; - space -= sizeof(*offp); - - /* Build the offset/size table from the end up. */ - endp = (int32_t *) ((u_int8_t *)dbuf + data->ulen); - endp--; - offp = endp; - - key_size = 0; - lock_mode = F_ISSET(dbc, DBC_RMW) ? 
DB_LOCK_WRITE: DB_LOCK_READ; - -next_pg: - need_pg = 1; - indx = cp->indx; - pg = cp->page; - inp = P_INP(dbp, pg); - - do { - if (is_key) { - hk = H_PAIRKEY(dbp, pg, indx); - if (HPAGE_PTYPE(hk) == H_OFFPAGE) { - memcpy(&key_size, - HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); - memcpy(&pgno, - HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); - size = key_size; - if (key_size > space) - goto get_key_space; - if ((ret = __bam_bulk_overflow( - dbc, key_size, pgno, np)) != 0) - return (ret); - space -= key_size; - key_off = (int32_t)(np - dbuf); - np += key_size; - } else { - if (need_pg) { - dp = np; - size = pagesize - HOFFSET(pg); - if (space < size) { -get_key_space: - if (offp == endp) { - data->size = - ALIGN(size + - pagesize, - sizeof(u_int32_t)); - return (ENOMEM); - } - goto back_up; - } - memcpy(dp, - (u_int8_t *)pg + HOFFSET(pg), size); - need_pg = 0; - space -= size; - np += size; - } - key_size = LEN_HKEY(dbp, pg, pagesize, indx); - key_off = (int32_t)(inp[indx] - HOFFSET(pg) - + dp - dbuf + SSZA(HKEYDATA, data)); - } - } - - hk = H_PAIRDATA(dbp, pg, indx); - switch (HPAGE_PTYPE(hk)) { - case H_DUPLICATE: - case H_KEYDATA: - if (need_pg) { - dp = np; - size = pagesize - HOFFSET(pg); - if (space < size) { -back_up: - if (indx != 0) { - indx -= 2; - /* XXX - * It's not clear that this is - * the right way to fix this, - * but here goes. - * If we are backing up onto a - * duplicate, then we need to - * position ourselves at the - * end of the duplicate set. - * We probably need to make - * this work for H_OFFDUP too. - * It might be worth making a - * dummy cursor and calling - * __ham_item_prev. 
- */ - tmp = H_PAIRDATA(dbp, pg, indx); - if (HPAGE_PTYPE(tmp) == - H_DUPLICATE) { - dup_off = dup_tlen = - LEN_HDATA(dbp, pg, - pagesize, indx + 1); - memcpy(&dup_len, - HKEYDATA_DATA(tmp), - sizeof(db_indx_t)); - } - goto get_space; - } - /* indx == 0 */ - if ((ret = __ham_item_prev(dbc, - lock_mode, &pgno)) != 0) { - if (ret != DB_NOTFOUND) - return (ret); - if ((ret = mpf->put(mpf, - cp->page, 0)) != 0) - return (ret); - cp->page = NULL; - if (cp->bucket == 0) { - cp->indx = indx = - NDX_INVALID; - goto get_space; - } - if ((ret = - __ham_get_meta(dbc)) != 0) - return (ret); - - cp->bucket--; - cp->pgno = BUCKET_TO_PAGE(cp, - cp->bucket); - cp->indx = NDX_INVALID; - if ((ret = __ham_release_meta( - dbc)) != 0) - return (ret); - if ((ret = __ham_item_prev(dbc, - lock_mode, &pgno)) != 0) - return (ret); - } - indx = cp->indx; -get_space: - /* - * See if we put any data in the buffer. - */ - if (offp >= endp || - F_ISSET(dbc, DBC_TRANSIENT)) { - data->size = ALIGN(size + - data->ulen - space, - sizeof(u_int32_t)); - return (ENOMEM); - } - /* - * Don't continue; we're all out - * of space, even though we're - * returning success. - */ - next_key = 0; - break; - } - memcpy(dp, (u_int8_t *)pg + HOFFSET(pg), size); - need_pg = 0; - space -= size; - np += size; - } - - /* - * We're about to crack the offset(s) and length(s) - * out of an H_KEYDATA or H_DUPLICATE item. - * There are three cases: - * 1. We were moved into a duplicate set by - * the standard hash cursor code. Respect - * the dup_off and dup_tlen we were given. - * 2. We stumbled upon a duplicate set while - * walking the page on our own. We need to - * recognize it as a dup and set dup_off and - * dup_tlen. - * 3. The current item is not a dup. 
- */ - if (F_ISSET(cp, H_ISDUP)) { - /* Case 1 */ - is_dup = 1; - dup_len = cp->dup_len; - dup_off = cp->dup_off; - dup_tlen = cp->dup_tlen; - } else if (HPAGE_PTYPE(hk) == H_DUPLICATE) { - /* Case 2 */ - is_dup = 1; - /* - * If we run out of memory and bail, - * make sure the fact we're in a dup set - * isn't ignored later. - */ - F_SET(cp, H_ISDUP); - dup_off = 0; - memcpy(&dup_len, - HKEYDATA_DATA(hk), sizeof(db_indx_t)); - dup_tlen = LEN_HDATA(dbp, pg, pagesize, indx); - } else - /* Case 3 */ - is_dup = dup_len = dup_off = dup_tlen = 0; - - do { - space -= (is_key ? 4 : 2) * sizeof(*offp); - size += (is_key ? 4 : 2) * sizeof(*offp); - /* - * Since space is an unsigned, if we happen - * to wrap, then this comparison will turn out - * to be true. XXX Wouldn't it be better to - * simply check above that space is greater than - * the value we're about to subtract??? - */ - if (space > data->ulen) { - if (!is_dup || dup_off == 0) - goto back_up; - dup_off -= (db_indx_t)DUP_SIZE(offp[1]); - goto get_space; - } - if (is_key) { - *offp-- = key_off; - *offp-- = key_size; - } - if (is_dup) { - *offp-- = (int32_t)( - inp[indx + 1] - HOFFSET(pg) + - dp - dbuf + SSZA(HKEYDATA, data) + - dup_off + sizeof(db_indx_t)); - memcpy(&dup_len, - HKEYDATA_DATA(hk) + dup_off, - sizeof(db_indx_t)); - dup_off += DUP_SIZE(dup_len); - *offp-- = dup_len; - } else { - *offp-- = (int32_t)( - inp[indx + 1] - HOFFSET(pg) + - dp - dbuf + SSZA(HKEYDATA, data)); - *offp-- = LEN_HDATA(dbp, pg, - pagesize, indx); - } - } while (is_dup && dup_off < dup_tlen && no_dup == 0); - F_CLR(cp, H_ISDUP); - break; - case H_OFFDUP: - memcpy(&pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); - space -= 2 * sizeof(*offp); - if (space > data->ulen) - goto back_up; - - if (is_key) { - space -= 2 * sizeof(*offp); - if (space > data->ulen) - goto back_up; - *offp-- = key_off; - *offp-- = key_size; - } - saveoff = offp; - if ((ret = __bam_bulk_duplicates(dbc, - pgno, dbuf, is_key ? 
offp + 2 : NULL, - &offp, &np, &space, no_dup)) != 0) { - if (ret == ENOMEM) { - size = space; - if (is_key && saveoff == offp) { - offp += 2; - goto back_up; - } - goto get_space; - } - return (ret); - } - break; - case H_OFFPAGE: - space -= (is_key ? 4 : 2) * sizeof(*offp); - if (space > data->ulen) - goto back_up; - - memcpy(&size, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); - memcpy(&pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); - if (size > space) - goto back_up; - - if ((ret = - __bam_bulk_overflow(dbc, size, pgno, np)) != 0) - return (ret); - - if (is_key) { - *offp-- = key_off; - *offp-- = key_size; - } - - *offp-- = (int32_t)(np - dbuf); - *offp-- = size; - - np += size; - space -= size; - break; - } - } while (next_key && (indx += 2) < NUM_ENT(pg)); - - cp->indx = indx; - cp->dup_len = dup_len; - cp->dup_off = dup_off; - cp->dup_tlen = dup_tlen; - - /* If we are off the page then try to the next page. */ - if (ret == 0 && next_key && indx >= NUM_ENT(pg)) { - if ((ret = __ham_item_next(dbc, lock_mode, &pgno)) == 0) - goto next_pg; - if (ret != DB_NOTFOUND) - return (ret); - if ((ret = mpf->put(dbc->dbp->mpf, cp->page, 0)) != 0) - return (ret); - cp->page = NULL; - if ((ret = __ham_get_meta(dbc)) != 0) - return (ret); - - cp->bucket++; - if (cp->bucket > cp->hdr->max_bucket) { - /* - * Restore cursor to its previous state. We're past - * the last item in the last bucket, so the next - * DBC->c_get(DB_NEXT) will return DB_NOTFOUND. - */ - cp->bucket--; - ret = DB_NOTFOUND; - } else { - /* - * Start on the next bucket. - * - * Note that if this new bucket happens to be empty, - * but there's another non-empty bucket after it, - * we'll return early. This is a rare case, and we - * don't guarantee any particular number of keys - * returned on each call, so just let the next call - * to bulk get move forward by yet another bucket. 
- */ - cp->pgno = BUCKET_TO_PAGE(cp, cp->bucket); - cp->indx = NDX_INVALID; - F_CLR(cp, H_ISDUP); - ret = __ham_item_next(dbc, lock_mode, &pgno); - } - - if ((t_ret = __ham_release_meta(dbc)) != 0) - return (t_ret); - if (ret == 0) - goto next_pg; - if (ret != DB_NOTFOUND) - return (ret); - } - *offp = (u_int32_t) -1; - return (0); -} - -static int -__ham_c_put(dbc, key, data, flags, pgnop) - DBC *dbc; - DBT *key; - DBT *data; - u_int32_t flags; - db_pgno_t *pgnop; -{ - DB *dbp; - DB_MPOOLFILE *mpf; - DBT tmp_val, *myval; - HASH_CURSOR *hcp; - u_int32_t nbytes; - int ret, t_ret; - - /* - * The compiler doesn't realize that we only use this when ret is - * equal to 0 and that if ret is equal to 0, that we must have set - * myval. So, we initialize it here to shut the compiler up. - */ - COMPQUIET(myval, NULL); - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - if (F_ISSET(hcp, H_DELETED) && - flags != DB_KEYFIRST && flags != DB_KEYLAST) - return (DB_NOTFOUND); - - if ((ret = __ham_get_meta(dbc)) != 0) - goto err1; - - switch (flags) { - case DB_KEYLAST: - case DB_KEYFIRST: - case DB_NODUPDATA: - nbytes = (ISBIG(hcp, key->size) ? HOFFPAGE_PSIZE : - HKEYDATA_PSIZE(key->size)) + - (ISBIG(hcp, data->size) ? HOFFPAGE_PSIZE : - HKEYDATA_PSIZE(data->size)); - if ((ret = __ham_lookup(dbc, - key, nbytes, DB_LOCK_WRITE, pgnop)) == DB_NOTFOUND) { - ret = 0; - if (hcp->seek_found_page != PGNO_INVALID && - hcp->seek_found_page != hcp->pgno) { - if ((ret = mpf->put(mpf, hcp->page, 0)) != 0) - goto err2; - hcp->page = NULL; - hcp->pgno = hcp->seek_found_page; - hcp->indx = NDX_INVALID; - } - - if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) { - /* - * A partial put, but the key does not exist - * and we are not beginning the write at 0. - * We must create a data item padded up to doff - * and then write the new bytes represented by - * val. 
- */ - if ((ret = __ham_init_dbt(dbp->dbenv, &tmp_val, - data->size + data->doff, - &dbc->my_rdata.data, - &dbc->my_rdata.ulen)) == 0) { - memset(tmp_val.data, 0, data->doff); - memcpy((u_int8_t *)tmp_val.data + - data->doff, data->data, data->size); - myval = &tmp_val; - } - } else - myval = (DBT *)data; - - if (ret == 0) - ret = __ham_add_el(dbc, key, myval, H_KEYDATA); - goto done; - } - break; - case DB_BEFORE: - case DB_AFTER: - case DB_CURRENT: - ret = __ham_item(dbc, DB_LOCK_WRITE, pgnop); - break; - } - - if (*pgnop == PGNO_INVALID && ret == 0) { - if (flags == DB_CURRENT || - ((flags == DB_KEYFIRST || - flags == DB_KEYLAST || flags == DB_NODUPDATA) && - !(F_ISSET(dbp, DB_AM_DUP) || F_ISSET(key, DB_DBT_DUPOK)))) - ret = __ham_overwrite(dbc, data, flags); - else - ret = __ham_add_dup(dbc, data, flags, pgnop); - } - -done: if (ret == 0 && F_ISSET(hcp, H_EXPAND)) { - ret = __ham_expand_table(dbc); - F_CLR(hcp, H_EXPAND); - } - - if (hcp->page != NULL && - (t_ret = mpf->set(mpf, hcp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0) - ret = t_ret; - -err2: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) - ret = t_ret; - -err1: return (ret); -} - -/********************************* UTILITIES ************************/ - -/* - * __ham_expand_table -- - */ -static int -__ham_expand_table(dbc) - DBC *dbc; -{ - DB *dbp; - DB_LOCK metalock; - DB_LSN lsn; - DB_MPOOLFILE *mpf; - DBMETA *mmeta; - HASH_CURSOR *hcp; - PAGE *h; - db_pgno_t pgno, mpgno; - u_int32_t newalloc, new_bucket, old_bucket; - int dirty_meta, got_meta, logn, new_double, ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - if ((ret = __ham_dirty_meta(dbc)) != 0) - return (ret); - - LOCK_INIT(metalock); - mmeta = (DBMETA *) hcp->hdr; - mpgno = mmeta->pgno; - h = NULL; - dirty_meta = 0; - got_meta = 0; - newalloc = 0; - - /* - * If the split point is about to increase, make sure that we - * have enough extra pages. The calculation here is weird. 
- * We'd like to do this after we've upped max_bucket, but it's - * too late then because we've logged the meta-data split. What - * we'll do between then and now is increment max bucket and then - * see what the log of one greater than that is; here we have to - * look at the log of max + 2. VERY NASTY STUFF. - * - * We figure out what we need to do, then we log it, then request - * the pages from mpool. We don't want to fail after extending - * the file. - * - * If the page we are about to split into has already been allocated, - * then we simply need to get it to get its LSN. If it hasn't yet - * been allocated, then we know it's LSN (0,0). - */ - - new_bucket = hcp->hdr->max_bucket + 1; - old_bucket = new_bucket & hcp->hdr->low_mask; - - new_double = hcp->hdr->max_bucket == hcp->hdr->high_mask; - logn = __db_log2(new_bucket); - - if (!new_double || hcp->hdr->spares[logn + 1] != PGNO_INVALID) { - /* Page exists; get it so we can get its LSN */ - pgno = BUCKET_TO_PAGE(hcp, new_bucket); - if ((ret = - mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &h)) != 0) - goto err; - lsn = h->lsn; - } else { - /* Get the master meta-data page to do allocation. */ - if (F_ISSET(dbp, DB_AM_SUBDB)) { - mpgno = PGNO_BASE_MD; - if ((ret = __db_lget(dbc, - 0, mpgno, DB_LOCK_WRITE, 0, &metalock)) != 0) - goto err; - if ((ret = - mpf->get(mpf, &mpgno, 0, (PAGE **)&mmeta)) != 0) - goto err; - got_meta = 1; - } - pgno = mmeta->last_pgno + 1; - ZERO_LSN(lsn); - newalloc = 1; - } - - /* Log the meta-data split first. */ - if (DBC_LOGGING(dbc)) { - /* - * We always log the page number of the first page of - * the allocation group. However, the LSN that we log - * is either the LSN on the first page (if we did not - * do the actual allocation here) or the LSN on the last - * page of the unit (if we did do the allocation here). 
- */ - if ((ret = __ham_metagroup_log(dbp, dbc->txn, - &lsn, 0, hcp->hdr->max_bucket, mpgno, &mmeta->lsn, - hcp->hdr->dbmeta.pgno, &hcp->hdr->dbmeta.lsn, - pgno, &lsn, newalloc)) != 0) - goto err; - } else - LSN_NOT_LOGGED(lsn); - - hcp->hdr->dbmeta.lsn = lsn; - - if (new_double && hcp->hdr->spares[logn + 1] == PGNO_INVALID) { - /* - * We need to begin a new doubling and we have not allocated - * any pages yet. Read the last page in and initialize it to - * make the allocation contiguous. The pgno we calculated - * above is the first page allocated. The entry in spares is - * that page number minus any buckets already allocated (it - * simplifies bucket to page transaction). After we've set - * that, we calculate the last pgno. - */ - - hcp->hdr->spares[logn + 1] = pgno - new_bucket; - pgno += hcp->hdr->max_bucket; - mmeta->last_pgno = pgno; - mmeta->lsn = lsn; - dirty_meta = DB_MPOOL_DIRTY; - - if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &h)) != 0) - goto err; - - P_INIT(h, dbp->pgsize, - pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - } - - /* Write out whatever page we ended up modifying. */ - h->lsn = lsn; - if ((ret = mpf->put(mpf, h, DB_MPOOL_DIRTY)) != 0) - goto err; - h = NULL; - - /* - * Update the meta-data page of this hash database. 
- */ - hcp->hdr->max_bucket = new_bucket; - if (new_double) { - hcp->hdr->low_mask = hcp->hdr->high_mask; - hcp->hdr->high_mask = new_bucket | hcp->hdr->low_mask; - } - - /* Relocate records to the new bucket */ - ret = __ham_split_page(dbc, old_bucket, new_bucket); - -err: if (got_meta) - (void)mpf->put(mpf, mmeta, dirty_meta); - - if (LOCK_ISSET(metalock)) - (void)__TLPUT(dbc, metalock); - - if (h != NULL) - (void)mpf->put(mpf, h, 0); - - return (ret); -} - -/* - * PUBLIC: u_int32_t __ham_call_hash __P((DBC *, u_int8_t *, int32_t)); - */ -u_int32_t -__ham_call_hash(dbc, k, len) - DBC *dbc; - u_int8_t *k; - int32_t len; -{ - DB *dbp; - u_int32_t n, bucket; - HASH_CURSOR *hcp; - HASH *hashp; - - dbp = dbc->dbp; - hcp = (HASH_CURSOR *)dbc->internal; - hashp = dbp->h_internal; - - n = (u_int32_t)(hashp->h_hash(dbp, k, len)); - - bucket = n & hcp->hdr->high_mask; - if (bucket > hcp->hdr->max_bucket) - bucket = bucket & hcp->hdr->low_mask; - return (bucket); -} - -/* - * Check for duplicates, and call __db_ret appropriately. Release - * everything held by the cursor. - */ -static int -__ham_dup_return(dbc, val, flags) - DBC *dbc; - DBT *val; - u_int32_t flags; -{ - DB *dbp; - HASH_CURSOR *hcp; - PAGE *pp; - DBT *myval, tmp_val; - db_indx_t ndx; - db_pgno_t pgno; - u_int32_t off, tlen; - u_int8_t *hk, type; - int cmp, ret; - db_indx_t len; - - /* Check for duplicate and return the first one. */ - dbp = dbc->dbp; - hcp = (HASH_CURSOR *)dbc->internal; - ndx = H_DATAINDEX(hcp->indx); - type = HPAGE_TYPE(dbp, hcp->page, ndx); - pp = hcp->page; - myval = val; - - /* - * There are 4 cases: - * 1. We are not in duplicate, simply return; the upper layer - * will do the right thing. - * 2. We are looking at keys and stumbled onto a duplicate. - * 3. We are in the middle of a duplicate set. (ISDUP set) - * 4. We need to check for particular data match. - */ - - /* We should never get here with off-page dups. 
*/ - DB_ASSERT(type != H_OFFDUP); - - /* Case 1 */ - if (type != H_DUPLICATE && flags != DB_GET_BOTH && - flags != DB_GET_BOTHC && flags != DB_GET_BOTH_RANGE) - return (0); - - /* - * Here we check for the case where we just stumbled onto a - * duplicate. In this case, we do initialization and then - * let the normal duplicate code handle it. (Case 2) - */ - if (!F_ISSET(hcp, H_ISDUP) && type == H_DUPLICATE) { - F_SET(hcp, H_ISDUP); - hcp->dup_tlen = LEN_HDATA(dbp, hcp->page, - hcp->hdr->dbmeta.pagesize, hcp->indx); - hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - if (flags == DB_LAST || - flags == DB_PREV || flags == DB_PREV_NODUP) { - hcp->dup_off = 0; - do { - memcpy(&len, - HKEYDATA_DATA(hk) + hcp->dup_off, - sizeof(db_indx_t)); - hcp->dup_off += DUP_SIZE(len); - } while (hcp->dup_off < hcp->dup_tlen); - hcp->dup_off -= DUP_SIZE(len); - } else { - memcpy(&len, - HKEYDATA_DATA(hk), sizeof(db_indx_t)); - hcp->dup_off = 0; - } - hcp->dup_len = len; - } - - /* - * If we are retrieving a specific key/data pair, then we - * may need to adjust the cursor before returning data. - * Case 4 - */ - if (flags == DB_GET_BOTH || - flags == DB_GET_BOTHC || flags == DB_GET_BOTH_RANGE) { - if (F_ISSET(hcp, H_ISDUP)) { - /* - * If we're doing a join, search forward from the - * current position, not the beginning of the dup set. - */ - if (flags == DB_GET_BOTHC) - F_SET(hcp, H_CONTINUE); - - __ham_dsearch(dbc, val, &off, &cmp, flags); - - /* - * This flag is set nowhere else and is safe to - * clear unconditionally. - */ - F_CLR(hcp, H_CONTINUE); - hcp->dup_off = off; - } else { - hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - if (((HKEYDATA *)hk)->type == H_OFFPAGE) { - memcpy(&tlen, - HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); - memcpy(&pgno, - HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); - if ((ret = __db_moff(dbp, val, - pgno, tlen, dbp->dup_compare, &cmp)) != 0) - return (ret); - } else { - /* - * We do not zero tmp_val since the comparison - * routines may only look at data and size. 
- */ - tmp_val.data = HKEYDATA_DATA(hk); - tmp_val.size = LEN_HDATA(dbp, hcp->page, - dbp->pgsize, hcp->indx); - cmp = dbp->dup_compare == NULL ? - __bam_defcmp(dbp, &tmp_val, val) : - dbp->dup_compare(dbp, &tmp_val, val); - } - } - - if (cmp != 0) - return (DB_NOTFOUND); - } - - /* - * If we're doing a bulk get, we don't want to actually return - * the data: __ham_bulk will take care of cracking out the - * duplicates appropriately. - * - * The rest of this function calculates partial offsets and - * handles the actual __db_ret, so just return if - * DB_MULTIPLE(_KEY) is set. - */ - if (F_ISSET(dbc, DBC_MULTIPLE | DBC_MULTIPLE_KEY)) - return (0); - - /* - * Now, everything is initialized, grab a duplicate if - * necessary. - */ - if (F_ISSET(hcp, H_ISDUP)) { /* Case 3 */ - /* - * Copy the DBT in case we are retrieving into user - * memory and we need the parameters for it. If the - * user requested a partial, then we need to adjust - * the user's parameters to get the partial of the - * duplicate which is itself a partial. - */ - memcpy(&tmp_val, val, sizeof(*val)); - if (F_ISSET(&tmp_val, DB_DBT_PARTIAL)) { - /* - * Take the user's length unless it would go - * beyond the end of the duplicate. - */ - if (tmp_val.doff + hcp->dup_off > hcp->dup_len) - tmp_val.dlen = 0; - else if (tmp_val.dlen + tmp_val.doff > - hcp->dup_len) - tmp_val.dlen = - hcp->dup_len - tmp_val.doff; - - /* - * Calculate the new offset. - */ - tmp_val.doff += hcp->dup_off; - } else { - F_SET(&tmp_val, DB_DBT_PARTIAL); - tmp_val.dlen = hcp->dup_len; - tmp_val.doff = hcp->dup_off + sizeof(db_indx_t); - } - myval = &tmp_val; - } - - /* - * Finally, if we had a duplicate, pp, ndx, and myval should be - * set appropriately. - */ - if ((ret = __db_ret(dbp, pp, ndx, myval, &dbc->rdata->data, - &dbc->rdata->ulen)) != 0) - return (ret); - - /* - * In case we sent a temporary off to db_ret, set the real - * return values. 
- */ - val->data = myval->data; - val->size = myval->size; - - F_SET(val, DB_DBT_ISSET); - - return (0); -} - -static int -__ham_overwrite(dbc, nval, flags) - DBC *dbc; - DBT *nval; - u_int32_t flags; -{ - DB *dbp; - DB_ENV *dbenv; - HASH_CURSOR *hcp; - DBT *myval, tmp_val, tmp_val2; - void *newrec; - u_int8_t *hk, *p; - u_int32_t len, nondup_size; - db_indx_t newsize; - int ret; - - dbp = dbc->dbp; - dbenv = dbp->dbenv; - hcp = (HASH_CURSOR *)dbc->internal; - if (F_ISSET(hcp, H_ISDUP)) { - /* - * This is an overwrite of a duplicate. We should never - * be off-page at this point. - */ - DB_ASSERT(hcp->opd == NULL); - /* On page dups */ - if (F_ISSET(nval, DB_DBT_PARTIAL)) { - /* - * We're going to have to get the current item, then - * construct the record, do any padding and do a - * replace. - */ - memset(&tmp_val, 0, sizeof(tmp_val)); - if ((ret = - __ham_dup_return(dbc, &tmp_val, DB_CURRENT)) != 0) - return (ret); - - /* Figure out new size. */ - nondup_size = tmp_val.size; - newsize = nondup_size; - - /* - * Three cases: - * 1. strictly append (may need to allocate space - * for pad bytes; really gross). - * 2. overwrite some and append. - * 3. strictly overwrite. - */ - if (nval->doff > nondup_size) - newsize += - (nval->doff - nondup_size + nval->size); - else if (nval->doff + nval->dlen > nondup_size) - newsize += nval->size - - (nondup_size - nval->doff); - else - newsize += nval->size - nval->dlen; - - /* - * Make sure that the new size doesn't put us over - * the onpage duplicate size in which case we need - * to convert to off-page duplicates. - */ - if (ISBIG(hcp, hcp->dup_tlen - nondup_size + newsize)) { - if ((ret = __ham_dup_convert(dbc)) != 0) - return (ret); - return (hcp->opd->c_am_put(hcp->opd, - NULL, nval, flags, NULL)); - } - - if ((ret = __os_malloc(dbp->dbenv, - DUP_SIZE(newsize), &newrec)) != 0) - return (ret); - memset(&tmp_val2, 0, sizeof(tmp_val2)); - F_SET(&tmp_val2, DB_DBT_PARTIAL); - - /* Construct the record. 
*/ - /* - * The rebuilt on-page duplicate is laid out as: length prefix, - * data bytes, length suffix (both lengths are newsize). - */ - p = newrec; - /* Initial size. */ - memcpy(p, &newsize, sizeof(db_indx_t)); - p += sizeof(db_indx_t); - - /* First part of original record. */ - len = nval->doff > tmp_val.size - ? tmp_val.size : nval->doff; - memcpy(p, tmp_val.data, len); - p += len; - - if (nval->doff > tmp_val.size) { - /* Padding */ - memset(p, 0, nval->doff - tmp_val.size); - p += nval->doff - tmp_val.size; - } - - /* New bytes */ - memcpy(p, nval->data, nval->size); - p += nval->size; - - /* End of original record (if there is any) */ - if (nval->doff + nval->dlen < tmp_val.size) { - len = tmp_val.size - nval->doff - nval->dlen; - memcpy(p, (u_int8_t *)tmp_val.data + - nval->doff + nval->dlen, len); - p += len; - } - - /* Final size. */ - memcpy(p, &newsize, sizeof(db_indx_t)); - - /* - * Make sure that the caller isn't corrupting - * the sort order. - */ - if (dbp->dup_compare != NULL) { - tmp_val2.data = - (u_int8_t *)newrec + sizeof(db_indx_t); - tmp_val2.size = newsize; - if (dbp->dup_compare( - dbp, &tmp_val, &tmp_val2) != 0) { - (void)__os_free(dbenv, newrec); - return (__db_duperr(dbp, flags)); - } - } - - /* Replace the whole old duplicate (prefix, data, suffix) in place. */ - tmp_val2.data = newrec; - tmp_val2.size = DUP_SIZE(newsize); - tmp_val2.doff = hcp->dup_off; - tmp_val2.dlen = DUP_SIZE(hcp->dup_len); - - ret = __ham_replpair(dbc, &tmp_val2, 0); - (void)__os_free(dbenv, newrec); - - /* Update cursor */ - if (ret != 0) - return (ret); - - if (newsize > nondup_size) - hcp->dup_tlen += (newsize - nondup_size); - else - hcp->dup_tlen -= (nondup_size - newsize); - /* - * NOTE(review): dup_len is wrapped in DUP_SIZE() where it is - * consumed above (tmp_val2.dlen), which suggests it holds the - * bare data length; storing DUP_SIZE(newsize) here looks - * inconsistent -- confirm whether plain newsize was intended. - */ - hcp->dup_len = DUP_SIZE(newsize); - return (0); - } else { - /* Check whether we need to convert to off page. */ - if (ISBIG(hcp, - hcp->dup_tlen - hcp->dup_len + nval->size)) { - if ((ret = __ham_dup_convert(dbc)) != 0) - return (ret); - return (hcp->opd->c_am_put(hcp->opd, - NULL, nval, flags, NULL)); - } - - /* Make sure we maintain sort order.
*/ - /* - * NOTE(review): the partial-overwrite branch of this function - * reports a sort-order violation through __db_duperr(dbp, flags); - * this branch returns a bare EINVAL for the same condition. - */ - if (dbp->dup_compare != NULL) { - tmp_val2.data = - HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, - hcp->indx)) + hcp->dup_off + - sizeof(db_indx_t); - tmp_val2.size = hcp->dup_len; - if (dbp->dup_compare(dbp, nval, &tmp_val2) != 0) - return (EINVAL); - } - /* Overwriting a complete duplicate. */ - if ((ret = - __ham_make_dup(dbp->dbenv, nval, &tmp_val, - &dbc->my_rdata.data, &dbc->my_rdata.ulen)) != 0) - return (ret); - /* Now fix what we are replacing. */ - tmp_val.doff = hcp->dup_off; - tmp_val.dlen = DUP_SIZE(hcp->dup_len); - - /* Update cursor */ - if (nval->size > hcp->dup_len) - hcp->dup_tlen += (nval->size - hcp->dup_len); - else - hcp->dup_tlen -= (hcp->dup_len - nval->size); - /* - * NOTE(review): dup_len is compared directly with nval->size - * and wrapped in DUP_SIZE() just above, so it appears to be a - * bare data length; assigning DUP_SIZE(nval->size) looks - * inconsistent -- confirm whether nval->size was intended. - */ - hcp->dup_len = (db_indx_t)DUP_SIZE(nval->size); - } - myval = &tmp_val; - } else if (!F_ISSET(nval, DB_DBT_PARTIAL)) { - /* - * Put/overwrite: expressed as a partial replace starting at - * offset 0 whose dlen is the current item's full length. - */ - memcpy(&tmp_val, nval, sizeof(*nval)); - F_SET(&tmp_val, DB_DBT_PARTIAL); - tmp_val.doff = 0; - hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - if (HPAGE_PTYPE(hk) == H_OFFPAGE) - memcpy(&tmp_val.dlen, - HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); - else - tmp_val.dlen = LEN_HDATA(dbp, hcp->page, - hcp->hdr->dbmeta.pagesize, hcp->indx); - myval = &tmp_val; - } else - /* Regular partial put */ - myval = nval; - - return (__ham_replpair(dbc, myval, 0)); -} - -/* - * Given a key and a cursor, sets the cursor to the page/ndx on which - * the key resides. If the key is found, the cursor H_OK flag is set - * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set. - * If the key is not found, the H_OK flag is not set. If the sought - * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields - * are set indicating where an add might take place. If it is 0, - * non of the cursor pointer field are valid.
- */ -static int -__ham_lookup(dbc, key, sought, mode, pgnop) - DBC *dbc; - const DBT *key; - u_int32_t sought; - db_lockmode_t mode; - db_pgno_t *pgnop; -{ - DB *dbp; - HASH_CURSOR *hcp; - db_pgno_t pgno; - u_int32_t tlen; - int match, ret; - u_int8_t *hk, *dk; - - dbp = dbc->dbp; - hcp = (HASH_CURSOR *)dbc->internal; - /* - * Set up cursor so that we're looking for space to add an item - * as we cycle through the pages looking for the key. - */ - if ((ret = __ham_item_reset(dbc)) != 0) - return (ret); - hcp->seek_size = sought; - - /* Start the scan at the page of the bucket the key hashes to. */ - hcp->bucket = __ham_call_hash(dbc, (u_int8_t *)key->data, key->size); - hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); - - for (;;) { - *pgnop = PGNO_INVALID; - if ((ret = __ham_item_next(dbc, mode, pgnop)) != 0) - return (ret); - - if (F_ISSET(hcp, H_NOMORE)) - break; - - hk = H_PAIRKEY(dbp, hcp->page, hcp->indx); - switch (HPAGE_PTYPE(hk)) { - case H_OFFPAGE: - /* - * Off-page key: __db_moff is only called when the - * stored total length equals key->size; match == 0 - * is treated as found. - */ - memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); - if (tlen == key->size) { - memcpy(&pgno, - HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); - if ((ret = __db_moff(dbp, - key, pgno, tlen, NULL, &match)) != 0) - return (ret); - if (match == 0) - goto found_key; - } - break; - case H_KEYDATA: - /* On-page key: equal length and identical bytes. */ - if (key->size == - LEN_HKEY(dbp, hcp->page, dbp->pgsize, hcp->indx) && - memcmp(key->data, - HKEYDATA_DATA(hk), key->size) == 0) { - /* Found the key, check for data type. */ -found_key: F_SET(hcp, H_OK); - dk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - if (HPAGE_PTYPE(dk) == H_OFFDUP) - memcpy(pgnop, HOFFDUP_PGNO(dk), - sizeof(db_pgno_t)); - return (0); - } - break; - case H_DUPLICATE: - case H_OFFDUP: - /* - * These are errors because keys are never - * duplicated, only data items are. - */ - return (__db_pgfmt(dbp->dbenv, PGNO(hcp->page))); - } - } - - /* - * Item was not found. - * - * NOTE(review): ret is 0 here (the loop only breaks after a - * successful __ham_item_next) and both paths below return it, so - * the test on sought has no effect. - */ - - if (sought != 0) - return (ret); - - return (ret); -} - -/* - * __ham_init_dbt -- - * Initialize a dbt using some possibly already allocated storage - * for items.
- * - * PUBLIC: int __ham_init_dbt __P((DB_ENV *, - * PUBLIC: DBT *, u_int32_t, void **, u_int32_t *)); - */ -int -__ham_init_dbt(dbenv, dbt, size, bufp, sizep) - DB_ENV *dbenv; - DBT *dbt; - u_int32_t size; - void **bufp; - u_int32_t *sizep; -{ - int ret; - - memset(dbt, 0, sizeof(*dbt)); - /* - * Reuse *bufp when *sizep already covers size; otherwise grow it - * with __os_realloc. On failure *sizep is reset to 0 and dbt is - * left zeroed. - */ - if (*sizep < size) { - if ((ret = __os_realloc(dbenv, size, bufp)) != 0) { - *sizep = 0; - return (ret); - } - *sizep = size; - } - dbt->data = *bufp; - dbt->size = size; - return (0); -} - -/* - * Adjust the cursor after an insert or delete. The cursor passed is - * the one that was operated upon; we just need to check any of the - * others. - * - * len indicates the length of the item added/deleted - * add indicates if the item indicated by the cursor has just been - * added (add == 1) or deleted (add == 0). - * is_dup indicates if the addition occurred into a duplicate set. - * - * PUBLIC: int __ham_c_update - * PUBLIC: __P((DBC *, u_int32_t, int, int)); - */ -int -__ham_c_update(dbc, len, add, is_dup) - DBC *dbc; - u_int32_t len; - int add, is_dup; -{ - DB *dbp, *ldbp; - DBC *cp; - DB_ENV *dbenv; - DB_LSN lsn; - DB_TXN *my_txn; - HASH_CURSOR *hcp, *lcp; - int found, ret; - u_int32_t order; - - dbp = dbc->dbp; - dbenv = dbp->dbenv; - hcp = (HASH_CURSOR *)dbc->internal; - - /* - * Adjustment will only be logged if this is a subtransaction. - * Only subtransactions can abort and affect their parent - * transaction's cursors. - */ - - my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL; - found = 0; - - MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); - - /* - * Calculate the order of this deleted record. - * This will be one greater than any cursor that is pointing - * at this record and already marked as deleted.
- */ - order = 0; - if (!add) { - order = 1; - for (ldbp = __dblist_get(dbenv, dbp->adj_fileid); - ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; - ldbp = LIST_NEXT(ldbp, dblistlinks)) { - MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); - for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; - cp = TAILQ_NEXT(cp, links)) { - if (cp == dbc || cp->dbtype != DB_HASH) - continue; - lcp = (HASH_CURSOR *)cp->internal; - if (F_ISSET(lcp, H_DELETED) && - hcp->pgno == lcp->pgno && - hcp->indx == lcp->indx && - order <= lcp->order && - (!is_dup || hcp->dup_off == lcp->dup_off)) - order = lcp->order + 1; - } - MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); - } - hcp->order = order; - } - - for (ldbp = __dblist_get(dbenv, dbp->adj_fileid); - ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; - ldbp = LIST_NEXT(ldbp, dblistlinks)) { - MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); - for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; - cp = TAILQ_NEXT(cp, links)) { - if (cp == dbc || cp->dbtype != DB_HASH) - continue; - - lcp = (HASH_CURSOR *)cp->internal; - - if (lcp->pgno != hcp->pgno || lcp->indx == NDX_INVALID) - continue; - - if (my_txn != NULL && cp->txn != my_txn) - found = 1; - - if (!is_dup) { - if (add) { - /* - * This routine is not called to add - * non-dup records which are always put - * at the end. It is only called from - * recovery in this case and the - * cursor will be marked deleted. - * We are "undeleting" so unmark all - * cursors with the same order. - */ - if (lcp->indx == hcp->indx && - F_ISSET(lcp, H_DELETED)) { - if (lcp->order == hcp->order) - F_CLR(lcp, H_DELETED); - else if (lcp->order > - hcp->order) { - - /* - * If we've moved this cursor's - * index, split its order - * number--i.e., decrement it by - * enough so that the lowest - * cursor moved has order 1. - * cp_arg->order is the split - * point, so decrement by one - * less than that. 
- */ - lcp->order -= - (hcp->order - 1); - lcp->indx += 2; - } - } else if (lcp->indx >= hcp->indx) - lcp->indx += 2; - - } else { - if (lcp->indx > hcp->indx) { - lcp->indx -= 2; - if (lcp->indx == hcp->indx && - F_ISSET(lcp, H_DELETED)) - lcp->order += order; - } else if (lcp->indx == hcp->indx && - !F_ISSET(lcp, H_DELETED)) { - F_SET(lcp, H_DELETED); - F_CLR(lcp, H_ISDUP); - lcp->order = order; - } - } - } else if (lcp->indx == hcp->indx) { - /* - * Handle duplicates. This routine is - * only called for on page dups. - * Off page dups are handled by btree/rtree - * code. - */ - if (add) { - lcp->dup_tlen += len; - if (lcp->dup_off == hcp->dup_off && - F_ISSET(hcp, H_DELETED) && - F_ISSET(lcp, H_DELETED)) { - /* Abort of a delete. */ - if (lcp->order == hcp->order) - F_CLR(lcp, H_DELETED); - else if (lcp->order > - hcp->order) { - lcp->order -= - (hcp->order -1); - lcp->dup_off += len; - } - } else if (lcp->dup_off >= hcp->dup_off) - lcp->dup_off += len; - } else { - lcp->dup_tlen -= len; - if (lcp->dup_off > hcp->dup_off) { - lcp->dup_off -= len; - if (lcp->dup_off == - hcp->dup_off && - F_ISSET(lcp, H_DELETED)) - lcp->order += order; - } else if (lcp->dup_off == - hcp->dup_off && - !F_ISSET(lcp, H_DELETED)) { - F_SET(lcp, H_DELETED); - lcp->order = order; - } - } - } - } - MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); - } - MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); - - if (found != 0 && DBC_LOGGING(dbc)) { - if ((ret = __ham_curadj_log(dbp, my_txn, &lsn, 0, hcp->pgno, - hcp->indx, len, hcp->dup_off, add, is_dup, order)) != 0) - return (ret); - } - - return (0); -} - -/* - * __ham_get_clist -- - * - * Get a list of cursors either on a particular bucket or on a particular - * page and index combination. The former is so that we can update - * cursors on a split. The latter is so we can update cursors when we - * move items off page. 
- * - * PUBLIC: int __ham_get_clist __P((DB *, db_pgno_t, u_int32_t, DBC ***)); - */ -int -__ham_get_clist(dbp, pgno, indx, listp) - DB *dbp; - db_pgno_t pgno; - u_int32_t indx; - DBC ***listp; -{ - DB *ldbp; - DBC *cp; - DB_ENV *dbenv; - int nalloc, nused, ret; - - /* - * Assume that finding anything is the exception, so optimize for - * the case where there aren't any. - */ - nalloc = nused = 0; - *listp = NULL; - dbenv = dbp->dbenv; - - MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); - for (ldbp = __dblist_get(dbenv, dbp->adj_fileid); - ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; - ldbp = LIST_NEXT(ldbp, dblistlinks)) { - /* - * NOTE(review): the mutex taken is dbp->mutexp while the - * queue walked below belongs to ldbp -- confirm intended. - */ - MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); - for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; - cp = TAILQ_NEXT(cp, links)) - /* - * We match if cp->pgno matches the specified - * pgno, and if either the cp->indx matches - * or we weren't given an index. - */ - if (cp->internal->pgno == pgno && - (indx == NDX_INVALID || - cp->internal->indx == indx)) { - /* - * Grow the array ten slots at a time. - * NOTE(review): elements are DBC pointers - * but are sized as sizeof(HASH_CURSOR *). - */ - if (nused >= nalloc) { - nalloc += 10; - if ((ret = __os_realloc(dbp->dbenv, - nalloc * sizeof(HASH_CURSOR *), - listp)) != 0) - goto err; - } - (*listp)[nused++] = cp; - } - - MUTEX_THREAD_UNLOCK(dbp->dbenv, dbp->mutexp); - } - MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); - - /* - * NULL-terminate the list. - * - * NOTE(review): listp was already dereferenced above, so this test - * is always true and a one-slot list is allocated even when no - * cursor matched, contrary to the "optimize for none" comment; - * a test on the list pointer itself may have been intended -- - * confirm that callers accept a NULL list before changing it. - */ - if (listp != NULL) { - if (nused >= nalloc) { - nalloc++; - if ((ret = __os_realloc(dbp->dbenv, - nalloc * sizeof(HASH_CURSOR *), listp)) != 0) - return (ret); - } - (*listp)[nused] = NULL; - } - return (0); - /* Reached from inside the scan with both mutexes still held. */ -err: - MUTEX_THREAD_UNLOCK(dbp->dbenv, dbp->mutexp); - MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); - return (ret); -} - -static int -__ham_c_writelock(dbc) - DBC *dbc; -{ - DB_ENV *dbenv; - DB_LOCK tmp_lock; - HASH_CURSOR *hcp; - int ret; - - /* - * All we need do is acquire the lock and let the off-page - * dup tree do its thing.
- */ - if (!STD_LOCKING(dbc)) - return (0); - - hcp = (HASH_CURSOR *)dbc->internal; - if ((!LOCK_ISSET(hcp->lock) || hcp->lock_mode == DB_LOCK_READ)) { - tmp_lock = hcp->lock; - if ((ret = __ham_lock_bucket(dbc, DB_LOCK_WRITE)) != 0) - return (ret); - dbenv = dbc->dbp->dbenv; - if (LOCK_ISSET(tmp_lock) && - (ret = dbenv->lock_put(dbenv, &tmp_lock)) != 0) - return (ret); - } - return (0); -} diff --git a/bdb/hash/hash.src b/bdb/hash/hash.src deleted file mode 100644 index b4b633c56e6..00000000000 --- a/bdb/hash/hash.src +++ /dev/null @@ -1,266 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2002 - * Sleepycat Software. All rights reserved. - * - * $Id: hash.src,v 10.38 2002/04/17 19:03:10 krinsky Exp $ - */ -/* - * Copyright (c) 1995, 1996 - * Margo Seltzer. All rights reserved. - */ -/* - * Copyright (c) 1995, 1996 - * The President and Fellows of Harvard University. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Margo Seltzer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -PREFIX __ham -DBPRIVATE - -INCLUDE #include "db_config.h" -INCLUDE -INCLUDE #ifndef NO_SYSTEM_INCLUDES -INCLUDE #include <sys/types.h> -INCLUDE -INCLUDE #include <ctype.h> -INCLUDE #include <string.h> -INCLUDE #endif -INCLUDE -INCLUDE #include "db_int.h" -INCLUDE #include "dbinc/crypto.h" -INCLUDE #include "dbinc/db_page.h" -INCLUDE #include "dbinc/db_dispatch.h" -INCLUDE #include "dbinc/db_am.h" -INCLUDE #include "dbinc/hash.h" -INCLUDE #include "dbinc/rep.h" -INCLUDE #include "dbinc/log.h" -INCLUDE #include "dbinc/txn.h" -INCLUDE - -/* - * HASH-insdel: used for hash to insert/delete a pair of entries onto a master - * page. The pair might be regular key/data pairs or they might be the - * structures that refer to off page items, duplicates or offpage duplicates. 
- * opcode - PUTPAIR/DELPAIR + big masks - * fileid - identifies the file referenced - * pgno - page within file - * ndx - index on the page of the item being added (item index) - * pagelsn - lsn on the page before the update - * key - the key being inserted - * data - the data being inserted - */ -BEGIN insdel 21 -ARG opcode u_int32_t lu -DB fileid int32_t ld -WRLOCK pgno db_pgno_t lu -ARG ndx u_int32_t lu -POINTER pagelsn DB_LSN * lu -DBT key DBT s -DBT data DBT s -END - -/* - * Used to add and remove overflow pages. - * prev_pgno is the previous page that is going to get modified to - * point to this one. If this is the first page in a chain - * then prev_pgno should be PGNO_INVALID. - * new_pgno is the page being allocated. - * next_pgno is the page that follows this one. On allocation, - * this should be PGNO_INVALID. For deletes, it may exist. - * pagelsn is the old lsn on the page. - */ -BEGIN newpage 22 -ARG opcode u_int32_t lu -DB fileid int32_t ld -WRLOCKNZ prev_pgno db_pgno_t lu -POINTER prevlsn DB_LSN * lu -WRLOCKNZ new_pgno db_pgno_t lu -POINTER pagelsn DB_LSN * lu -WRLOCKNZ next_pgno db_pgno_t lu -POINTER nextlsn DB_LSN * lu -END - -/* - * Splitting requires two types of log messages. The second logs the - * data on the original page. To redo the split, we have to visit the - * new page (pages) and add the items back on the page if they are not - * yet there. - */ -BEGIN splitdata 24 -DB fileid int32_t ld -ARG opcode u_int32_t lu -WRLOCK pgno db_pgno_t lu -PGDBT pageimage DBT s -POINTER pagelsn DB_LSN * lu -END - -/* - * HASH-replace: is used for hash to handle partial puts that only - * affect a single master page. - * fileid - identifies the file referenced - * pgno - page within file - * ndx - index on the page of the item being modified (item index) - * pagelsn - lsn on the page before the update - * off - offset in the old item where the new item is going. - * olditem - DBT that describes the part of the item being replaced. 
- * newitem - DBT of the new item. - * makedup - this was a replacement that made an item a duplicate. - */ -BEGIN replace 25 -DB fileid int32_t ld -WRLOCK pgno db_pgno_t lu -ARG ndx u_int32_t lu -POINTER pagelsn DB_LSN * lu -ARG off int32_t ld -DBT olditem DBT s -DBT newitem DBT s -ARG makedup u_int32_t lu -END - -/* - * Used when we empty the first page in a bucket and there are pages after - * it. The page after it gets copied into the bucket page (since bucket - * pages have to be in fixed locations). - * pgno: the bucket page - * pagelsn: the old LSN on the bucket page - * next_pgno: the page number of the next page - * nnext_pgno: page after next_pgno (may need to change its prev) - * nnextlsn: the LSN of nnext_pgno. - */ -BEGIN copypage 28 -DB fileid int32_t ld -WRLOCK pgno db_pgno_t lu -POINTER pagelsn DB_LSN * lu -WRLOCK next_pgno db_pgno_t lu -POINTER nextlsn DB_LSN * lu -WRLOCKNZ nnext_pgno db_pgno_t lu -POINTER nnextlsn DB_LSN * lu -PGDBT page DBT s -END - -/* - * This record logs the meta-data aspects of a split operation. It has enough - * information so that we can record both an individual page allocation as well - * as a group allocation which we do because in sub databases, the pages in - * a hash doubling, must be contiguous. If we do a group allocation, the - * number of pages allocated is bucket + 1, pgno is the page number of the - * first newly allocated bucket. - * - * bucket: Old maximum bucket number. - * mmpgno: Master meta-data page number (0 if same as mpgno). - * mmetalsn: Lsn of the master meta-data page. - * mpgno: Meta-data page number. - * metalsn: Lsn of the meta-data page. - * pgno: Page allocated to bucket + 1 (first newly allocated page) - * pagelsn: Lsn of either the first page allocated (if newalloc == 0) or - * the last page allocated (if newalloc == 1). - * newalloc: 1 indicates that this record did the actual allocation; - * 0 indicates that the pages were already allocated from a - * previous (failed) allocation. 
- */ -BEGIN metagroup 29 -DB fileid int32_t ld -ARG bucket u_int32_t lu -WRLOCK mmpgno db_pgno_t lu -POINTER mmetalsn DB_LSN * lu -WRLOCKNZ mpgno db_pgno_t lu -POINTER metalsn DB_LSN * lu -WRLOCK pgno db_pgno_t lu -POINTER pagelsn DB_LSN * lu -ARG newalloc u_int32_t lu -END - -/* - * groupalloc - * - * This is used in conjunction with MPOOL_NEW_GROUP when we are creating - * a new database to make sure that we recreate or reclaim free pages - * when we allocate a chunk of contiguous ones during database creation. - * - * pgno: meta-data page number - * meta_lsn: meta-data lsn - * start_pgno: starting page number - * num: number of allocated pages - */ -BEGIN groupalloc 32 -DB fileid int32_t ld -POINTER meta_lsn DB_LSN * lu -WRLOCK start_pgno db_pgno_t lu -ARG num u_int32_t lu -ARG free db_pgno_t lu -END - -/* - * Records for backing out cursor adjustment. - * curadj - added or deleted a record or a dup - * within a record. - * pgno - page that was affected - * indx - indx of record affected. - * len - if a dup its length. - * dup_off - if a dup its offset - * add - 1 if add 0 if delete - * is_dup - 1 if dup 0 otherwise. - * order - order assigned to this deleted record or dup. - * - * chgpg - removed a page, move the records to a new page - * mode - CHGPG page was deleted or records move to new page. - * - SPLIT we split a bucket - * - DUP we converted to off page duplicates. - * old_pgno, new_pgno - old and new page numbers. - * old_indx, new_indx - old and new index numbers, NDX_INVALID if - * it affects all records on the page. - * For three opcodes new in 3.3 (DB_HAM_DELFIRSTPG, DELMIDPG, - * and DELLASTPG), we overload old_indx and new_indx to avoid - * needing a new log record type: old_indx stores the only - * indx of interest to these records, and new_indx stores the - * order that's assigned to the lowest deleted record we're - * moving.
- */ -BEGIN curadj 33 -DB fileid int32_t ld -ARG pgno db_pgno_t lu -ARG indx u_int32_t lu -ARG len u_int32_t lu -ARG dup_off u_int32_t lu -ARG add int ld -ARG is_dup int ld -ARG order u_int32_t lu -END - -BEGIN chgpg 34 -DB fileid int32_t ld -ARG mode db_ham_mode ld -ARG old_pgno db_pgno_t lu -ARG new_pgno db_pgno_t lu -ARG old_indx u_int32_t lu -ARG new_indx u_int32_t lu -END - diff --git a/bdb/hash/hash_conv.c b/bdb/hash/hash_conv.c deleted file mode 100644 index a93e56a2ee4..00000000000 --- a/bdb/hash/hash_conv.c +++ /dev/null @@ -1,116 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2002 - * Sleepycat Software. All rights reserved. - */ -#include "db_config.h" - -#ifndef lint -static const char revid[] = "$Id: hash_conv.c,v 11.13 2002/08/06 05:34:35 bostic Exp $"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> -#endif - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_swap.h" -#include "dbinc/hash.h" - -/* - * __ham_pgin -- - * Convert host-specific page layout from the host-independent format - * stored on disk. - * - * PUBLIC: int __ham_pgin __P((DB_ENV *, DB *, db_pgno_t, void *, DBT *)); - */ -int -__ham_pgin(dbenv, dummydbp, pg, pp, cookie) - DB_ENV *dbenv; - DB *dummydbp; - db_pgno_t pg; - void *pp; - DBT *cookie; -{ - DB_PGINFO *pginfo; - PAGE *h; - - h = pp; - pginfo = (DB_PGINFO *)cookie->data; - - /* - * The hash access method does blind reads of pages, causing them - * to be created. If the type field isn't set it's one of them, - * initialize the rest of the page and return. - */ - if (h->type != P_HASHMETA && h->pgno == PGNO_INVALID) { - P_INIT(pp, (db_indx_t)pginfo->db_pagesize, - pg, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - return (0); - } - - if (!F_ISSET(pginfo, DB_AM_SWAP)) - return (0); - - return (h->type == P_HASHMETA ? 
__ham_mswap(pp) : - __db_byteswap(dbenv, dummydbp, pg, pp, pginfo->db_pagesize, 1)); -} - -/* - * __ham_pgout -- - * Convert host-specific page layout to the host-independent format - * stored on disk. - * - * PUBLIC: int __ham_pgout __P((DB_ENV *, DB *, db_pgno_t, void *, DBT *)); - */ -int -__ham_pgout(dbenv, dummydbp, pg, pp, cookie) - DB_ENV *dbenv; - DB *dummydbp; - db_pgno_t pg; - void *pp; - DBT *cookie; -{ - DB_PGINFO *pginfo; - PAGE *h; - - /* Nothing to do unless the page-info cookie has DB_AM_SWAP set. */ - pginfo = (DB_PGINFO *)cookie->data; - if (!F_ISSET(pginfo, DB_AM_SWAP)) - return (0); - - /* Meta pages go through __ham_mswap, all others __db_byteswap. */ - h = pp; - return (h->type == P_HASHMETA ? __ham_mswap(pp) : - __db_byteswap(dbenv, dummydbp, pg, pp, pginfo->db_pagesize, 0)); -} - -/* - * __ham_mswap -- - * Swap the bytes on the hash metadata page. - * - * PUBLIC: int __ham_mswap __P((void *)); - */ -int -__ham_mswap(pg) - void *pg; -{ - u_int8_t *p; - int i; - - /* Common DBMETA header first, then the hash-specific fields. */ - __db_metaswap(pg); - - /* - * p starts right after the DBMETA header. SWAP32 presumably - * advances p past each field it swaps -- verify in db_swap.h. - */ - p = (u_int8_t *)pg + sizeof(DBMETA); - - SWAP32(p); /* max_bucket */ - SWAP32(p); /* high_mask */ - SWAP32(p); /* low_mask */ - SWAP32(p); /* ffactor */ - SWAP32(p); /* nelem */ - SWAP32(p); /* h_charkey */ - for (i = 0; i < NCACHED; ++i) - SWAP32(p); /* spares */ - p += 59 * sizeof(u_int32_t); /* unused */ - SWAP32(p); /* crypto_magic */ - return (0); -} diff --git a/bdb/hash/hash_dup.c b/bdb/hash/hash_dup.c deleted file mode 100644 index ec70e519d54..00000000000 --- a/bdb/hash/hash_dup.c +++ /dev/null @@ -1,891 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2002 - * Sleepycat Software. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Margo Seltzer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1.
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#include "db_config.h" - -#ifndef lint -static const char revid[] = "$Id: hash_dup.c,v 11.76 2002/08/06 05:34:40 bostic Exp $"; -#endif /* not lint */ - -/* - * PACKAGE: hashing - * - * DESCRIPTION: - * Manipulation of duplicates for the hash package. 
- */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <string.h> -#endif - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/hash.h" -#include "dbinc/btree.h" - -static int __ham_c_chgpg __P((DBC *, - db_pgno_t, u_int32_t, db_pgno_t, u_int32_t)); -static int __ham_check_move __P((DBC *, u_int32_t)); -static int __ham_dcursor __P((DBC *, db_pgno_t, u_int32_t)); -static int __ham_move_offpage __P((DBC *, PAGE *, u_int32_t, db_pgno_t)); - -/* - * Called from hash_access to add a duplicate key. nval is the new - * value that we want to add. The flags correspond to the flag values - * to cursor_put indicating where to add the new element. - * There are 4 cases. - * Case 1: The existing duplicate set already resides on a separate page. - * We return and let the common code handle this. - * Case 2: The element is small enough to just be added to the existing set. - * Case 3: The element is large enough to be a big item, so we're going to - * have to push the set onto a new page. - * Case 4: The element is large enough to push the duplicate set onto a - * separate page. - * - * PUBLIC: int __ham_add_dup __P((DBC *, DBT *, u_int32_t, db_pgno_t *)); - */ -int -__ham_add_dup(dbc, nval, flags, pgnop) - DBC *dbc; - DBT *nval; - u_int32_t flags; - db_pgno_t *pgnop; -{ - DB *dbp; - DBT pval, tmp_val; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - u_int32_t add_bytes, new_size; - int cmp, ret; - u_int8_t *hk; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - DB_ASSERT(flags != DB_CURRENT); - - add_bytes = nval->size + - (F_ISSET(nval, DB_DBT_PARTIAL) ? nval->doff : 0); - add_bytes = DUP_SIZE(add_bytes); - - if ((ret = __ham_check_move(dbc, add_bytes)) != 0) - return (ret); - - /* - * Check if resulting duplicate set is going to need to go - * onto a separate duplicate page. If so, convert the - * duplicate set and add the new one. 
After conversion, - * hcp->dndx is the first free ndx or the index of the - * current pointer into the duplicate set. - */ - hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - /* Add the len bytes to the current singleton. */ - if (HPAGE_PTYPE(hk) != H_DUPLICATE) - add_bytes += DUP_SIZE(0); - new_size = - LEN_HKEYDATA(dbp, hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx)) + - add_bytes; - - /* - * We convert to off-page duplicates if the item is a big item, - * the addition of the new item will make the set large, or - * if there isn't enough room on this page to add the next item. - */ - if (HPAGE_PTYPE(hk) != H_OFFDUP && - (HPAGE_PTYPE(hk) == H_OFFPAGE || ISBIG(hcp, new_size) || - add_bytes > P_FREESPACE(dbp, hcp->page))) { - - if ((ret = __ham_dup_convert(dbc)) != 0) - return (ret); - return (hcp->opd->c_am_put(hcp->opd, - NULL, nval, flags, NULL)); - } - - /* There are two separate cases here: on page and off page. */ - if (HPAGE_PTYPE(hk) != H_OFFDUP) { - if (HPAGE_PTYPE(hk) != H_DUPLICATE) { - pval.flags = 0; - pval.data = HKEYDATA_DATA(hk); - pval.size = LEN_HDATA(dbp, hcp->page, dbp->pgsize, - hcp->indx); - if ((ret = __ham_make_dup(dbp->dbenv, - &pval, &tmp_val, &dbc->my_rdata.data, - &dbc->my_rdata.ulen)) != 0 || (ret = - __ham_replpair(dbc, &tmp_val, 1)) != 0) - return (ret); - hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - HPAGE_PTYPE(hk) = H_DUPLICATE; - - /* - * Update the cursor position since we now are in - * duplicates. - */ - F_SET(hcp, H_ISDUP); - hcp->dup_off = 0; - hcp->dup_len = pval.size; - hcp->dup_tlen = DUP_SIZE(hcp->dup_len); - } - - /* Now make the new entry a duplicate. */ - if ((ret = __ham_make_dup(dbp->dbenv, nval, - &tmp_val, &dbc->my_rdata.data, &dbc->my_rdata.ulen)) != 0) - return (ret); - - tmp_val.dlen = 0; - switch (flags) { /* On page. 
*/ - case DB_KEYFIRST: - case DB_KEYLAST: - case DB_NODUPDATA: - if (dbp->dup_compare != NULL) { - __ham_dsearch(dbc, - nval, &tmp_val.doff, &cmp, flags); - - /* dup dups are not supported w/ sorted dups */ - if (cmp == 0) - return (__db_duperr(dbp, flags)); - } else { - hcp->dup_tlen = LEN_HDATA(dbp, hcp->page, - dbp->pgsize, hcp->indx); - hcp->dup_len = nval->size; - F_SET(hcp, H_ISDUP); - if (flags == DB_KEYFIRST) - hcp->dup_off = tmp_val.doff = 0; - else - hcp->dup_off = - tmp_val.doff = hcp->dup_tlen; - } - break; - case DB_BEFORE: - tmp_val.doff = hcp->dup_off; - break; - case DB_AFTER: - tmp_val.doff = hcp->dup_off + DUP_SIZE(hcp->dup_len); - break; - } - /* Add the duplicate. */ - ret = __ham_replpair(dbc, &tmp_val, 0); - if (ret == 0) - ret = mpf->set(mpf, hcp->page, DB_MPOOL_DIRTY); - if (ret != 0) - return (ret); - - /* Now, update the cursor if necessary. */ - switch (flags) { - case DB_AFTER: - hcp->dup_off += DUP_SIZE(hcp->dup_len); - hcp->dup_len = nval->size; - hcp->dup_tlen += (db_indx_t)DUP_SIZE(nval->size); - break; - case DB_KEYFIRST: - case DB_KEYLAST: - case DB_BEFORE: - hcp->dup_tlen += (db_indx_t)DUP_SIZE(nval->size); - hcp->dup_len = nval->size; - break; - } - ret = __ham_c_update(dbc, tmp_val.size, 1, 1); - return (ret); - } - - /* - * If we get here, then we're on duplicate pages; set pgnop and - * return so the common code can handle it. - */ - memcpy(pgnop, HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)), - sizeof(db_pgno_t)); - - return (ret); -} - -/* - * Convert an on-page set of duplicates to an offpage set of duplicates. 
- * - * PUBLIC: int __ham_dup_convert __P((DBC *)); - */ -int -__ham_dup_convert(dbc) - DBC *dbc; -{ - BOVERFLOW bo; - DB *dbp; - DBC **hcs; - DBT dbt; - DB_LSN lsn; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - HOFFPAGE ho; - PAGE *dp; - db_indx_t i, len, off; - int c, ret, t_ret; - u_int8_t *p, *pend; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - /* - * Create a new page for the duplicates. - */ - if ((ret = __db_new(dbc, - dbp->dup_compare == NULL ? P_LRECNO : P_LDUP, &dp)) != 0) - return (ret); - P_INIT(dp, dbp->pgsize, - dp->pgno, PGNO_INVALID, PGNO_INVALID, LEAFLEVEL, TYPE(dp)); - - /* - * Get the list of cursors that may need to be updated. - */ - if ((ret = __ham_get_clist(dbp, - PGNO(hcp->page), (u_int32_t)hcp->indx, &hcs)) != 0) - goto err; - - /* - * Now put the duplicates onto the new page. - */ - dbt.flags = 0; - switch (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx))) { - case H_KEYDATA: - /* Simple case, one key on page; move it to dup page. */ - dbt.size = LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); - dbt.data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); - ret = __db_pitem(dbc, - dp, 0, BKEYDATA_SIZE(dbt.size), NULL, &dbt); - goto finish; - case H_OFFPAGE: - /* Simple case, one key on page; move it to dup page. */ - memcpy(&ho, P_ENTRY(dbp, hcp->page, H_DATAINDEX(hcp->indx)), - HOFFPAGE_SIZE); - UMRW_SET(bo.unused1); - B_TSET(bo.type, ho.type, 0); - UMRW_SET(bo.unused2); - bo.pgno = ho.pgno; - bo.tlen = ho.tlen; - dbt.size = BOVERFLOW_SIZE; - dbt.data = &bo; - - ret = __db_pitem(dbc, dp, 0, dbt.size, &dbt, NULL); -finish: if (ret == 0) { - if ((ret = mpf->set(mpf, dp, DB_MPOOL_DIRTY)) != 0) - break; - - /* Update any other cursors. 
*/ - if (hcs != NULL && DBC_LOGGING(dbc) && - IS_SUBTRANSACTION(dbc->txn)) { - if ((ret = __ham_chgpg_log(dbp, dbc->txn, - &lsn, 0, DB_HAM_DUP, PGNO(hcp->page), - PGNO(dp), hcp->indx, 0)) != 0) - break; - } - for (c = 0; hcs != NULL && hcs[c] != NULL; c++) - if ((ret = __ham_dcursor(hcs[c], - PGNO(dp), 0)) != 0) - break; - } - break; - case H_DUPLICATE: - p = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); - pend = p + - LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); - - /* - * We need to maintain the duplicate cursor position. - * Keep track of where we are in the duplicate set via - * the offset, and when it matches the one in the cursor, - * set the off-page duplicate cursor index to the current - * index. - */ - for (off = 0, i = 0; p < pend; i++) { - memcpy(&len, p, sizeof(db_indx_t)); - dbt.size = len; - p += sizeof(db_indx_t); - dbt.data = p; - p += len + sizeof(db_indx_t); - if ((ret = __db_pitem(dbc, dp, - i, BKEYDATA_SIZE(dbt.size), NULL, &dbt)) != 0) - break; - - /* Update any other cursors */ - if (hcs != NULL && DBC_LOGGING(dbc) && - IS_SUBTRANSACTION(dbc->txn)) { - if ((ret = __ham_chgpg_log(dbp, dbc->txn, - &lsn, 0, DB_HAM_DUP, PGNO(hcp->page), - PGNO(dp), hcp->indx, i)) != 0) - break; - } - for (c = 0; hcs != NULL && hcs[c] != NULL; c++) - if (((HASH_CURSOR *)(hcs[c]->internal))->dup_off - == off && (ret = __ham_dcursor(hcs[c], - PGNO(dp), i)) != 0) - goto err; - off += len + 2 * sizeof(db_indx_t); - } - break; - default: - ret = __db_pgfmt(dbp->dbenv, (u_long)hcp->pgno); - break; - } - - /* - * Now attach this to the source page in place of the old duplicate - * item. - */ - if (ret == 0) - ret = __ham_move_offpage(dbc, hcp->page, - (u_int32_t)H_DATAINDEX(hcp->indx), PGNO(dp)); - -err: if (ret == 0) - ret = mpf->set(mpf, hcp->page, DB_MPOOL_DIRTY); - - if ((t_ret = - mpf->put(mpf, dp, ret == 0 ? 
DB_MPOOL_DIRTY : 0)) != 0 && ret == 0) - ret = t_ret; - - if (ret == 0) - hcp->dup_tlen = hcp->dup_off = hcp->dup_len = 0; - - if (hcs != NULL) - __os_free(dbp->dbenv, hcs); - - return (ret); -} - -/* - * __ham_make_dup - * - * Take a regular dbt and make it into a duplicate item with all the partial - * information set appropriately. If the incoming dbt is a partial, assume - * we are creating a new entry and make sure that we do any initial padding. - * - * PUBLIC: int __ham_make_dup __P((DB_ENV *, - * PUBLIC: const DBT *, DBT *d, void **, u_int32_t *)); - */ -int -__ham_make_dup(dbenv, notdup, duplicate, bufp, sizep) - DB_ENV *dbenv; - const DBT *notdup; - DBT *duplicate; - void **bufp; - u_int32_t *sizep; -{ - db_indx_t tsize, item_size; - int ret; - u_int8_t *p; - - item_size = (db_indx_t)notdup->size; - if (F_ISSET(notdup, DB_DBT_PARTIAL)) - item_size += notdup->doff; - - tsize = DUP_SIZE(item_size); - if ((ret = __ham_init_dbt(dbenv, duplicate, tsize, bufp, sizep)) != 0) - return (ret); - - duplicate->dlen = 0; - duplicate->flags = notdup->flags; - F_SET(duplicate, DB_DBT_PARTIAL); - - p = duplicate->data; - memcpy(p, &item_size, sizeof(db_indx_t)); - p += sizeof(db_indx_t); - if (F_ISSET(notdup, DB_DBT_PARTIAL)) { - memset(p, 0, notdup->doff); - p += notdup->doff; - } - memcpy(p, notdup->data, notdup->size); - p += notdup->size; - memcpy(p, &item_size, sizeof(db_indx_t)); - - duplicate->doff = 0; - duplicate->dlen = notdup->size; - - return (0); -} - -/* - * __ham_check_move -- - * - * Check if we can do whatever we need to on this page. If not, - * then we'll have to move the current element to a new page. 
- */ -static int -__ham_check_move(dbc, add_len) - DBC *dbc; - u_int32_t add_len; -{ - DB *dbp; - DBT k, d; - DB_LSN new_lsn; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - PAGE *next_pagep; - db_pgno_t next_pgno; - u_int32_t new_datalen, old_len, rectype; - u_int8_t *hk; - int ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - - /* - * If the item is already off page duplicates or an offpage item, - * then we know we can do whatever we need to do in-place - */ - if (HPAGE_PTYPE(hk) == H_OFFDUP || HPAGE_PTYPE(hk) == H_OFFPAGE) - return (0); - - old_len = LEN_HITEM(dbp, hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx)); - new_datalen = old_len - HKEYDATA_SIZE(0) + add_len; - if (HPAGE_PTYPE(hk) != H_DUPLICATE) - new_datalen += DUP_SIZE(0); - - /* - * We need to add a new page under two conditions: - * 1. The addition makes the total data length cross the BIG - * threshold and the OFFDUP structure won't fit on this page. - * 2. The addition does not make the total data cross the - * threshold, but the new data won't fit on the page. - * If neither of these is true, then we can return. - */ - if (ISBIG(hcp, new_datalen) && (old_len > HOFFDUP_SIZE || - HOFFDUP_SIZE - old_len <= P_FREESPACE(dbp, hcp->page))) - return (0); - - if (!ISBIG(hcp, new_datalen) && add_len <= P_FREESPACE(dbp, hcp->page)) - return (0); - - /* - * If we get here, then we need to move the item to a new page. - * Check if there are more pages in the chain. We now need to - * update new_datalen to include the size of both the key and - * the data that we need to move. - */ - - new_datalen = ISBIG(hcp, new_datalen) ? 
- HOFFDUP_SIZE : HKEYDATA_SIZE(new_datalen); - new_datalen += LEN_HITEM(dbp, hcp->page, dbp->pgsize, H_KEYINDEX(hcp->indx)); - - next_pagep = NULL; - for (next_pgno = NEXT_PGNO(hcp->page); next_pgno != PGNO_INVALID; - next_pgno = NEXT_PGNO(next_pagep)) { - if (next_pagep != NULL && - (ret = mpf->put(mpf, next_pagep, 0)) != 0) - return (ret); - - if ((ret = mpf->get(mpf, - &next_pgno, DB_MPOOL_CREATE, &next_pagep)) != 0) - return (ret); - - if (P_FREESPACE(dbp, next_pagep) >= new_datalen) - break; - } - - /* No more pages, add one. */ - if (next_pagep == NULL && (ret = __ham_add_ovflpage(dbc, - hcp->page, 0, &next_pagep)) != 0) - return (ret); - - /* Add new page at the end of the chain. */ - if (P_FREESPACE(dbp, next_pagep) < new_datalen && (ret = - __ham_add_ovflpage(dbc, next_pagep, 1, &next_pagep)) != 0) { - (void)mpf->put(mpf, next_pagep, 0); - return (ret); - } - - /* Copy the item to the new page. */ - if (DBC_LOGGING(dbc)) { - rectype = PUTPAIR; - k.flags = 0; - d.flags = 0; - if (HPAGE_PTYPE( - H_PAIRKEY(dbp, hcp->page, hcp->indx)) == H_OFFPAGE) { - rectype |= PAIR_KEYMASK; - k.data = H_PAIRKEY(dbp, hcp->page, hcp->indx); - k.size = HOFFPAGE_SIZE; - } else { - k.data = - HKEYDATA_DATA(H_PAIRKEY(dbp, hcp->page, hcp->indx)); - k.size = - LEN_HKEY(dbp, hcp->page, dbp->pgsize, hcp->indx); - } - - if (HPAGE_PTYPE(hk) == H_OFFPAGE) { - rectype |= PAIR_DATAMASK; - d.data = H_PAIRDATA(dbp, hcp->page, hcp->indx); - d.size = HOFFPAGE_SIZE; - } else { - if (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx)) - == H_DUPLICATE) - rectype |= PAIR_DUPMASK; - d.data = - HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); - d.size = LEN_HDATA(dbp, hcp->page, - dbp->pgsize, hcp->indx); - } - - if ((ret = __ham_insdel_log(dbp, - dbc->txn, &new_lsn, 0, rectype, PGNO(next_pagep), - (u_int32_t)NUM_ENT(next_pagep), &LSN(next_pagep), - &k, &d)) != 0) { - (void)mpf->put(mpf, next_pagep, 0); - return (ret); - } - } else - LSN_NOT_LOGGED(new_lsn); - - /* Move lsn onto page. 
*/ - LSN(next_pagep) = new_lsn; /* Structure assignment. */ - - __ham_copy_item(dbp, hcp->page, H_KEYINDEX(hcp->indx), next_pagep); - __ham_copy_item(dbp, hcp->page, H_DATAINDEX(hcp->indx), next_pagep); - - /* - * We've just manually inserted a key and set of data onto - * next_pagep; however, it's possible that our caller will - * return without further modifying the new page, for instance - * if DB_NODUPDATA is set and our new item is a duplicate duplicate. - * Thus, to be on the safe side, we need to mark the page dirty - * here. [#2996] - * - * Note that __ham_del_pair should dirty the page we're moving - * the items from, so we need only dirty the new page ourselves. - */ - if ((ret = mpf->set(mpf, next_pagep, DB_MPOOL_DIRTY)) != 0) - goto out; - - /* Update all cursors that used to point to this item. */ - if ((ret = __ham_c_chgpg(dbc, PGNO(hcp->page), H_KEYINDEX(hcp->indx), - PGNO(next_pagep), NUM_ENT(next_pagep) - 2)) != 0) - goto out; - - /* Now delete the pair from the current page. */ - ret = __ham_del_pair(dbc, 0); - - /* - * __ham_del_pair decremented nelem. This is incorrect; we - * manually copied the element elsewhere, so the total number - * of elements hasn't changed. Increment it again. - * - * !!! - * Note that we still have the metadata page pinned, and - * __ham_del_pair dirtied it, so we don't need to set the dirty - * flag again. - */ - if (!STD_LOCKING(dbc)) - hcp->hdr->nelem++; - -out: - (void)mpf->put(mpf, hcp->page, DB_MPOOL_DIRTY); - hcp->page = next_pagep; - hcp->pgno = PGNO(hcp->page); - hcp->indx = NUM_ENT(hcp->page) - 2; - F_SET(hcp, H_EXPAND); - F_CLR(hcp, H_DELETED); - - return (ret); -} - -/* - * __ham_move_offpage -- - * Replace an onpage set of duplicates with the OFFDUP structure - * that references the duplicate page. - * - * XXX - * This is really just a special case of __onpage_replace; we should - * probably combine them. 
- * - */ -static int -__ham_move_offpage(dbc, pagep, ndx, pgno) - DBC *dbc; - PAGE *pagep; - u_int32_t ndx; - db_pgno_t pgno; -{ - DB *dbp; - DBT new_dbt; - DBT old_dbt; - HOFFDUP od; - db_indx_t i, *inp; - int32_t shrink; - u_int8_t *src; - int ret; - - dbp = dbc->dbp; - od.type = H_OFFDUP; - UMRW_SET(od.unused[0]); - UMRW_SET(od.unused[1]); - UMRW_SET(od.unused[2]); - od.pgno = pgno; - ret = 0; - - if (DBC_LOGGING(dbc)) { - new_dbt.data = &od; - new_dbt.size = HOFFDUP_SIZE; - old_dbt.data = P_ENTRY(dbp, pagep, ndx); - old_dbt.size = LEN_HITEM(dbp, pagep, dbp->pgsize, ndx); - if ((ret = __ham_replace_log(dbp, dbc->txn, &LSN(pagep), 0, - PGNO(pagep), (u_int32_t)ndx, &LSN(pagep), -1, - &old_dbt, &new_dbt, 0)) != 0) - return (ret); - } else - LSN_NOT_LOGGED(LSN(pagep)); - - shrink = LEN_HITEM(dbp, pagep, dbp->pgsize, ndx) - HOFFDUP_SIZE; - inp = P_INP(dbp, pagep); - - if (shrink != 0) { - /* Copy data. */ - src = (u_int8_t *)(pagep) + HOFFSET(pagep); - memmove(src + shrink, src, inp[ndx] - HOFFSET(pagep)); - HOFFSET(pagep) += shrink; - - /* Update index table. */ - for (i = ndx; i < NUM_ENT(pagep); i++) - inp[i] += shrink; - } - - /* Now copy the offdup entry onto the page. */ - memcpy(P_ENTRY(dbp, pagep, ndx), &od, HOFFDUP_SIZE); - return (ret); -} - -/* - * __ham_dsearch: - * Locate a particular duplicate in a duplicate set. Make sure that - * we exit with the cursor set appropriately. - * - * PUBLIC: void __ham_dsearch - * PUBLIC: __P((DBC *, DBT *, u_int32_t *, int *, u_int32_t)); - */ -void -__ham_dsearch(dbc, dbt, offp, cmpp, flags) - DBC *dbc; - DBT *dbt; - u_int32_t *offp, flags; - int *cmpp; -{ - DB *dbp; - HASH_CURSOR *hcp; - DBT cur; - db_indx_t i, len; - int (*func) __P((DB *, const DBT *, const DBT *)); - u_int8_t *data; - - dbp = dbc->dbp; - hcp = (HASH_CURSOR *)dbc->internal; - func = dbp->dup_compare == NULL ? __bam_defcmp : dbp->dup_compare; - - i = F_ISSET(hcp, H_CONTINUE) ? 
hcp->dup_off: 0; - data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)) + i; - hcp->dup_tlen = LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); - while (i < hcp->dup_tlen) { - memcpy(&len, data, sizeof(db_indx_t)); - data += sizeof(db_indx_t); - cur.data = data; - cur.size = (u_int32_t)len; - - /* - * If we find an exact match, we're done. If in a sorted - * duplicate set and the item is larger than our test item, - * we're done. In the latter case, if permitting partial - * matches, it's not a failure. - */ - *cmpp = func(dbp, dbt, &cur); - if (*cmpp == 0) - break; - if (*cmpp < 0 && dbp->dup_compare != NULL) { - if (flags == DB_GET_BOTH_RANGE) - *cmpp = 0; - break; - } - - i += len + 2 * sizeof(db_indx_t); - data += len + sizeof(db_indx_t); - } - - *offp = i; - hcp->dup_off = i; - hcp->dup_len = len; - F_SET(hcp, H_ISDUP); -} - -#ifdef DEBUG -/* - * __ham_cprint -- - * Display the current cursor list. - * - * PUBLIC: void __ham_cprint __P((DBC *)); - */ -void -__ham_cprint(dbc) - DBC *dbc; -{ - HASH_CURSOR *cp; - - cp = (HASH_CURSOR *)dbc->internal; - - fprintf(stderr, "%#0lx->%#0lx: page: %lu index: %lu", - P_TO_ULONG(dbc), P_TO_ULONG(cp), (u_long)cp->pgno, - (u_long)cp->indx); - if (F_ISSET(cp, H_DELETED)) - fprintf(stderr, " (deleted)"); - fprintf(stderr, "\n"); -} -#endif /* DEBUG */ - -/* - * __ham_dcursor -- - * - * Create an off page duplicate cursor for this cursor. - */ -static int -__ham_dcursor(dbc, pgno, indx) - DBC *dbc; - db_pgno_t pgno; - u_int32_t indx; -{ - DB *dbp; - HASH_CURSOR *hcp; - BTREE_CURSOR *dcp; - int ret; - - dbp = dbc->dbp; - hcp = (HASH_CURSOR *)dbc->internal; - - if ((ret = __db_c_newopd(dbc, pgno, hcp->opd, &hcp->opd)) != 0) - return (ret); - - dcp = (BTREE_CURSOR *)hcp->opd->internal; - dcp->pgno = pgno; - dcp->indx = indx; - - if (dbp->dup_compare == NULL) { - /* - * Converting to off-page Recno trees is tricky. The - * record number for the cursor is the index + 1 (to - * convert to 1-based record numbers). 
- */ - dcp->recno = indx + 1; - } - - /* - * Transfer the deleted flag from the top-level cursor to the - * created one. - */ - if (F_ISSET(hcp, H_DELETED)) { - F_SET(dcp, C_DELETED); - F_CLR(hcp, H_DELETED); - } - - return (0); -} - -/* - * __ham_c_chgpg -- - * Adjust the cursors after moving an item to a new page. We only - * move cursors that are pointing at this one item and are not - * deleted; since we only touch non-deleted cursors, and since - * (by definition) no item existed at the pgno/indx we're moving the - * item to, we're guaranteed that all the cursors we affect here or - * on abort really do refer to this one item. - */ -static int -__ham_c_chgpg(dbc, old_pgno, old_index, new_pgno, new_index) - DBC *dbc; - db_pgno_t old_pgno, new_pgno; - u_int32_t old_index, new_index; -{ - DB *dbp, *ldbp; - DB_ENV *dbenv; - DB_LSN lsn; - DB_TXN *my_txn; - DBC *cp; - HASH_CURSOR *hcp; - int found, ret; - - dbp = dbc->dbp; - dbenv = dbp->dbenv; - - my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL; - found = 0; - - MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); - for (ldbp = __dblist_get(dbenv, dbp->adj_fileid); - ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; - ldbp = LIST_NEXT(ldbp, dblistlinks)) { - MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); - for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; - cp = TAILQ_NEXT(cp, links)) { - if (cp == dbc || cp->dbtype != DB_HASH) - continue; - - hcp = (HASH_CURSOR *)cp->internal; - - /* - * If a cursor is deleted, it doesn't refer to this - * item--it just happens to have the same indx, but - * it points to a former neighbor. Don't move it. 
- */ - if (F_ISSET(hcp, H_DELETED)) - continue; - - if (hcp->pgno == old_pgno) { - if (hcp->indx == old_index) { - hcp->pgno = new_pgno; - hcp->indx = new_index; - } else - continue; - if (my_txn != NULL && cp->txn != my_txn) - found = 1; - } - } - MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); - } - MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); - - if (found != 0 && DBC_LOGGING(dbc)) { - if ((ret = __ham_chgpg_log(dbp, my_txn, &lsn, 0, DB_HAM_CHGPG, - old_pgno, new_pgno, old_index, new_index)) != 0) - return (ret); - } - return (0); -} diff --git a/bdb/hash/hash_func.c b/bdb/hash/hash_func.c deleted file mode 100644 index c6cc2ad4460..00000000000 --- a/bdb/hash/hash_func.c +++ /dev/null @@ -1,245 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2002 - * Sleepycat Software. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993 - * Margo Seltzer. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Margo Seltzer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "db_config.h" - -#ifndef lint -static const char revid[] = "$Id: hash_func.c,v 11.12 2002/03/28 19:49:42 bostic Exp $"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> -#endif - -#include "db_int.h" - -/* - * __ham_func2 -- - * Phong Vo's linear congruential hash. - * - * PUBLIC: u_int32_t __ham_func2 __P((DB *, const void *, u_int32_t)); - */ -#define DCHARHASH(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c)) - -u_int32_t -__ham_func2(dbp, key, len) - DB *dbp; - const void *key; - u_int32_t len; -{ - const u_int8_t *e, *k; - u_int32_t h; - u_int8_t c; - - if (dbp != NULL) - COMPQUIET(dbp, NULL); - - k = key; - e = k + len; - for (h = 0; k != e;) { - c = *k++; - if (!c && k > e) - break; - DCHARHASH(h, c); - } - return (h); -} - -/* - * __ham_func3 -- - * Ozan Yigit's original sdbm hash. - * - * Ugly, but fast. Break the string up into 8 byte units. On the first time - * through the loop get the "leftover bytes" (strlen % 8). On every other - * iteration, perform 8 HASHC's so we handle all 8 bytes. Essentially, this - * saves us 7 cmp & branch instructions. 
- * - * PUBLIC: u_int32_t __ham_func3 __P((DB *, const void *, u_int32_t)); - */ -u_int32_t -__ham_func3(dbp, key, len) - DB *dbp; - const void *key; - u_int32_t len; -{ - const u_int8_t *k; - u_int32_t n, loop; - - if (dbp != NULL) - COMPQUIET(dbp, NULL); - - if (len == 0) - return (0); - -#define HASHC n = *k++ + 65599 * n - n = 0; - k = key; - - loop = (len + 8 - 1) >> 3; - switch (len & (8 - 1)) { - case 0: - do { - HASHC; - case 7: - HASHC; - case 6: - HASHC; - case 5: - HASHC; - case 4: - HASHC; - case 3: - HASHC; - case 2: - HASHC; - case 1: - HASHC; - } while (--loop); - } - return (n); -} - -/* - * __ham_func4 -- - * Chris Torek's hash function. Although this function performs only - * slightly worse than __ham_func5 on strings, it performs horribly on - * numbers. - * - * PUBLIC: u_int32_t __ham_func4 __P((DB *, const void *, u_int32_t)); - */ -u_int32_t -__ham_func4(dbp, key, len) - DB *dbp; - const void *key; - u_int32_t len; -{ - const u_int8_t *k; - u_int32_t h, loop; - - if (dbp != NULL) - COMPQUIET(dbp, NULL); - - if (len == 0) - return (0); - -#define HASH4a h = (h << 5) - h + *k++; -#define HASH4b h = (h << 5) + h + *k++; -#define HASH4 HASH4b - h = 0; - k = key; - - loop = (len + 8 - 1) >> 3; - switch (len & (8 - 1)) { - case 0: - do { - HASH4; - case 7: - HASH4; - case 6: - HASH4; - case 5: - HASH4; - case 4: - HASH4; - case 3: - HASH4; - case 2: - HASH4; - case 1: - HASH4; - } while (--loop); - } - return (h); -} - -/* - * Fowler/Noll/Vo hash - * - * The basis of the hash algorithm was taken from an idea sent by email to the - * IEEE Posix P1003.2 mailing list from Phong Vo (kpv@research.att.com) and - * Glenn Fowler (gsf@research.att.com). Landon Curt Noll (chongo@toad.com) - * later improved on their algorithm. - * - * The magic is in the interesting relationship between the special prime - * 16777619 (2^24 + 403) and 2^32 and 2^8. 
- * - * This hash produces the fewest collisions of any function that we've seen so - * far, and works well on both numbers and strings. - * - * PUBLIC: u_int32_t __ham_func5 __P((DB *, const void *, u_int32_t)); - */ -u_int32_t -__ham_func5(dbp, key, len) - DB *dbp; - const void *key; - u_int32_t len; -{ - const u_int8_t *k, *e; - u_int32_t h; - - if (dbp != NULL) - COMPQUIET(dbp, NULL); - - k = key; - e = k + len; - for (h = 0; k < e; ++k) { - h *= 16777619; - h ^= *k; - } - return (h); -} - -/* - * __ham_test -- - * - * PUBLIC: u_int32_t __ham_test __P((DB *, const void *, u_int32_t)); - */ -u_int32_t -__ham_test(dbp, key, len) - DB *dbp; - const void *key; - u_int32_t len; -{ - COMPQUIET(dbp, NULL); - COMPQUIET(len, 0); - return ((u_int32_t)*(char *)key); -} diff --git a/bdb/hash/hash_meta.c b/bdb/hash/hash_meta.c deleted file mode 100644 index 9f224454869..00000000000 --- a/bdb/hash/hash_meta.c +++ /dev/null @@ -1,125 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1999-2002 - * Sleepycat Software. All rights reserved. - */ - -#include "db_config.h" - -#ifndef lint -static const char revid[] = "$Id: hash_meta.c,v 11.19 2002/06/03 14:22:15 ubell Exp $"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> -#endif - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_shash.h" -#include "dbinc/hash.h" -#include "dbinc/lock.h" - -/* - * Acquire the meta-data page. - * - * PUBLIC: int __ham_get_meta __P((DBC *)); - */ -int -__ham_get_meta(dbc) - DBC *dbc; -{ - DB *dbp; - DB_ENV *dbenv; - DB_MPOOLFILE *mpf; - HASH *hashp; - HASH_CURSOR *hcp; - int ret; - - dbp = dbc->dbp; - dbenv = dbp->dbenv; - mpf = dbp->mpf; - hashp = dbp->h_internal; - hcp = (HASH_CURSOR *)dbc->internal; - - if (dbenv != NULL && - STD_LOCKING(dbc) && !F_ISSET(dbc, DBC_RECOVER | DBC_COMPENSATE)) { - dbc->lock.pgno = hashp->meta_pgno; - if ((ret = dbenv->lock_get(dbenv, dbc->locker, - DB_NONBLOCK(dbc) ? 
DB_LOCK_NOWAIT : 0, - &dbc->lock_dbt, DB_LOCK_READ, &hcp->hlock)) != 0) - return (ret); - } - - if ((ret = mpf->get(mpf, - &hashp->meta_pgno, DB_MPOOL_CREATE, &(hcp->hdr))) != 0 && - LOCK_ISSET(hcp->hlock)) - (void)dbenv->lock_put(dbenv, &hcp->hlock); - - return (ret); -} - -/* - * Release the meta-data page. - * - * PUBLIC: int __ham_release_meta __P((DBC *)); - */ -int -__ham_release_meta(dbc) - DBC *dbc; -{ - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - - mpf = dbc->dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - if (hcp->hdr) - (void)mpf->put(mpf, hcp->hdr, - F_ISSET(hcp, H_DIRTY) ? DB_MPOOL_DIRTY : 0); - hcp->hdr = NULL; - if (!F_ISSET(dbc, DBC_RECOVER | DBC_COMPENSATE) && - dbc->txn == NULL && LOCK_ISSET(hcp->hlock)) - (void)dbc->dbp->dbenv->lock_put(dbc->dbp->dbenv, &hcp->hlock); - F_CLR(hcp, H_DIRTY); - - return (0); -} - -/* - * Mark the meta-data page dirty. - * - * PUBLIC: int __ham_dirty_meta __P((DBC *)); - */ -int -__ham_dirty_meta(dbc) - DBC *dbc; -{ - DB *dbp; - DB_ENV *dbenv; - DB_LOCK _tmp; - HASH *hashp; - HASH_CURSOR *hcp; - int ret; - - dbp = dbc->dbp; - hashp = dbp->h_internal; - hcp = (HASH_CURSOR *)dbc->internal; - - ret = 0; - if (STD_LOCKING(dbc) && !F_ISSET(dbc, DBC_RECOVER | DBC_COMPENSATE)) { - dbenv = dbp->dbenv; - dbc->lock.pgno = hashp->meta_pgno; - if ((ret = dbenv->lock_get(dbenv, dbc->locker, - DB_NONBLOCK(dbc) ? DB_LOCK_NOWAIT : 0, - &dbc->lock_dbt, DB_LOCK_WRITE, &_tmp)) == 0) { - ret = dbenv->lock_put(dbenv, &hcp->hlock); - hcp->hlock = _tmp; - } - } - - if (ret == 0) - F_SET(hcp, H_DIRTY); - return (ret); -} diff --git a/bdb/hash/hash_method.c b/bdb/hash/hash_method.c deleted file mode 100644 index 9a6bf59536a..00000000000 --- a/bdb/hash/hash_method.c +++ /dev/null @@ -1,126 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1999-2002 - * Sleepycat Software. All rights reserved. 
- */ - -#include "db_config.h" - -#ifndef lint -static const char revid[] = "$Id: hash_method.c,v 11.12 2002/03/27 04:32:12 bostic Exp $"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> -#endif - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/hash.h" - -static int __ham_set_h_ffactor __P((DB *, u_int32_t)); -static int __ham_set_h_hash - __P((DB *, u_int32_t(*)(DB *, const void *, u_int32_t))); -static int __ham_set_h_nelem __P((DB *, u_int32_t)); - -/* - * __ham_db_create -- - * Hash specific initialization of the DB structure. - * - * PUBLIC: int __ham_db_create __P((DB *)); - */ -int -__ham_db_create(dbp) - DB *dbp; -{ - HASH *hashp; - int ret; - - if ((ret = __os_malloc(dbp->dbenv, - sizeof(HASH), &dbp->h_internal)) != 0) - return (ret); - - hashp = dbp->h_internal; - - hashp->h_nelem = 0; /* Defaults. */ - hashp->h_ffactor = 0; - hashp->h_hash = NULL; - - dbp->set_h_ffactor = __ham_set_h_ffactor; - dbp->set_h_hash = __ham_set_h_hash; - dbp->set_h_nelem = __ham_set_h_nelem; - - return (0); -} - -/* - * PUBLIC: int __ham_db_close __P((DB *)); - */ -int -__ham_db_close(dbp) - DB *dbp; -{ - if (dbp->h_internal == NULL) - return (0); - __os_free(dbp->dbenv, dbp->h_internal); - dbp->h_internal = NULL; - return (0); -} - -/* - * __ham_set_h_ffactor -- - * Set the fill factor. - */ -static int -__ham_set_h_ffactor(dbp, h_ffactor) - DB *dbp; - u_int32_t h_ffactor; -{ - HASH *hashp; - - DB_ILLEGAL_AFTER_OPEN(dbp, "set_h_ffactor"); - DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); - - hashp = dbp->h_internal; - hashp->h_ffactor = h_ffactor; - return (0); -} - -/* - * __ham_set_h_hash -- - * Set the hash function. 
- */ -static int -__ham_set_h_hash(dbp, func) - DB *dbp; - u_int32_t (*func) __P((DB *, const void *, u_int32_t)); -{ - HASH *hashp; - - DB_ILLEGAL_AFTER_OPEN(dbp, "set_h_hash"); - DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); - - hashp = dbp->h_internal; - hashp->h_hash = func; - return (0); -} - -/* - * __ham_set_h_nelem -- - * Set the table size. - */ -static int -__ham_set_h_nelem(dbp, h_nelem) - DB *dbp; - u_int32_t h_nelem; -{ - HASH *hashp; - - DB_ILLEGAL_AFTER_OPEN(dbp, "set_h_nelem"); - DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); - - hashp = dbp->h_internal; - hashp->h_nelem = h_nelem; - return (0); -} diff --git a/bdb/hash/hash_open.c b/bdb/hash/hash_open.c deleted file mode 100644 index f976f5b6816..00000000000 --- a/bdb/hash/hash_open.c +++ /dev/null @@ -1,558 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2002 - * Sleepycat Software. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994 - * Margo Seltzer. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Margo Seltzer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "db_config.h" - -#ifndef lint -static const char revid[] = "$Id: hash_open.c,v 11.175 2002/09/04 19:06:44 margo Exp $"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <stdlib.h> -#include <string.h> -#endif - -#include "db_int.h" -#include "dbinc/crypto.h" -#include "dbinc/db_page.h" -#include "dbinc/hash.h" -#include "dbinc/log.h" -#include "dbinc/db_shash.h" -#include "dbinc/lock.h" -#include "dbinc/db_swap.h" -#include "dbinc/btree.h" -#include "dbinc/fop.h" - -static db_pgno_t __ham_init_meta __P((DB *, HMETA *, db_pgno_t, DB_LSN *)); - -/* - * __ham_open -- - * - * PUBLIC: int __ham_open __P((DB *, - * PUBLIC: DB_TXN *, const char * name, db_pgno_t, u_int32_t)); - */ -int -__ham_open(dbp, txn, name, base_pgno, flags) - DB *dbp; - DB_TXN *txn; - const char *name; - db_pgno_t base_pgno; - u_int32_t flags; -{ - DB_ENV *dbenv; - DBC *dbc; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - HASH *hashp; - int ret, t_ret; - - COMPQUIET(name, NULL); - dbenv = dbp->dbenv; - dbc = NULL; - mpf = dbp->mpf; - - /* Initialize the remaining fields/methods of the DB. */ - dbp->stat = __ham_stat; - - /* - * Get a cursor. 
If DB_CREATE is specified, we may be creating - * pages, and to do that safely in CDB we need a write cursor. - * In STD_LOCKING mode, we'll synchronize using the meta page - * lock instead. - */ - if ((ret = dbp->cursor(dbp, - txn, &dbc, LF_ISSET(DB_CREATE) && CDB_LOCKING(dbenv) ? - DB_WRITECURSOR : 0)) != 0) - return (ret); - - hcp = (HASH_CURSOR *)dbc->internal; - hashp = dbp->h_internal; - hashp->meta_pgno = base_pgno; - if ((ret = __ham_get_meta(dbc)) != 0) - goto err1; - - /* Initialize the hdr structure. */ - if (hcp->hdr->dbmeta.magic == DB_HASHMAGIC) { - /* File exists, verify the data in the header. */ - if (hashp->h_hash == NULL) - hashp->h_hash = hcp->hdr->dbmeta.version < 5 - ? __ham_func4 : __ham_func5; - if (!F_ISSET(dbp, DB_AM_RDONLY) && !IS_RECOVERING(dbenv) && - hashp->h_hash(dbp, - CHARKEY, sizeof(CHARKEY)) != hcp->hdr->h_charkey) { - __db_err(dbp->dbenv, - "hash: incompatible hash function"); - ret = EINVAL; - goto err2; - } - if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUP)) - F_SET(dbp, DB_AM_DUP); - if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUPSORT)) - F_SET(dbp, DB_AM_DUPSORT); - if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_SUBDB)) - F_SET(dbp, DB_AM_SUBDB); - - /* We must initialize last_pgno, it could be stale. 
*/ - if (!F_ISSET(dbp, DB_AM_RDONLY) && - dbp->meta_pgno == PGNO_BASE_MD) { - if ((ret = __ham_dirty_meta(dbc)) != 0) - goto err2; - mpf->last_pgno(mpf, &hcp->hdr->dbmeta.last_pgno); - } - } else if (!IS_RECOVERING(dbenv) && !F_ISSET(dbp, DB_AM_RECOVER)) - DB_ASSERT(0); - -err2: /* Release the meta data page */ - if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) - ret = t_ret; -err1: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __ham_metachk -- - * - * PUBLIC: int __ham_metachk __P((DB *, const char *, HMETA *)); - */ -int -__ham_metachk(dbp, name, hashm) - DB *dbp; - const char *name; - HMETA *hashm; -{ - DB_ENV *dbenv; - u_int32_t vers; - int ret; - - dbenv = dbp->dbenv; - - /* - * At this point, all we know is that the magic number is for a Hash. - * Check the version, the database may be out of date. - */ - vers = hashm->dbmeta.version; - if (F_ISSET(dbp, DB_AM_SWAP)) - M_32_SWAP(vers); - switch (vers) { - case 4: - case 5: - case 6: - __db_err(dbenv, - "%s: hash version %lu requires a version upgrade", - name, (u_long)vers); - return (DB_OLD_VERSION); - case 7: - case 8: - break; - default: - __db_err(dbenv, - "%s: unsupported hash version: %lu", name, (u_long)vers); - return (EINVAL); - } - - /* Swap the page if we need to. */ - if (F_ISSET(dbp, DB_AM_SWAP) && (ret = __ham_mswap((PAGE *)hashm)) != 0) - return (ret); - - /* Check the type. */ - if (dbp->type != DB_HASH && dbp->type != DB_UNKNOWN) - return (EINVAL); - dbp->type = DB_HASH; - DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); - - /* - * Check application info against metadata info, and set info, flags, - * and type based on metadata info. 
- */ - if ((ret = __db_fchk(dbenv, - "DB->open", hashm->dbmeta.flags, - DB_HASH_DUP | DB_HASH_SUBDB | DB_HASH_DUPSORT)) != 0) - return (ret); - - if (F_ISSET(&hashm->dbmeta, DB_HASH_DUP)) - F_SET(dbp, DB_AM_DUP); - else - if (F_ISSET(dbp, DB_AM_DUP)) { - __db_err(dbenv, - "%s: DB_DUP specified to open method but not set in database", - name); - return (EINVAL); - } - - if (F_ISSET(&hashm->dbmeta, DB_HASH_SUBDB)) - F_SET(dbp, DB_AM_SUBDB); - else - if (F_ISSET(dbp, DB_AM_SUBDB)) { - __db_err(dbenv, - "%s: multiple databases specified but not supported in file", - name); - return (EINVAL); - } - - if (F_ISSET(&hashm->dbmeta, DB_HASH_DUPSORT)) { - if (dbp->dup_compare == NULL) - dbp->dup_compare = __bam_defcmp; - } else - if (dbp->dup_compare != NULL) { - __db_err(dbenv, - "%s: duplicate sort function specified but not set in database", - name); - return (EINVAL); - } - - /* Set the page size. */ - dbp->pgsize = hashm->dbmeta.pagesize; - - /* Copy the file's ID. */ - memcpy(dbp->fileid, hashm->dbmeta.uid, DB_FILE_ID_LEN); - - return (0); -} - -/* - * __ham_init_meta -- - * - * Initialize a hash meta-data page. We assume that the meta-data page is - * contiguous with the initial buckets that we create. If that turns out - * to be false, we'll fix it up later. Return the initial number of buckets - * allocated. - */ -static db_pgno_t -__ham_init_meta(dbp, meta, pgno, lsnp) - DB *dbp; - HMETA *meta; - db_pgno_t pgno; - DB_LSN *lsnp; -{ - HASH *hashp; - db_pgno_t nbuckets; - int i; - int32_t l2; - - hashp = dbp->h_internal; - if (hashp->h_hash == NULL) - hashp->h_hash = DB_HASHVERSION < 5 ? __ham_func4 : __ham_func5; - - if (hashp->h_nelem != 0 && hashp->h_ffactor != 0) { - hashp->h_nelem = (hashp->h_nelem - 1) / hashp->h_ffactor + 1; - l2 = __db_log2(hashp->h_nelem > 2 ? 
hashp->h_nelem : 2); - } else - l2 = 1; - nbuckets = (db_pgno_t)(1 << l2); - - memset(meta, 0, sizeof(HMETA)); - meta->dbmeta.lsn = *lsnp; - meta->dbmeta.pgno = pgno; - meta->dbmeta.magic = DB_HASHMAGIC; - meta->dbmeta.version = DB_HASHVERSION; - meta->dbmeta.pagesize = dbp->pgsize; - if (F_ISSET(dbp, DB_AM_CHKSUM)) - FLD_SET(meta->dbmeta.metaflags, DBMETA_CHKSUM); - if (F_ISSET(dbp, DB_AM_ENCRYPT)) { - meta->dbmeta.encrypt_alg = - ((DB_CIPHER *)dbp->dbenv->crypto_handle)->alg; - DB_ASSERT(meta->dbmeta.encrypt_alg != 0); - meta->crypto_magic = meta->dbmeta.magic; - } - meta->dbmeta.type = P_HASHMETA; - meta->dbmeta.free = PGNO_INVALID; - meta->dbmeta.last_pgno = pgno; - meta->max_bucket = nbuckets - 1; - meta->high_mask = nbuckets - 1; - meta->low_mask = (nbuckets >> 1) - 1; - meta->ffactor = hashp->h_ffactor; - meta->h_charkey = hashp->h_hash(dbp, CHARKEY, sizeof(CHARKEY)); - memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN); - - if (F_ISSET(dbp, DB_AM_DUP)) - F_SET(&meta->dbmeta, DB_HASH_DUP); - if (F_ISSET(dbp, DB_AM_SUBDB)) - F_SET(&meta->dbmeta, DB_HASH_SUBDB); - if (dbp->dup_compare != NULL) - F_SET(&meta->dbmeta, DB_HASH_DUPSORT); - - /* - * Create the first and second buckets pages so that we have the - * page numbers for them and we can store that page number in the - * meta-data header (spares[0]). - */ - meta->spares[0] = pgno + 1; - - /* Fill in the last fields of the meta data page. */ - for (i = 1; i <= l2; i++) - meta->spares[i] = meta->spares[0]; - for (; i < NCACHED; i++) - meta->spares[i] = PGNO_INVALID; - - return (nbuckets); -} - -/* - * __ham_new_file -- - * Create the necessary pages to begin a new database file. If name - * is NULL, then this is an unnamed file, the mpf has been set in the dbp - * and we simply create the pages using mpool. In this case, we don't log - * because we never have to redo an unnamed create and the undo simply - * frees resources. 
- * - * This code appears more complex than it is because of the two cases (named - * and unnamed). The way to read the code is that for each page being created, - * there are three parts: 1) a "get page" chunk (which either uses malloc'd - * memory or calls mpf->get), 2) the initialization, and 3) the "put page" - * chunk which either does a fop write or an mpf->put. - * - * PUBLIC: int __ham_new_file __P((DB *, DB_TXN *, DB_FH *, const char *)); - */ -int -__ham_new_file(dbp, txn, fhp, name) - DB *dbp; - DB_TXN *txn; - DB_FH *fhp; - const char *name; -{ - DB_ENV *dbenv; - DB_LSN lsn; - DB_MPOOLFILE *mpf; - DB_PGINFO pginfo; - DBT pdbt; - HMETA *meta; - PAGE *page; - int ret; - db_pgno_t lpgno; - void *buf; - - dbenv = dbp->dbenv; - mpf = dbp->mpf; - meta = NULL; - page = NULL; - memset(&pdbt, 0, sizeof(pdbt)); - - /* Build meta-data page. */ - if (name == NULL) { - lpgno = PGNO_BASE_MD; - ret = mpf->get(mpf, &lpgno, DB_MPOOL_CREATE, &meta); - } else { - pginfo.db_pagesize = dbp->pgsize; - pginfo.type = dbp->type; - pginfo.flags = - F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP)); - pdbt.data = &pginfo; - pdbt.size = sizeof(pginfo); - ret = __os_calloc(dbp->dbenv, 1, dbp->pgsize, &buf); - meta = (HMETA *)buf; - } - if (ret != 0) - return (ret); - - LSN_NOT_LOGGED(lsn); - lpgno = __ham_init_meta(dbp, meta, PGNO_BASE_MD, &lsn); - meta->dbmeta.last_pgno = lpgno; - - if (name == NULL) - ret = mpf->put(mpf, meta, DB_MPOOL_DIRTY); - else { - if ((ret = __db_pgout(dbenv, PGNO_BASE_MD, meta, &pdbt)) != 0) - goto err; - ret = __fop_write(dbenv, txn, name, - DB_APP_DATA, fhp, 0, buf, dbp->pgsize, 1); - } - if (ret != 0) - goto err; - meta = NULL; - - /* Now allocate the final hash bucket. 
*/ - if (name == NULL) { - if ((ret = mpf->get(mpf, &lpgno, DB_MPOOL_CREATE, &page)) != 0) - goto err; - } else { -#ifdef DIAGNOSTIC - memset(buf, 0, dbp->pgsize); -#endif - page = (PAGE *)buf; - } - - P_INIT(page, dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - LSN_NOT_LOGGED(page->lsn); - - if (name == NULL) - ret = mpf->put(mpf, page, DB_MPOOL_DIRTY); - else { - if ((ret = __db_pgout(dbenv, lpgno, buf, &pdbt)) != 0) - goto err; - ret = __fop_write(dbenv, txn, name, - DB_APP_DATA, fhp, lpgno * dbp->pgsize, buf, dbp->pgsize, 1); - } - if (ret != 0) - goto err; - page = NULL; - -err: if (name != NULL) - __os_free(dbenv, buf); - else { - if (meta != NULL) - (void)mpf->put(mpf, meta, 0); - if (page != NULL) - (void)mpf->put(mpf, page, 0); - } - return (ret); -} - -/* - * __ham_new_subdb -- - * Create the necessary pages to begin a new subdatabase. - * - * PUBLIC: int __ham_new_subdb __P((DB *, DB *, DB_TXN *)); - */ -int -__ham_new_subdb(mdbp, dbp, txn) - DB *mdbp, *dbp; - DB_TXN *txn; -{ - DBC *dbc; - DB_ENV *dbenv; - DB_LOCK metalock, mmlock; - DB_LSN lsn; - DB_MPOOLFILE *mpf; - DBMETA *mmeta; - HMETA *meta; - PAGE *h; - int i, ret, t_ret; - db_pgno_t lpgno, mpgno; - - dbenv = mdbp->dbenv; - mpf = mdbp->mpf; - dbc = NULL; - meta = NULL; - mmeta = NULL; - LOCK_INIT(metalock); - LOCK_INIT(mmlock); - - if ((ret = mdbp->cursor(mdbp, txn, - &dbc, CDB_LOCKING(dbenv) ? DB_WRITECURSOR : 0)) != 0) - return (ret); - - /* Get and lock the new meta data page. */ - if ((ret = __db_lget(dbc, - 0, dbp->meta_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) - goto err; - if ((ret = mpf->get(mpf, &dbp->meta_pgno, DB_MPOOL_CREATE, &meta)) != 0) - goto err; - - /* Initialize the new meta-data page. */ - lsn = meta->dbmeta.lsn; - lpgno = __ham_init_meta(dbp, meta, dbp->meta_pgno, &lsn); - - /* - * We are about to allocate a set of contiguous buckets (lpgno - * worth). We need to get the master meta-data page to figure - * out where these pages are and to allocate them. 
So, lock and - * get the master meta data page. - */ - mpgno = PGNO_BASE_MD; - if ((ret = __db_lget(dbc, 0, mpgno, DB_LOCK_WRITE, 0, &mmlock)) != 0) - goto err; - if ((ret = mpf->get(mpf, &mpgno, 0, &mmeta)) != 0) - goto err; - - /* - * Now update the hash meta-data page to reflect where the first - * set of buckets are actually located. - */ - meta->spares[0] = mmeta->last_pgno + 1; - for (i = 0; i < NCACHED && meta->spares[i] != PGNO_INVALID; i++) - meta->spares[i] = meta->spares[0]; - - /* The new meta data page is now complete; log it. */ - if ((ret = __db_log_page(mdbp, - txn, &meta->dbmeta.lsn, dbp->meta_pgno, (PAGE *)meta)) != 0) - goto err; - - /* Reflect the group allocation. */ - if (DBENV_LOGGING(dbenv)) - if ((ret = __ham_groupalloc_log(mdbp, txn, - &LSN(mmeta), 0, &LSN(mmeta), - meta->spares[0], meta->max_bucket + 1, mmeta->free)) != 0) - goto err; - - /* Release the new meta-data page. */ - if ((ret = mpf->put(mpf, meta, DB_MPOOL_DIRTY)) != 0) - goto err; - meta = NULL; - - mmeta->last_pgno +=lpgno; - lpgno = mmeta->last_pgno; - - /* Now allocate the final hash bucket. */ - if ((ret = mpf->get(mpf, &lpgno, DB_MPOOL_CREATE, &h)) != 0) - goto err; - P_INIT(h, dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - LSN(h) = LSN(mmeta); - if ((ret = mpf->put(mpf, h, DB_MPOOL_DIRTY)) != 0) - goto err; - - /* Now put the master-metadata page back. 
*/ - if ((ret = mpf->put(mpf, mmeta, DB_MPOOL_DIRTY)) != 0) - goto err; - mmeta = NULL; - -err: - if (mmeta != NULL) - if ((t_ret = mpf->put(mpf, mmeta, 0)) != 0 && ret == 0) - ret = t_ret; - if (LOCK_ISSET(mmlock)) - if ((t_ret = __LPUT(dbc, mmlock)) != 0 && ret == 0) - ret = t_ret; - if (meta != NULL) - if ((t_ret = mpf->put(mpf, meta, 0)) != 0 && ret == 0) - ret = t_ret; - if (LOCK_ISSET(metalock)) - if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) - ret = t_ret; - if (dbc != NULL) - if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} diff --git a/bdb/hash/hash_page.c b/bdb/hash/hash_page.c deleted file mode 100644 index 6788129773f..00000000000 --- a/bdb/hash/hash_page.c +++ /dev/null @@ -1,1862 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2002 - * Sleepycat Software. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994 - * Margo Seltzer. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Margo Seltzer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "db_config.h" - -#ifndef lint -static const char revid[] = "$Id: hash_page.c,v 11.87 2002/08/15 02:46:20 bostic Exp $"; -#endif /* not lint */ - -/* - * PACKAGE: hashing - * - * DESCRIPTION: - * Page manipulation for hashing package. - */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <string.h> -#endif - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_shash.h" -#include "dbinc/hash.h" -#include "dbinc/lock.h" - -static int __ham_c_delpg - __P((DBC *, db_pgno_t, db_pgno_t, u_int32_t, db_ham_mode, u_int32_t *)); - -/* - * PUBLIC: int __ham_item __P((DBC *, db_lockmode_t, db_pgno_t *)); - */ -int -__ham_item(dbc, mode, pgnop) - DBC *dbc; - db_lockmode_t mode; - db_pgno_t *pgnop; -{ - DB *dbp; - HASH_CURSOR *hcp; - db_pgno_t next_pgno; - int ret; - - dbp = dbc->dbp; - hcp = (HASH_CURSOR *)dbc->internal; - - if (F_ISSET(hcp, H_DELETED)) { - __db_err(dbp->dbenv, "Attempt to return a deleted item"); - return (EINVAL); - } - F_CLR(hcp, H_OK | H_NOMORE); - - /* Check if we need to get a page for this cursor. 
*/ - if ((ret = __ham_get_cpage(dbc, mode)) != 0) - return (ret); - -recheck: - /* Check if we are looking for space in which to insert an item. */ - if (hcp->seek_size && hcp->seek_found_page == PGNO_INVALID && - hcp->seek_size < P_FREESPACE(dbp, hcp->page)) - hcp->seek_found_page = hcp->pgno; - - /* Check for off-page duplicates. */ - if (hcp->indx < NUM_ENT(hcp->page) && - HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) { - memcpy(pgnop, - HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)), - sizeof(db_pgno_t)); - F_SET(hcp, H_OK); - return (0); - } - - /* Check if we need to go on to the next page. */ - if (F_ISSET(hcp, H_ISDUP)) - /* - * ISDUP is set, and offset is at the beginning of the datum. - * We need to grab the length of the datum, then set the datum - * pointer to be the beginning of the datum. - */ - memcpy(&hcp->dup_len, - HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)) + - hcp->dup_off, sizeof(db_indx_t)); - - if (hcp->indx >= (db_indx_t)NUM_ENT(hcp->page)) { - /* Fetch next page. */ - if (NEXT_PGNO(hcp->page) == PGNO_INVALID) { - F_SET(hcp, H_NOMORE); - return (DB_NOTFOUND); - } - next_pgno = NEXT_PGNO(hcp->page); - hcp->indx = 0; - if ((ret = __ham_next_cpage(dbc, next_pgno, 0)) != 0) - return (ret); - goto recheck; - } - - F_SET(hcp, H_OK); - return (0); -} - -/* - * PUBLIC: int __ham_item_reset __P((DBC *)); - */ -int -__ham_item_reset(dbc) - DBC *dbc; -{ - DB *dbp; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - int ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - ret = 0; - if (hcp->page != NULL) - ret = mpf->put(mpf, hcp->page, 0); - - __ham_item_init(dbc); - return (ret); -} - -/* - * PUBLIC: void __ham_item_init __P((DBC *)); - */ -void -__ham_item_init(dbc) - DBC *dbc; -{ - HASH_CURSOR *hcp; - - hcp = (HASH_CURSOR *)dbc->internal; - /* - * If this cursor still holds any locks, we must - * release them if we are not running with transactions. 
- */ - (void)__TLPUT(dbc, hcp->lock); - - /* - * The following fields must *not* be initialized here - * because they may have meaning across inits. - * hlock, hdr, split_buf, stats - */ - hcp->bucket = BUCKET_INVALID; - hcp->lbucket = BUCKET_INVALID; - LOCK_INIT(hcp->lock); - hcp->lock_mode = DB_LOCK_NG; - hcp->dup_off = 0; - hcp->dup_len = 0; - hcp->dup_tlen = 0; - hcp->seek_size = 0; - hcp->seek_found_page = PGNO_INVALID; - hcp->flags = 0; - - hcp->pgno = PGNO_INVALID; - hcp->indx = NDX_INVALID; - hcp->page = NULL; -} - -/* - * Returns the last item in a bucket. - * - * PUBLIC: int __ham_item_last __P((DBC *, db_lockmode_t, db_pgno_t *)); - */ -int -__ham_item_last(dbc, mode, pgnop) - DBC *dbc; - db_lockmode_t mode; - db_pgno_t *pgnop; -{ - HASH_CURSOR *hcp; - int ret; - - hcp = (HASH_CURSOR *)dbc->internal; - if ((ret = __ham_item_reset(dbc)) != 0) - return (ret); - - hcp->bucket = hcp->hdr->max_bucket; - hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); - F_SET(hcp, H_OK); - return (__ham_item_prev(dbc, mode, pgnop)); -} - -/* - * PUBLIC: int __ham_item_first __P((DBC *, db_lockmode_t, db_pgno_t *)); - */ -int -__ham_item_first(dbc, mode, pgnop) - DBC *dbc; - db_lockmode_t mode; - db_pgno_t *pgnop; -{ - HASH_CURSOR *hcp; - int ret; - - hcp = (HASH_CURSOR *)dbc->internal; - if ((ret = __ham_item_reset(dbc)) != 0) - return (ret); - F_SET(hcp, H_OK); - hcp->bucket = 0; - hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); - return (__ham_item_next(dbc, mode, pgnop)); -} - -/* - * __ham_item_prev -- - * Returns a pointer to key/data pair on a page. In the case of - * bigkeys, just returns the page number and index of the bigkey - * pointer pair. 
- * - * PUBLIC: int __ham_item_prev __P((DBC *, db_lockmode_t, db_pgno_t *)); - */ -int -__ham_item_prev(dbc, mode, pgnop) - DBC *dbc; - db_lockmode_t mode; - db_pgno_t *pgnop; -{ - DB *dbp; - HASH_CURSOR *hcp; - db_pgno_t next_pgno; - int ret; - - hcp = (HASH_CURSOR *)dbc->internal; - dbp = dbc->dbp; - - /* - * There are 5 cases for backing up in a hash file. - * Case 1: In the middle of a page, no duplicates, just dec the index. - * Case 2: In the middle of a duplicate set, back up one. - * Case 3: At the beginning of a duplicate set, get out of set and - * back up to next key. - * Case 4: At the beginning of a page; go to previous page. - * Case 5: At the beginning of a bucket; go to prev bucket. - */ - F_CLR(hcp, H_OK | H_NOMORE | H_DELETED); - - if ((ret = __ham_get_cpage(dbc, mode)) != 0) - return (ret); - - /* - * First handle the duplicates. Either you'll get the key here - * or you'll exit the duplicate set and drop into the code below - * to handle backing up through keys. - */ - if (!F_ISSET(hcp, H_NEXT_NODUP) && F_ISSET(hcp, H_ISDUP)) { - if (HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == - H_OFFDUP) { - memcpy(pgnop, - HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)), - sizeof(db_pgno_t)); - F_SET(hcp, H_OK); - return (0); - } - - /* Duplicates are on-page. */ - if (hcp->dup_off != 0) { - memcpy(&hcp->dup_len, HKEYDATA_DATA( - H_PAIRDATA(dbp, hcp->page, hcp->indx)) - + hcp->dup_off - sizeof(db_indx_t), - sizeof(db_indx_t)); - hcp->dup_off -= - DUP_SIZE(hcp->dup_len); - return (__ham_item(dbc, mode, pgnop)); - } - } - - /* - * If we get here, we are not in a duplicate set, and just need - * to back up the cursor. There are still three cases: - * midpage, beginning of page, beginning of bucket. - */ - - if (F_ISSET(hcp, H_DUPONLY)) { - F_CLR(hcp, H_OK); - F_SET(hcp, H_NOMORE); - return (0); - } else - /* - * We are no longer in a dup set; flag this so the dup code - * will reinitialize should we stumble upon another one. 
- */ - F_CLR(hcp, H_ISDUP); - - if (hcp->indx == 0) { /* Beginning of page. */ - hcp->pgno = PREV_PGNO(hcp->page); - if (hcp->pgno == PGNO_INVALID) { - /* Beginning of bucket. */ - F_SET(hcp, H_NOMORE); - return (DB_NOTFOUND); - } else if ((ret = - __ham_next_cpage(dbc, hcp->pgno, 0)) != 0) - return (ret); - else - hcp->indx = NUM_ENT(hcp->page); - } - - /* - * Either we've got the cursor set up to be decremented, or we - * have to find the end of a bucket. - */ - if (hcp->indx == NDX_INVALID) { - DB_ASSERT(hcp->page != NULL); - - hcp->indx = NUM_ENT(hcp->page); - for (next_pgno = NEXT_PGNO(hcp->page); - next_pgno != PGNO_INVALID; - next_pgno = NEXT_PGNO(hcp->page)) { - if ((ret = __ham_next_cpage(dbc, next_pgno, 0)) != 0) - return (ret); - hcp->indx = NUM_ENT(hcp->page); - } - - if (hcp->indx == 0) { - /* Bucket was empty. */ - F_SET(hcp, H_NOMORE); - return (DB_NOTFOUND); - } - } - - hcp->indx -= 2; - - return (__ham_item(dbc, mode, pgnop)); -} - -/* - * Sets the cursor to the next key/data pair on a page. - * - * PUBLIC: int __ham_item_next __P((DBC *, db_lockmode_t, db_pgno_t *)); - */ -int -__ham_item_next(dbc, mode, pgnop) - DBC *dbc; - db_lockmode_t mode; - db_pgno_t *pgnop; -{ - HASH_CURSOR *hcp; - int ret; - - hcp = (HASH_CURSOR *)dbc->internal; - - if ((ret = __ham_get_cpage(dbc, mode)) != 0) - return (ret); - - /* - * Deleted on-page duplicates are a weird case. If we delete the last - * one, then our cursor is at the very end of a duplicate set and - * we actually need to go on to the next key. 
- */ - if (F_ISSET(hcp, H_DELETED)) { - if (hcp->indx != NDX_INVALID && - F_ISSET(hcp, H_ISDUP) && - HPAGE_TYPE(dbc->dbp, hcp->page, H_DATAINDEX(hcp->indx)) - == H_DUPLICATE && hcp->dup_tlen == hcp->dup_off) { - if (F_ISSET(hcp, H_DUPONLY)) { - F_CLR(hcp, H_OK); - F_SET(hcp, H_NOMORE); - return (0); - } else { - F_CLR(hcp, H_ISDUP); - hcp->indx += 2; - } - } else if (!F_ISSET(hcp, H_ISDUP) && F_ISSET(hcp, H_DUPONLY)) { - F_CLR(hcp, H_OK); - F_SET(hcp, H_NOMORE); - return (0); - } else if (F_ISSET(hcp, H_ISDUP) && - F_ISSET(hcp, H_NEXT_NODUP)) { - F_CLR(hcp, H_ISDUP); - hcp->indx += 2; - } - F_CLR(hcp, H_DELETED); - } else if (hcp->indx == NDX_INVALID) { - hcp->indx = 0; - F_CLR(hcp, H_ISDUP); - } else if (F_ISSET(hcp, H_NEXT_NODUP)) { - hcp->indx += 2; - F_CLR(hcp, H_ISDUP); - } else if (F_ISSET(hcp, H_ISDUP) && hcp->dup_tlen != 0) { - if (hcp->dup_off + DUP_SIZE(hcp->dup_len) >= - hcp->dup_tlen && F_ISSET(hcp, H_DUPONLY)) { - F_CLR(hcp, H_OK); - F_SET(hcp, H_NOMORE); - return (0); - } - hcp->dup_off += DUP_SIZE(hcp->dup_len); - if (hcp->dup_off >= hcp->dup_tlen) { - F_CLR(hcp, H_ISDUP); - hcp->indx += 2; - } - } else if (F_ISSET(hcp, H_DUPONLY)) { - F_CLR(hcp, H_OK); - F_SET(hcp, H_NOMORE); - return (0); - } else { - hcp->indx += 2; - F_CLR(hcp, H_ISDUP); - } - - return (__ham_item(dbc, mode, pgnop)); -} - -/* - * PUBLIC: void __ham_putitem __P((DB *, PAGE *p, const DBT *, int)); - * - * This is a little bit sleazy in that we're overloading the meaning - * of the H_OFFPAGE type here. When we recover deletes, we have the - * entire entry instead of having only the DBT, so we'll pass type - * H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing - * an H_KEYDATA around it. - */ -void -__ham_putitem(dbp, p, dbt, type) - DB *dbp; - PAGE *p; - const DBT *dbt; - int type; -{ - u_int16_t n, off; - db_indx_t *inp; - - n = NUM_ENT(p); - inp = P_INP(dbp, p); - - /* Put the item element on the page. 
*/ - if (type == H_OFFPAGE) { - off = HOFFSET(p) - dbt->size; - HOFFSET(p) = inp[n] = off; - memcpy(P_ENTRY(dbp, p, n), dbt->data, dbt->size); - } else { - off = HOFFSET(p) - HKEYDATA_SIZE(dbt->size); - HOFFSET(p) = inp[n] = off; - PUT_HKEYDATA(P_ENTRY(dbp, p, n), dbt->data, dbt->size, type); - } - - /* Adjust page info. */ - NUM_ENT(p) += 1; -} - -/* - * PUBLIC: void __ham_reputpair __P((DB *, PAGE *, - * PUBLIC: u_int32_t, const DBT *, const DBT *)); - * - * This is a special case to restore a key/data pair to its original - * location during recovery. We are guaranteed that the pair fits - * on the page and is not the last pair on the page (because if it's - * the last pair, the normal insert works). - */ -void -__ham_reputpair(dbp, p, ndx, key, data) - DB *dbp; - PAGE *p; - u_int32_t ndx; - const DBT *key, *data; -{ - db_indx_t i, *inp, movebytes, newbytes; - size_t psize; - u_int8_t *from; - - psize = dbp->pgsize; - inp = P_INP(dbp, p); - /* First shuffle the existing items up on the page. */ - movebytes = (db_indx_t)( - (ndx == 0 ? psize : inp[H_DATAINDEX(ndx - 2)]) - HOFFSET(p)); - newbytes = key->size + data->size; - from = (u_int8_t *)p + HOFFSET(p); - memmove(from - newbytes, from, movebytes); - - /* - * Adjust the indices and move them up 2 spaces. Note that we - * have to check the exit condition inside the loop just in case - * we are dealing with index 0 (db_indx_t's are unsigned). - */ - for (i = NUM_ENT(p) - 1; ; i-- ) { - inp[i + 2] = inp[i] - newbytes; - if (i == H_KEYINDEX(ndx)) - break; - } - - /* Put the key and data on the page. */ - inp[H_KEYINDEX(ndx)] = (db_indx_t)( - (ndx == 0 ? psize : inp[H_DATAINDEX(ndx - 2)]) - key->size); - inp[H_DATAINDEX(ndx)] = inp[H_KEYINDEX(ndx)] - data->size; - memcpy(P_ENTRY(dbp, p, H_KEYINDEX(ndx)), key->data, key->size); - memcpy(P_ENTRY(dbp, p, H_DATAINDEX(ndx)), data->data, data->size); - - /* Adjust page info. 
*/ - HOFFSET(p) -= newbytes; - NUM_ENT(p) += 2; -} - -/* - * PUBLIC: int __ham_del_pair __P((DBC *, int)); - */ -int -__ham_del_pair(dbc, reclaim_page) - DBC *dbc; - int reclaim_page; -{ - DB *dbp; - DBT data_dbt, key_dbt; - DB_LSN new_lsn, *n_lsn, tmp_lsn; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - PAGE *n_pagep, *nn_pagep, *p, *p_pagep; - db_ham_mode op; - db_indx_t ndx; - db_pgno_t chg_pgno, pgno, tmp_pgno; - int ret, t_ret; - u_int32_t order; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - n_pagep = p_pagep = nn_pagep = NULL; - ndx = hcp->indx; - - if (hcp->page == NULL && - (ret = mpf->get(mpf, &hcp->pgno, DB_MPOOL_CREATE, &hcp->page)) != 0) - return (ret); - p = hcp->page; - - /* - * We optimize for the normal case which is when neither the key nor - * the data are large. In this case, we write a single log record - * and do the delete. If either is large, we'll call __big_delete - * to remove the big item and then update the page to remove the - * entry referring to the big item. - */ - ret = 0; - if (HPAGE_PTYPE(H_PAIRKEY(dbp, p, ndx)) == H_OFFPAGE) { - memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_KEYINDEX(ndx))), - sizeof(db_pgno_t)); - ret = __db_doff(dbc, pgno); - } - - if (ret == 0) - switch (HPAGE_PTYPE(H_PAIRDATA(dbp, p, ndx))) { - case H_OFFPAGE: - memcpy(&pgno, - HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_DATAINDEX(ndx))), - sizeof(db_pgno_t)); - ret = __db_doff(dbc, pgno); - break; - case H_OFFDUP: - case H_DUPLICATE: - /* - * If we delete a pair that is/was a duplicate, then - * we had better clear the flag so that we update the - * cursor appropriately. - */ - F_CLR(hcp, H_ISDUP); - break; - } - - if (ret) - return (ret); - - /* Now log the delete off this page. 
*/ - if (DBC_LOGGING(dbc)) { - key_dbt.data = P_ENTRY(dbp, p, H_KEYINDEX(ndx)); - key_dbt.size = LEN_HITEM(dbp, p, dbp->pgsize, H_KEYINDEX(ndx)); - data_dbt.data = P_ENTRY(dbp, p, H_DATAINDEX(ndx)); - data_dbt.size = LEN_HITEM(dbp, p, dbp->pgsize, H_DATAINDEX(ndx)); - - if ((ret = __ham_insdel_log(dbp, - dbc->txn, &new_lsn, 0, DELPAIR, PGNO(p), (u_int32_t)ndx, - &LSN(p), &key_dbt, &data_dbt)) != 0) - return (ret); - } else - LSN_NOT_LOGGED(new_lsn); - - /* Move lsn onto page. */ - LSN(p) = new_lsn; - - /* Do the delete. */ - __ham_dpair(dbp, p, ndx); - - /* - * Mark item deleted so that we don't try to return it, and - * so that we update the cursor correctly on the next call - * to next. - */ - F_SET(hcp, H_DELETED); - F_CLR(hcp, H_OK); - - /* - * Update cursors that are on the page where the delete happend. - */ - if ((ret = __ham_c_update(dbc, 0, 0, 0)) != 0) - return (ret); - - /* - * If we are locking, we will not maintain this, because it is - * a hot spot. - * - * XXX - * Perhaps we can retain incremental numbers and apply them later. - */ - if (!STD_LOCKING(dbc)) { - --hcp->hdr->nelem; - if ((ret = __ham_dirty_meta(dbc)) != 0) - return (ret); - } - - /* - * If we need to reclaim the page, then check if the page is empty. - * There are two cases. If it's empty and it's not the first page - * in the bucket (i.e., the bucket page) then we can simply remove - * it. If it is the first chain in the bucket, then we need to copy - * the second page into it and remove the second page. - * If its the only page in the bucket we leave it alone. - */ - if (!reclaim_page || - NUM_ENT(p) != 0 || - (PREV_PGNO(p) == PGNO_INVALID && NEXT_PGNO(p) == PGNO_INVALID)) - return (mpf->set(mpf, p, DB_MPOOL_DIRTY)); - - if (PREV_PGNO(p) == PGNO_INVALID) { - /* - * First page in chain is empty and we know that there - * are more pages in the chain. 
- */ - if ((ret = mpf->get(mpf, &NEXT_PGNO(p), 0, &n_pagep)) != 0) - return (ret); - - if (NEXT_PGNO(n_pagep) != PGNO_INVALID && (ret = - mpf->get(mpf, &NEXT_PGNO(n_pagep), 0, &nn_pagep)) != 0) - goto err; - - if (DBC_LOGGING(dbc)) { - key_dbt.data = n_pagep; - key_dbt.size = dbp->pgsize; - if ((ret = __ham_copypage_log(dbp, - dbc->txn, &new_lsn, 0, PGNO(p), - &LSN(p), PGNO(n_pagep), &LSN(n_pagep), - NEXT_PGNO(n_pagep), - nn_pagep == NULL ? NULL : &LSN(nn_pagep), - &key_dbt)) != 0) - goto err; - } else - LSN_NOT_LOGGED(new_lsn); - - /* Move lsn onto page. */ - LSN(p) = new_lsn; /* Structure assignment. */ - LSN(n_pagep) = new_lsn; - if (NEXT_PGNO(n_pagep) != PGNO_INVALID) - LSN(nn_pagep) = new_lsn; - - if (nn_pagep != NULL) { - PREV_PGNO(nn_pagep) = PGNO(p); - if ((ret = - mpf->put(mpf, nn_pagep, DB_MPOOL_DIRTY)) != 0) { - nn_pagep = NULL; - goto err; - } - } - - tmp_pgno = PGNO(p); - tmp_lsn = LSN(p); - memcpy(p, n_pagep, dbp->pgsize); - PGNO(p) = tmp_pgno; - LSN(p) = tmp_lsn; - PREV_PGNO(p) = PGNO_INVALID; - - /* - * Update cursors to reflect the fact that records - * on the second page have moved to the first page. - */ - if ((ret = __ham_c_delpg(dbc, PGNO(n_pagep), - PGNO(p), 0, DB_HAM_DELFIRSTPG, &order)) != 0) - goto err; - - /* - * Update the cursor to reflect its new position. 
- */ - hcp->indx = 0; - hcp->pgno = PGNO(p); - hcp->order += order; - - if ((ret = mpf->set(mpf, p, DB_MPOOL_DIRTY)) != 0) - goto err; - if ((ret = __db_free(dbc, n_pagep)) != 0) { - n_pagep = NULL; - goto err; - } - } else { - if ((ret = mpf->get(mpf, &PREV_PGNO(p), 0, &p_pagep)) != 0) - goto err; - - if (NEXT_PGNO(p) != PGNO_INVALID) { - if ((ret = - mpf->get(mpf, &NEXT_PGNO(p), 0, &n_pagep)) != 0) - goto err; - n_lsn = &LSN(n_pagep); - } else { - n_pagep = NULL; - n_lsn = NULL; - } - - NEXT_PGNO(p_pagep) = NEXT_PGNO(p); - if (n_pagep != NULL) - PREV_PGNO(n_pagep) = PGNO(p_pagep); - - if (DBC_LOGGING(dbc)) { - if ((ret = __ham_newpage_log(dbp, dbc->txn, - &new_lsn, 0, DELOVFL, PREV_PGNO(p), &LSN(p_pagep), - PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0) - goto err; - } else - LSN_NOT_LOGGED(new_lsn); - - /* Move lsn onto page. */ - LSN(p_pagep) = new_lsn; /* Structure assignment. */ - if (n_pagep) - LSN(n_pagep) = new_lsn; - LSN(p) = new_lsn; - - if (NEXT_PGNO(p) == PGNO_INVALID) { - /* - * There is no next page; put the cursor on the - * previous page as if we'd deleted the last item - * on that page, with index after the last valid - * entry. - * - * The deleted flag was set up above. - */ - hcp->pgno = PGNO(p_pagep); - hcp->indx = NUM_ENT(p_pagep); - op = DB_HAM_DELLASTPG; - } else { - /* - * There is a next page, so put the cursor at - * the beginning of it. - */ - hcp->pgno = NEXT_PGNO(p); - hcp->indx = 0; - op = DB_HAM_DELMIDPG; - } - - /* - * Since we are about to delete the cursor page and we have - * just moved the cursor, we need to make sure that the - * old page pointer isn't left hanging around in the cursor. 
- */ - hcp->page = NULL; - chg_pgno = PGNO(p); - ret = __db_free(dbc, p); - if ((t_ret = - mpf->put(mpf, p_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0) - ret = t_ret; - if (n_pagep != NULL && (t_ret = - mpf->put(mpf, n_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0) - ret = t_ret; - if (ret != 0) - return (ret); - if ((ret = __ham_c_delpg(dbc, - chg_pgno, hcp->pgno, hcp->indx, op, &order)) != 0) - return (ret); - hcp->order += order; - } - return (ret); - -err: /* Clean up any pages. */ - if (n_pagep != NULL) - (void)mpf->put(mpf, n_pagep, 0); - if (nn_pagep != NULL) - (void)mpf->put(mpf, nn_pagep, 0); - if (p_pagep != NULL) - (void)mpf->put(mpf, p_pagep, 0); - return (ret); -} - -/* - * __ham_replpair -- - * Given the key data indicated by the cursor, replace part/all of it - * according to the fields in the dbt. - * - * PUBLIC: int __ham_replpair __P((DBC *, DBT *, u_int32_t)); - */ -int -__ham_replpair(dbc, dbt, make_dup) - DBC *dbc; - DBT *dbt; - u_int32_t make_dup; -{ - DB *dbp; - DBT old_dbt, tdata, tmp; - DB_ENV *dbenv; - DB_LSN new_lsn; - HASH_CURSOR *hcp; - int32_t change; /* XXX: Possible overflow. */ - u_int32_t dup_flag, len, memsize; - int beyond_eor, is_big, ret, type; - u_int8_t *beg, *dest, *end, *hk, *src; - void *memp; - - /* - * Big item replacements are handled in generic code. - * Items that fit on the current page fall into 4 classes. - * 1. On-page element, same size - * 2. On-page element, new is bigger (fits) - * 3. On-page element, new is bigger (does not fit) - * 4. On-page element, old is bigger - * Numbers 1, 2, and 4 are essentially the same (and should - * be the common case). We handle case 3 as a delete and - * add. - */ - dbp = dbc->dbp; - dbenv = dbp->dbenv; - hcp = (HASH_CURSOR *)dbc->internal; - - /* - * We need to compute the number of bytes that we are adding or - * removing from the entry. Normally, we can simply substract - * the number of bytes we are replacing (dbt->dlen) from the - * number of bytes we are inserting (dbt->size). 
However, if - * we are doing a partial put off the end of a record, then this - * formula doesn't work, because we are essentially adding - * new bytes. - */ - change = dbt->size - dbt->dlen; - - hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - is_big = HPAGE_PTYPE(hk) == H_OFFPAGE; - - if (is_big) - memcpy(&len, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); - else - len = LEN_HKEYDATA(dbp, hcp->page, - dbp->pgsize, H_DATAINDEX(hcp->indx)); - - beyond_eor = dbt->doff + dbt->dlen > len; - if (beyond_eor) - change += dbt->doff + dbt->dlen - len; - - if (change > (int32_t)P_FREESPACE(dbp, hcp->page) || - beyond_eor || is_big) { - /* - * Case 3 -- two subcases. - * A. This is not really a partial operation, but an overwrite. - * Simple del and add works. - * B. This is a partial and we need to construct the data that - * we are really inserting (yuck). - * In both cases, we need to grab the key off the page (in - * some cases we could do this outside of this routine; for - * cleanliness we do it here. If you happen to be on a big - * key, this could be a performance hit). - */ - memset(&tmp, 0, sizeof(tmp)); - if ((ret = - __db_ret(dbp, hcp->page, H_KEYINDEX(hcp->indx), - &tmp, &dbc->rkey->data, &dbc->rkey->ulen)) != 0) - return (ret); - - /* Preserve duplicate info. */ - dup_flag = F_ISSET(hcp, H_ISDUP); - if (dbt->doff == 0 && dbt->dlen == len) { - ret = __ham_del_pair(dbc, 0); - if (ret == 0) - ret = __ham_add_el(dbc, - &tmp, dbt, dup_flag ? H_DUPLICATE : H_KEYDATA); - } else { /* Case B */ - type = HPAGE_PTYPE(hk) != H_OFFPAGE ? - HPAGE_PTYPE(hk) : H_KEYDATA; - memset(&tdata, 0, sizeof(tdata)); - memp = NULL; - memsize = 0; - if ((ret = __db_ret(dbp, hcp->page, - H_DATAINDEX(hcp->indx), &tdata, &memp, &memsize)) - != 0) - goto err; - - /* Now we can delete the item. */ - if ((ret = __ham_del_pair(dbc, 0)) != 0) { - __os_free(dbenv, memp); - goto err; - } - - /* Now shift old data around to make room for new. 
*/ - if (change > 0) { - if ((ret = __os_realloc(dbenv, - tdata.size + change, &tdata.data)) != 0) - return (ret); - memp = tdata.data; - memsize = tdata.size + change; - memset((u_int8_t *)tdata.data + tdata.size, - 0, change); - } - end = (u_int8_t *)tdata.data + tdata.size; - - src = (u_int8_t *)tdata.data + dbt->doff + dbt->dlen; - if (src < end && tdata.size > dbt->doff + dbt->dlen) { - len = tdata.size - dbt->doff - dbt->dlen; - dest = src + change; - memmove(dest, src, len); - } - memcpy((u_int8_t *)tdata.data + dbt->doff, - dbt->data, dbt->size); - tdata.size += change; - - /* Now add the pair. */ - ret = __ham_add_el(dbc, &tmp, &tdata, type); - __os_free(dbenv, memp); - } - F_SET(hcp, dup_flag); -err: return (ret); - } - - /* - * Set up pointer into existing data. Do it before the log - * message so we can use it inside of the log setup. - */ - beg = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); - beg += dbt->doff; - - /* - * If we are going to have to move bytes at all, figure out - * all the parameters here. Then log the call before moving - * anything around. - */ - if (DBC_LOGGING(dbc)) { - old_dbt.data = beg; - old_dbt.size = dbt->dlen; - if ((ret = __ham_replace_log(dbp, - dbc->txn, &new_lsn, 0, PGNO(hcp->page), - (u_int32_t)H_DATAINDEX(hcp->indx), &LSN(hcp->page), - (u_int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0) - return (ret); - - } else - LSN_NOT_LOGGED(new_lsn); - - LSN(hcp->page) = new_lsn; /* Structure assignment. */ - - __ham_onpage_replace(dbp, hcp->page, (u_int32_t)H_DATAINDEX(hcp->indx), - (int32_t)dbt->doff, change, dbt); - - return (0); -} - -/* - * Replace data on a page with new data, possibly growing or shrinking what's - * there. This is called on two different occasions. On one (from replpair) - * we are interested in changing only the data. On the other (from recovery) - * we are replacing the entire data (header and all) with a new element. In - * the latter case, the off argument is negative. 
- * pagep: the page that we're changing - * ndx: page index of the element that is growing/shrinking. - * off: Offset at which we are beginning the replacement. - * change: the number of bytes (+ or -) that the element is growing/shrinking. - * dbt: the new data that gets written at beg. - * - * PUBLIC: void __ham_onpage_replace __P((DB *, PAGE *, u_int32_t, - * PUBLIC: int32_t, int32_t, DBT *)); - */ -void -__ham_onpage_replace(dbp, pagep, ndx, off, change, dbt) - DB *dbp; - PAGE *pagep; - u_int32_t ndx; - int32_t off; - int32_t change; - DBT *dbt; -{ - db_indx_t i, *inp; - int32_t len; - size_t pgsize; - u_int8_t *src, *dest; - int zero_me; - - pgsize = dbp->pgsize; - inp = P_INP(dbp, pagep); - if (change != 0) { - zero_me = 0; - src = (u_int8_t *)(pagep) + HOFFSET(pagep); - if (off < 0) - len = inp[ndx] - HOFFSET(pagep); - else if ((u_int32_t)off >= - LEN_HKEYDATA(dbp, pagep, pgsize, ndx)) { - len = (int32_t)(HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) - + LEN_HKEYDATA(dbp, pagep, pgsize, ndx) - src); - zero_me = 1; - } else - len = (int32_t)( - (HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) + off) - - src); - dest = src - change; - memmove(dest, src, len); - if (zero_me) - memset(dest + len, 0, change); - - /* Now update the indices. 
*/ - for (i = ndx; i < NUM_ENT(pagep); i++) - inp[i] -= change; - HOFFSET(pagep) -= change; - } - if (off >= 0) - memcpy(HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) + off, - dbt->data, dbt->size); - else - memcpy(P_ENTRY(dbp, pagep, ndx), dbt->data, dbt->size); -} - -/* - * PUBLIC: int __ham_split_page __P((DBC *, u_int32_t, u_int32_t)); - */ -int -__ham_split_page(dbc, obucket, nbucket) - DBC *dbc; - u_int32_t obucket, nbucket; -{ - DB *dbp; - DBC **carray; - DBT key, page_dbt; - DB_ENV *dbenv; - DB_LOCK block; - DB_LSN new_lsn; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp, *cp; - PAGE **pp, *old_pagep, *temp_pagep, *new_pagep; - db_indx_t n; - db_pgno_t bucket_pgno, npgno, next_pgno; - u_int32_t big_len, len; - int found, i, ret, t_ret; - void *big_buf; - - dbp = dbc->dbp; - dbenv = dbp->dbenv; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - temp_pagep = old_pagep = new_pagep = NULL; - carray = NULL; - LOCK_INIT(block); - - bucket_pgno = BUCKET_TO_PAGE(hcp, obucket); - if ((ret = __db_lget(dbc, - 0, bucket_pgno, DB_LOCK_WRITE, 0, &block)) != 0) - goto err; - if ((ret = mpf->get(mpf, - &bucket_pgno, DB_MPOOL_CREATE, &old_pagep)) != 0) - goto err; - - /* Properly initialize the new bucket page. */ - npgno = BUCKET_TO_PAGE(hcp, nbucket); - if ((ret = mpf->get(mpf, &npgno, DB_MPOOL_CREATE, &new_pagep)) != 0) - goto err; - P_INIT(new_pagep, - dbp->pgsize, npgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - - temp_pagep = hcp->split_buf; - memcpy(temp_pagep, old_pagep, dbp->pgsize); - - if (DBC_LOGGING(dbc)) { - page_dbt.size = dbp->pgsize; - page_dbt.data = old_pagep; - if ((ret = __ham_splitdata_log(dbp, - dbc->txn, &new_lsn, 0, SPLITOLD, - PGNO(old_pagep), &page_dbt, &LSN(old_pagep))) != 0) - goto err; - } else - LSN_NOT_LOGGED(new_lsn); - - LSN(old_pagep) = new_lsn; /* Structure assignment. 
*/ - - P_INIT(old_pagep, dbp->pgsize, PGNO(old_pagep), PGNO_INVALID, - PGNO_INVALID, 0, P_HASH); - - big_len = 0; - big_buf = NULL; - key.flags = 0; - while (temp_pagep != NULL) { - if ((ret = __ham_get_clist(dbp, - PGNO(temp_pagep), NDX_INVALID, &carray)) != 0) - goto err; - - for (n = 0; n < (db_indx_t)NUM_ENT(temp_pagep); n += 2) { - if ((ret = __db_ret(dbp, temp_pagep, - H_KEYINDEX(n), &key, &big_buf, &big_len)) != 0) - goto err; - - if (__ham_call_hash(dbc, key.data, key.size) == obucket) - pp = &old_pagep; - else - pp = &new_pagep; - - /* - * Figure out how many bytes we need on the new - * page to store the key/data pair. - */ - len = LEN_HITEM(dbp, temp_pagep, dbp->pgsize, - H_DATAINDEX(n)) + - LEN_HITEM(dbp, temp_pagep, dbp->pgsize, - H_KEYINDEX(n)) + - 2 * sizeof(db_indx_t); - - if (P_FREESPACE(dbp, *pp) < len) { - if (DBC_LOGGING(dbc)) { - page_dbt.size = dbp->pgsize; - page_dbt.data = *pp; - if ((ret = __ham_splitdata_log(dbp, - dbc->txn, &new_lsn, 0, - SPLITNEW, PGNO(*pp), &page_dbt, - &LSN(*pp))) != 0) - goto err; - } else - LSN_NOT_LOGGED(new_lsn); - LSN(*pp) = new_lsn; - if ((ret = - __ham_add_ovflpage(dbc, *pp, 1, pp)) != 0) - goto err; - } - - /* Check if we need to update a cursor. */ - if (carray != NULL) { - found = 0; - for (i = 0; carray[i] != NULL; i++) { - cp = - (HASH_CURSOR *)carray[i]->internal; - if (cp->pgno == PGNO(temp_pagep) && - cp->indx == n) { - cp->pgno = PGNO(*pp); - cp->indx = NUM_ENT(*pp); - found = 1; - } - } - if (found && DBC_LOGGING(dbc) && - IS_SUBTRANSACTION(dbc->txn)) { - if ((ret = - __ham_chgpg_log(dbp, - dbc->txn, &new_lsn, 0, - DB_HAM_SPLIT, PGNO(temp_pagep), - PGNO(*pp), n, NUM_ENT(*pp))) != 0) - goto err; - } - } - __ham_copy_item(dbp, temp_pagep, H_KEYINDEX(n), *pp); - __ham_copy_item(dbp, temp_pagep, H_DATAINDEX(n), *pp); - } - next_pgno = NEXT_PGNO(temp_pagep); - - /* Clear temp_page; if it's a link overflow page, free it. 
*/ - if (PGNO(temp_pagep) != bucket_pgno && (ret = - __db_free(dbc, temp_pagep)) != 0) { - temp_pagep = NULL; - goto err; - } - - if (next_pgno == PGNO_INVALID) - temp_pagep = NULL; - else if ((ret = mpf->get( - mpf, &next_pgno, DB_MPOOL_CREATE, &temp_pagep)) != 0) - goto err; - - if (temp_pagep != NULL) { - if (DBC_LOGGING(dbc)) { - page_dbt.size = dbp->pgsize; - page_dbt.data = temp_pagep; - if ((ret = __ham_splitdata_log(dbp, - dbc->txn, &new_lsn, 0, - SPLITOLD, PGNO(temp_pagep), - &page_dbt, &LSN(temp_pagep))) != 0) - goto err; - } else - LSN_NOT_LOGGED(new_lsn); - LSN(temp_pagep) = new_lsn; - } - - if (carray != NULL) /* We never knew its size. */ - __os_free(dbenv, carray); - carray = NULL; - } - if (big_buf != NULL) - __os_free(dbenv, big_buf); - - /* - * If the original bucket spanned multiple pages, then we've got - * a pointer to a page that used to be on the bucket chain. It - * should be deleted. - */ - if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno && - (ret = __db_free(dbc, temp_pagep)) != 0) { - temp_pagep = NULL; - goto err; - } - - /* - * Write new buckets out. 
- */ - if (DBC_LOGGING(dbc)) { - page_dbt.size = dbp->pgsize; - page_dbt.data = old_pagep; - if ((ret = __ham_splitdata_log(dbp, dbc->txn, - &new_lsn, 0, SPLITNEW, PGNO(old_pagep), &page_dbt, - &LSN(old_pagep))) != 0) - goto err; - LSN(old_pagep) = new_lsn; - - page_dbt.data = new_pagep; - if ((ret = __ham_splitdata_log(dbp, dbc->txn, &new_lsn, 0, - SPLITNEW, PGNO(new_pagep), &page_dbt, - &LSN(new_pagep))) != 0) - goto err; - LSN(new_pagep) = new_lsn; - } else { - LSN_NOT_LOGGED(LSN(old_pagep)); - LSN_NOT_LOGGED(LSN(new_pagep)); - } - - ret = mpf->put(mpf, old_pagep, DB_MPOOL_DIRTY); - if ((t_ret = - mpf->put(mpf, new_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0) - ret = t_ret; - - if (0) { -err: if (old_pagep != NULL) - (void)mpf->put(mpf, old_pagep, DB_MPOOL_DIRTY); - if (new_pagep != NULL) - (void)mpf->put(mpf, new_pagep, DB_MPOOL_DIRTY); - if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno) - (void)mpf->put(mpf, temp_pagep, DB_MPOOL_DIRTY); - } - if (LOCK_ISSET(block)) - __TLPUT(dbc, block); - if (carray != NULL) /* We never knew its size. */ - __os_free(dbenv, carray); - return (ret); -} - -/* - * Add the given pair to the page. The page in question may already be - * held (i.e. it was already gotten). If it is, then the page is passed - * in via the pagep parameter. On return, pagep will contain the page - * to which we just added something. This allows us to link overflow - * pages and return the new page having correctly put the last page. 
- * - * PUBLIC: int __ham_add_el __P((DBC *, const DBT *, const DBT *, int)); - */ -int -__ham_add_el(dbc, key, val, type) - DBC *dbc; - const DBT *key, *val; - int type; -{ - const DBT *pkey, *pdata; - DB *dbp; - DBT key_dbt, data_dbt; - DB_LSN new_lsn; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - HOFFPAGE doff, koff; - db_pgno_t next_pgno, pgno; - u_int32_t data_size, key_size, pairsize, rectype; - int do_expand, is_keybig, is_databig, ret; - int key_type, data_type; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - do_expand = 0; - - pgno = hcp->seek_found_page != PGNO_INVALID ? - hcp->seek_found_page : hcp->pgno; - if (hcp->page == NULL && - (ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &hcp->page)) != 0) - return (ret); - - key_size = HKEYDATA_PSIZE(key->size); - data_size = HKEYDATA_PSIZE(val->size); - is_keybig = ISBIG(hcp, key->size); - is_databig = ISBIG(hcp, val->size); - if (is_keybig) - key_size = HOFFPAGE_PSIZE; - if (is_databig) - data_size = HOFFPAGE_PSIZE; - - pairsize = key_size + data_size; - - /* Advance to first page in chain with room for item. */ - while (H_NUMPAIRS(hcp->page) && NEXT_PGNO(hcp->page) != PGNO_INVALID) { - /* - * This may not be the end of the chain, but the pair may fit - * anyway. Check if it's a bigpair that fits or a regular - * pair that fits. - */ - if (P_FREESPACE(dbp, hcp->page) >= pairsize) - break; - next_pgno = NEXT_PGNO(hcp->page); - if ((ret = __ham_next_cpage(dbc, next_pgno, 0)) != 0) - return (ret); - } - - /* - * Check if we need to allocate a new page. - */ - if (P_FREESPACE(dbp, hcp->page) < pairsize) { - do_expand = 1; - if ((ret = __ham_add_ovflpage(dbc, - (PAGE *)hcp->page, 1, (PAGE **)&hcp->page)) != 0) - return (ret); - hcp->pgno = PGNO(hcp->page); - } - - /* - * Update cursor. 
- */ - hcp->indx = NUM_ENT(hcp->page); - F_CLR(hcp, H_DELETED); - if (is_keybig) { - koff.type = H_OFFPAGE; - UMRW_SET(koff.unused[0]); - UMRW_SET(koff.unused[1]); - UMRW_SET(koff.unused[2]); - if ((ret = __db_poff(dbc, key, &koff.pgno)) != 0) - return (ret); - koff.tlen = key->size; - key_dbt.data = &koff; - key_dbt.size = sizeof(koff); - pkey = &key_dbt; - key_type = H_OFFPAGE; - } else { - pkey = key; - key_type = H_KEYDATA; - } - - if (is_databig) { - doff.type = H_OFFPAGE; - UMRW_SET(doff.unused[0]); - UMRW_SET(doff.unused[1]); - UMRW_SET(doff.unused[2]); - if ((ret = __db_poff(dbc, val, &doff.pgno)) != 0) - return (ret); - doff.tlen = val->size; - data_dbt.data = &doff; - data_dbt.size = sizeof(doff); - pdata = &data_dbt; - data_type = H_OFFPAGE; - } else { - pdata = val; - data_type = type; - } - - if (DBC_LOGGING(dbc)) { - rectype = PUTPAIR; - if (is_databig) - rectype |= PAIR_DATAMASK; - if (is_keybig) - rectype |= PAIR_KEYMASK; - if (type == H_DUPLICATE) - rectype |= PAIR_DUPMASK; - - if ((ret = __ham_insdel_log(dbp, dbc->txn, &new_lsn, 0, - rectype, PGNO(hcp->page), (u_int32_t)NUM_ENT(hcp->page), - &LSN(hcp->page), pkey, pdata)) != 0) - return (ret); - } else - LSN_NOT_LOGGED(new_lsn); - - /* Move lsn onto page. */ - LSN(hcp->page) = new_lsn; /* Structure assignment. */ - - __ham_putitem(dbp, hcp->page, pkey, key_type); - __ham_putitem(dbp, hcp->page, pdata, data_type); - - /* - * For splits, we are going to update item_info's page number - * field, so that we can easily return to the same page the - * next time we come in here. For other operations, this shouldn't - * matter, since odds are this is the last thing that happens before - * we return to the user program. - */ - hcp->pgno = PGNO(hcp->page); - - /* - * XXX - * Maybe keep incremental numbers here. 
- */ - if (!STD_LOCKING(dbc)) { - hcp->hdr->nelem++; - if ((ret = __ham_dirty_meta(dbc)) != 0) - return (ret); - } - - if (do_expand || (hcp->hdr->ffactor != 0 && - (u_int32_t)H_NUMPAIRS(hcp->page) > hcp->hdr->ffactor)) - F_SET(hcp, H_EXPAND); - return (0); -} - -/* - * Special __putitem call used in splitting -- copies one entry to - * another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA, - * H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we - * do not need to do any logging here. - * - * PUBLIC: void __ham_copy_item __P((DB *, PAGE *, u_int32_t, PAGE *)); - */ -void -__ham_copy_item(dbp, src_page, src_ndx, dest_page) - DB *dbp; - PAGE *src_page; - u_int32_t src_ndx; - PAGE *dest_page; -{ - u_int32_t len; - size_t pgsize; - void *src, *dest; - db_indx_t *inp; - - pgsize = dbp->pgsize; - inp = P_INP(dbp, dest_page); - /* - * Copy the key and data entries onto this new page. - */ - src = P_ENTRY(dbp, src_page, src_ndx); - - /* Set up space on dest. */ - len = (u_int32_t)LEN_HITEM(dbp, src_page, pgsize, src_ndx); - HOFFSET(dest_page) -= len; - inp[NUM_ENT(dest_page)] = HOFFSET(dest_page); - dest = P_ENTRY(dbp, dest_page, NUM_ENT(dest_page)); - NUM_ENT(dest_page)++; - - memcpy(dest, src, len); -} - -/* - * - * Returns: - * pointer on success - * NULL on error - * - * PUBLIC: int __ham_add_ovflpage __P((DBC *, PAGE *, int, PAGE **)); - */ -int -__ham_add_ovflpage(dbc, pagep, release, pp) - DBC *dbc; - PAGE *pagep; - int release; - PAGE **pp; -{ - DB *dbp; - DB_LSN new_lsn; - DB_MPOOLFILE *mpf; - PAGE *new_pagep; - int ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - - if ((ret = __db_new(dbc, P_HASH, &new_pagep)) != 0) - return (ret); - - if (DBC_LOGGING(dbc)) { - if ((ret = __ham_newpage_log(dbp, dbc->txn, &new_lsn, 0, - PUTOVFL, PGNO(pagep), &LSN(pagep), - PGNO(new_pagep), &LSN(new_pagep), PGNO_INVALID, NULL)) != 0) - return (ret); - } else - LSN_NOT_LOGGED(new_lsn); - - /* Move lsn onto page. 
*/ - LSN(pagep) = LSN(new_pagep) = new_lsn; - NEXT_PGNO(pagep) = PGNO(new_pagep); - - PREV_PGNO(new_pagep) = PGNO(pagep); - - if (release) - ret = mpf->put(mpf, pagep, DB_MPOOL_DIRTY); - - *pp = new_pagep; - return (ret); -} - -/* - * PUBLIC: int __ham_get_cpage __P((DBC *, db_lockmode_t)); - */ -int -__ham_get_cpage(dbc, mode) - DBC *dbc; - db_lockmode_t mode; -{ - DB *dbp; - DB_LOCK tmp_lock; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - int ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - ret = 0; - - /* - * There are four cases with respect to buckets and locks. - * 1. If there is no lock held, then if we are locking, we should - * get the lock. - * 2. If there is a lock held, it's for the current bucket, and it's - * for the right mode, we don't need to do anything. - * 3. If there is a lock held for the current bucket but it's not - * strong enough, we need to upgrade. - * 4. If there is a lock, but it's for a different bucket, then we need - * to release the existing lock and get a new lock. - */ - LOCK_INIT(tmp_lock); - if (STD_LOCKING(dbc)) { - if (hcp->lbucket != hcp->bucket && /* Case 4 */ - (ret = __TLPUT(dbc, hcp->lock)) != 0) - return (ret); - - if ((LOCK_ISSET(hcp->lock) && - (hcp->lock_mode == DB_LOCK_READ && - mode == DB_LOCK_WRITE))) { - /* Case 3. */ - tmp_lock = hcp->lock; - LOCK_INIT(hcp->lock); - } - - /* Acquire the lock. */ - if (!LOCK_ISSET(hcp->lock)) - /* Cases 1, 3, and 4. */ - if ((ret = __ham_lock_bucket(dbc, mode)) != 0) - return (ret); - - if (ret == 0) { - hcp->lock_mode = mode; - hcp->lbucket = hcp->bucket; - if (LOCK_ISSET(tmp_lock)) - /* Case 3: release the original lock. 
*/ - ret = - dbp->dbenv->lock_put(dbp->dbenv, &tmp_lock); - } else if (LOCK_ISSET(tmp_lock)) - hcp->lock = tmp_lock; - } - - if (ret == 0 && hcp->page == NULL) { - if (hcp->pgno == PGNO_INVALID) - hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); - if ((ret = mpf->get(mpf, - &hcp->pgno, DB_MPOOL_CREATE, &hcp->page)) != 0) - return (ret); - } - - return (0); -} - -/* - * Get a new page at the cursor, putting the last page if necessary. - * If the flag is set to H_ISDUP, then we are talking about the - * duplicate page, not the main page. - * - * PUBLIC: int __ham_next_cpage __P((DBC *, db_pgno_t, int)); - */ -int -__ham_next_cpage(dbc, pgno, dirty) - DBC *dbc; - db_pgno_t pgno; - int dirty; -{ - DB *dbp; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - PAGE *p; - int ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - if (hcp->page != NULL && - (ret = mpf->put(mpf, hcp->page, dirty ? DB_MPOOL_DIRTY : 0)) != 0) - return (ret); - hcp->page = NULL; - - if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &p)) != 0) - return (ret); - - hcp->page = p; - hcp->pgno = pgno; - hcp->indx = 0; - - return (0); -} - -/* - * __ham_lock_bucket -- - * Get the lock on a particular bucket. - * - * PUBLIC: int __ham_lock_bucket __P((DBC *, db_lockmode_t)); - */ -int -__ham_lock_bucket(dbc, mode) - DBC *dbc; - db_lockmode_t mode; -{ - HASH_CURSOR *hcp; - db_pgno_t pgno; - int gotmeta, ret; - - hcp = (HASH_CURSOR *)dbc->internal; - gotmeta = hcp->hdr == NULL ? 1 : 0; - if (gotmeta) - if ((ret = __ham_get_meta(dbc)) != 0) - return (ret); - pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); - if (gotmeta) - if ((ret = __ham_release_meta(dbc)) != 0) - return (ret); - - ret = __db_lget(dbc, 0, pgno, mode, 0, &hcp->lock); - - hcp->lock_mode = mode; - return (ret); -} - -/* - * __ham_dpair -- - * Delete a pair on a page, paying no attention to what the pair - * represents. 
The caller is responsible for freeing up duplicates - * or offpage entries that might be referenced by this pair. - * - * Recovery assumes that this may be called without the metadata - * page pinned. - * - * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t)); - */ -void -__ham_dpair(dbp, p, indx) - DB *dbp; - PAGE *p; - u_int32_t indx; -{ - db_indx_t delta, n, *inp; - u_int8_t *dest, *src; - - inp = P_INP(dbp, p); - /* - * Compute "delta", the amount we have to shift all of the - * offsets. To find the delta, we just need to calculate - * the size of the pair of elements we are removing. - */ - delta = H_PAIRSIZE(dbp, p, dbp->pgsize, indx); - - /* - * The hard case: we want to remove something other than - * the last item on the page. We need to shift data and - * offsets down. - */ - if ((db_indx_t)indx != NUM_ENT(p) - 2) { - /* - * Move the data: src is the first occupied byte on - * the page. (Length is delta.) - */ - src = (u_int8_t *)p + HOFFSET(p); - - /* - * Destination is delta bytes beyond src. This might - * be an overlapping copy, so we have to use memmove. - */ - dest = src + delta; - memmove(dest, src, inp[H_DATAINDEX(indx)] - HOFFSET(p)); - } - - /* Adjust page metadata. */ - HOFFSET(p) = HOFFSET(p) + delta; - NUM_ENT(p) = NUM_ENT(p) - 2; - - /* Adjust the offsets. */ - for (n = (db_indx_t)indx; n < (db_indx_t)(NUM_ENT(p)); n++) - inp[n] = inp[n + 2] + delta; - -} - -/* - * __ham_c_delpg -- - * - * Adjust the cursors after we've emptied a page in a bucket, taking - * care that when we move cursors pointing to deleted items, their - * orders don't collide with the orders of cursors on the page we move - * them to (since after this function is called, cursors with the same - * index on the two pages will be otherwise indistinguishable--they'll - * all have pgno new_pgno). There are three cases: - * - * 1) The emptied page is the first page in the bucket. 
In this - * case, we've copied all the items from the second page into the - * first page, so the first page is new_pgno and the second page is - * old_pgno. new_pgno is empty, but can have deleted cursors - * pointing at indx 0, so we need to be careful of the orders - * there. This is DB_HAM_DELFIRSTPG. - * - * 2) The page is somewhere in the middle of a bucket. Our caller - * can just delete such a page, so it's old_pgno. old_pgno is - * empty, but may have deleted cursors pointing at indx 0, so we - * need to be careful of indx 0 when we move those cursors to - * new_pgno. This is DB_HAM_DELMIDPG. - * - * 3) The page is the last in a bucket. Again the empty page is - * old_pgno, and again it should only have cursors that are deleted - * and at indx == 0. This time, though, there's no next page to - * move them to, so we set them to indx == num_ent on the previous - * page--and indx == num_ent is the index whose cursors we need to - * be careful of. This is DB_HAM_DELLASTPG. - */ -static int -__ham_c_delpg(dbc, old_pgno, new_pgno, num_ent, op, orderp) - DBC *dbc; - db_pgno_t old_pgno, new_pgno; - u_int32_t num_ent; - db_ham_mode op; - u_int32_t *orderp; -{ - DB *dbp, *ldbp; - DB_ENV *dbenv; - DB_LSN lsn; - DB_TXN *my_txn; - DBC *cp; - HASH_CURSOR *hcp; - int found, ret; - db_indx_t indx; - u_int32_t order; - - /* Which is the worrisome index? */ - indx = (op == DB_HAM_DELLASTPG) ? num_ent : 0; - - dbp = dbc->dbp; - dbenv = dbp->dbenv; - - my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL; - found = 0; - - MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); - /* - * Find the highest order of any cursor our movement - * may collide with. 
- */ - order = 1; - for (ldbp = __dblist_get(dbenv, dbp->adj_fileid); - ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; - ldbp = LIST_NEXT(ldbp, dblistlinks)) { - MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); - for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; - cp = TAILQ_NEXT(cp, links)) { - if (cp == dbc || cp->dbtype != DB_HASH) - continue; - hcp = (HASH_CURSOR *)cp->internal; - if (hcp->pgno == new_pgno) { - if (hcp->indx == indx && - F_ISSET(hcp, H_DELETED) && - hcp->order >= order) - order = hcp->order + 1; - DB_ASSERT(op != DB_HAM_DELFIRSTPG || - hcp->indx == NDX_INVALID || - (hcp->indx == 0 && - F_ISSET(hcp, H_DELETED))); - } - } - MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); - } - - for (ldbp = __dblist_get(dbenv, dbp->adj_fileid); - ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; - ldbp = LIST_NEXT(ldbp, dblistlinks)) { - MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); - for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; - cp = TAILQ_NEXT(cp, links)) { - if (cp == dbc || cp->dbtype != DB_HASH) - continue; - - hcp = (HASH_CURSOR *)cp->internal; - - if (hcp->pgno == old_pgno) { - switch (op) { - case DB_HAM_DELFIRSTPG: - /* - * We're moving all items, - * regardless of index. - */ - hcp->pgno = new_pgno; - - /* - * But we have to be careful of - * the order values. 
- */ - if (hcp->indx == indx) - hcp->order += order; - break; - case DB_HAM_DELMIDPG: - hcp->pgno = new_pgno; - DB_ASSERT(hcp->indx == 0 && - F_ISSET(hcp, H_DELETED)); - hcp->order += order; - break; - case DB_HAM_DELLASTPG: - hcp->pgno = new_pgno; - DB_ASSERT(hcp->indx == 0 && - F_ISSET(hcp, H_DELETED)); - hcp->indx = indx; - hcp->order += order; - break; - default: - DB_ASSERT(0); - return (__db_panic(dbenv, EINVAL)); - } - if (my_txn != NULL && cp->txn != my_txn) - found = 1; - } - } - MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); - } - MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); - - if (found != 0 && DBC_LOGGING(dbc)) { - if ((ret = __ham_chgpg_log(dbp, my_txn, &lsn, 0, op, - old_pgno, new_pgno, indx, order)) != 0) - return (ret); - } - *orderp = order; - return (0); -} diff --git a/bdb/hash/hash_rec.c b/bdb/hash/hash_rec.c deleted file mode 100644 index 24d3473c508..00000000000 --- a/bdb/hash/hash_rec.c +++ /dev/null @@ -1,1156 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2002 - * Sleepycat Software. All rights reserved. - */ -/* - * Copyright (c) 1995, 1996 - * Margo Seltzer. All rights reserved. - */ -/* - * Copyright (c) 1995, 1996 - * The President and Fellows of Harvard University. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Margo Seltzer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "db_config.h" - -#ifndef lint -static const char revid[] = "$Id: hash_rec.c,v 11.69 2002/09/03 14:12:49 margo Exp $"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <string.h> -#endif - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/log.h" - -static int __ham_alloc_pages __P((DB *, __ham_groupalloc_args *, DB_LSN *)); - -/* - * __ham_insdel_recover -- - * - * PUBLIC: int __ham_insdel_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_insdel_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_insdel_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - u_int32_t flags, opcode; - int cmp_n, cmp_p, ret, type; - - pagep = NULL; - COMPQUIET(info, NULL); - - REC_PRINT(__ham_insdel_print); - REC_INTRO(__ham_insdel_read, 1); - - if ((ret = mpf->get(mpf, 
&argp->pgno, 0, &pagep)) != 0) { - if (DB_UNDO(op)) { - /* - * We are undoing and the page doesn't exist. That - * is equivalent to having a pagelsn of 0, so we - * would not have to undo anything. In this case, - * don't bother creating a page. - */ - goto done; - } else if ((ret = mpf->get(mpf, - &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; - } - - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); - CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn); - /* - * Two possible things going on: - * redo a delete/undo a put: delete the item from the page. - * redo a put/undo a delete: add the item to the page. - * If we are undoing a delete, then the information logged is the - * entire entry off the page, not just the data of a dbt. In - * this case, we want to copy it back onto the page verbatim. - * We do this by calling __putitem with the type H_OFFPAGE instead - * of H_KEYDATA. - */ - opcode = OPCODE_OF(argp->opcode); - - flags = 0; - if ((opcode == DELPAIR && cmp_n == 0 && DB_UNDO(op)) || - (opcode == PUTPAIR && cmp_p == 0 && DB_REDO(op))) { - /* - * Need to redo a PUT or undo a delete. If we are undoing a - * delete, we've got to restore the item back to its original - * position. That's a royal pain in the butt (because we do - * not store item lengths on the page), but there's no choice. - */ - if (opcode != DELPAIR || - argp->ndx == (u_int32_t)NUM_ENT(pagep)) { - __ham_putitem(file_dbp, pagep, &argp->key, - DB_UNDO(op) || PAIR_ISKEYBIG(argp->opcode) ? - H_OFFPAGE : H_KEYDATA); - - if (PAIR_ISDATADUP(argp->opcode)) - type = H_DUPLICATE; - else if (DB_UNDO(op) || PAIR_ISDATABIG(argp->opcode)) - type = H_OFFPAGE; - else - type = H_KEYDATA; - __ham_putitem(file_dbp, pagep, &argp->data, type); - } else - (void)__ham_reputpair(file_dbp, pagep, - argp->ndx, &argp->key, &argp->data); - - LSN(pagep) = DB_REDO(op) ? 
*lsnp : argp->pagelsn; - flags = DB_MPOOL_DIRTY; - - } else if ((opcode == DELPAIR && cmp_p == 0 && DB_REDO(op)) || - (opcode == PUTPAIR && cmp_n == 0 && DB_UNDO(op))) { - /* Need to undo a put or redo a delete. */ - __ham_dpair(file_dbp, pagep, argp->ndx); - LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; - flags = DB_MPOOL_DIRTY; - } - - if ((ret = mpf->put(mpf, pagep, flags)) != 0) - goto out; - pagep = NULL; - - /* Return the previous LSN. */ -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL) - (void)mpf->put(mpf, pagep, 0); - REC_CLOSE; -} - -/* - * __ham_newpage_recover -- - * This log message is used when we add/remove overflow pages. This - * message takes care of the pointer chains, not the data on the pages. - * - * PUBLIC: int __ham_newpage_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_newpage_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_newpage_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - u_int32_t flags; - int cmp_n, cmp_p, ret; - - pagep = NULL; - COMPQUIET(info, NULL); - - REC_PRINT(__ham_newpage_print); - REC_INTRO(__ham_newpage_read, 1); - - if ((ret = mpf->get(mpf, &argp->new_pgno, 0, &pagep)) != 0) { - if (DB_UNDO(op)) { - /* - * We are undoing and the page doesn't exist. That - * is equivalent to having a pagelsn of 0, so we - * would not have to undo anything. In this case, - * don't bother creating a page. - */ - ret = 0; - goto ppage; - } else if ((ret = mpf->get(mpf, - &argp->new_pgno, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; - } - - /* - * There are potentially three pages we need to check: the one - * that we created/deleted, the one before it and the one after - * it. 
- */ - - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); - CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn); - - flags = 0; - if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) || - (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) { - /* Redo a create new page or undo a delete new page. */ - P_INIT(pagep, file_dbp->pgsize, argp->new_pgno, - argp->prev_pgno, argp->next_pgno, 0, P_HASH); - flags = DB_MPOOL_DIRTY; - } else if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DELOVFL) || - (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) { - /* - * Redo a delete or undo a create new page. All we - * really need to do is change the LSN. - */ - flags = DB_MPOOL_DIRTY; - } - - if (flags) - LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; - - if ((ret = mpf->put(mpf, pagep, flags)) != 0) - goto out; - pagep = NULL; - - /* Now do the prev page. */ -ppage: if (argp->prev_pgno != PGNO_INVALID) { - if ((ret = mpf->get(mpf, &argp->prev_pgno, 0, &pagep)) != 0) { - if (DB_UNDO(op)) { - /* - * We are undoing and the page doesn't exist. - * That is equivalent to having a pagelsn of 0, - * so we would not have to undo anything. In - * this case, don't bother creating a page. - */ - ret = 0; - goto npage; - } else if ((ret = mpf->get(mpf, - &argp->prev_pgno, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; - } - - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->prevlsn); - CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->prevlsn); - flags = 0; - - if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) || - (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) { - /* Redo a create new page or undo a delete new page. */ - pagep->next_pgno = argp->new_pgno; - flags = DB_MPOOL_DIRTY; - } else if ((cmp_p == 0 && - DB_REDO(op) && argp->opcode == DELOVFL) || - (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) { - /* Redo a delete or undo a create new page. 
*/ - pagep->next_pgno = argp->next_pgno; - flags = DB_MPOOL_DIRTY; - } - - if (flags) - LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn; - - if ((ret = mpf->put(mpf, pagep, flags)) != 0) - goto out; - pagep = NULL; - } - - /* Now time to do the next page */ -npage: if (argp->next_pgno != PGNO_INVALID) { - if ((ret = mpf->get(mpf, &argp->next_pgno, 0, &pagep)) != 0) { - if (DB_UNDO(op)) { - /* - * We are undoing and the page doesn't exist. - * That is equivalent to having a pagelsn of 0, - * so we would not have to undo anything. In - * this case, don't bother creating a page. - */ - goto done; - } else if ((ret = mpf->get(mpf, - &argp->next_pgno, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; - } - - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->nextlsn); - CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->nextlsn); - flags = 0; - - if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) || - (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) { - /* Redo a create new page or undo a delete new page. */ - pagep->prev_pgno = argp->new_pgno; - flags = DB_MPOOL_DIRTY; - } else if ((cmp_p == 0 && - DB_REDO(op) && argp->opcode == DELOVFL) || - (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) { - /* Redo a delete or undo a create new page. */ - pagep->prev_pgno = argp->prev_pgno; - flags = DB_MPOOL_DIRTY; - } - - if (flags) - LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn; - - if ((ret = mpf->put(mpf, pagep, flags)) != 0) - goto out; - pagep = NULL; - } -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL) - (void)mpf->put(mpf, pagep, 0); - REC_CLOSE; -} - -/* - * __ham_replace_recover -- - * This log message refers to partial puts that are local to a single - * page. You can think of them as special cases of the more general - * insdel log message. 
- * - * PUBLIC: int __ham_replace_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_replace_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_replace_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - DBT dbt; - PAGE *pagep; - u_int32_t flags; - int32_t grow; - int cmp_n, cmp_p, ret; - u_int8_t *hk; - - pagep = NULL; - COMPQUIET(info, NULL); - - REC_PRINT(__ham_replace_print); - REC_INTRO(__ham_replace_read, 1); - - if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) { - if (DB_UNDO(op)) { - /* - * We are undoing and the page doesn't exist. That - * is equivalent to having a pagelsn of 0, so we - * would not have to undo anything. In this case, - * don't bother creating a page. - */ - goto done; - } else if ((ret = mpf->get(mpf, - &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; - } - - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); - CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn); - - memset(&dbt, 0, sizeof(dbt)); - flags = 0; - grow = 1; - - if (cmp_p == 0 && DB_REDO(op)) { - /* Reapply the change as specified. */ - dbt.data = argp->newitem.data; - dbt.size = argp->newitem.size; - grow = argp->newitem.size - argp->olditem.size; - LSN(pagep) = *lsnp; - flags = DB_MPOOL_DIRTY; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Undo the already applied change. 
*/ - dbt.data = argp->olditem.data; - dbt.size = argp->olditem.size; - grow = argp->olditem.size - argp->newitem.size; - LSN(pagep) = argp->pagelsn; - flags = DB_MPOOL_DIRTY; - } - - if (flags) { - __ham_onpage_replace(file_dbp, pagep, - argp->ndx, argp->off, grow, &dbt); - if (argp->makedup) { - hk = P_ENTRY(file_dbp, pagep, argp->ndx); - if (DB_REDO(op)) - HPAGE_PTYPE(hk) = H_DUPLICATE; - else - HPAGE_PTYPE(hk) = H_KEYDATA; - } - } - - if ((ret = mpf->put(mpf, pagep, flags)) != 0) - goto out; - pagep = NULL; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL) - (void)mpf->put(mpf, pagep, 0); - REC_CLOSE; -} - -/* - * __ham_splitdata_recover -- - * - * PUBLIC: int __ham_splitdata_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_splitdata_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_splitdata_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - u_int32_t flags; - int cmp_n, cmp_p, ret; - - pagep = NULL; - COMPQUIET(info, NULL); - - REC_PRINT(__ham_splitdata_print); - REC_INTRO(__ham_splitdata_read, 1); - - if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) { - if (DB_UNDO(op)) { - /* - * We are undoing and the page doesn't exist. That - * is equivalent to having a pagelsn of 0, so we - * would not have to undo anything. In this case, - * don't bother creating a page. - */ - goto done; - } else if ((ret = mpf->get(mpf, - &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; - } - - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); - CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn); - - /* - * There are two types of log messages here, one for the old page - * and one for the new pages created. The original image in the - * SPLITOLD record is used for undo. The image in the SPLITNEW - * is used for redo. 
We should never have a case where there is - * a redo operation and the SPLITOLD record is on disk, but not - * the SPLITNEW record. Therefore, we only have work to do when - * redoing NEW messages and undoing OLD messages, but we have to update - * LSNs in both cases. - */ - flags = 0; - if (cmp_p == 0 && DB_REDO(op)) { - if (argp->opcode == SPLITNEW) - /* Need to redo the split described. */ - memcpy(pagep, argp->pageimage.data, - argp->pageimage.size); - LSN(pagep) = *lsnp; - flags = DB_MPOOL_DIRTY; - } else if (cmp_n == 0 && DB_UNDO(op)) { - if (argp->opcode == SPLITOLD) { - /* Put back the old image. */ - memcpy(pagep, argp->pageimage.data, - argp->pageimage.size); - } else - P_INIT(pagep, file_dbp->pgsize, argp->pgno, - PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - LSN(pagep) = argp->pagelsn; - flags = DB_MPOOL_DIRTY; - } - if ((ret = mpf->put(mpf, pagep, flags)) != 0) - goto out; - pagep = NULL; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL) - (void)mpf->put(mpf, pagep, 0); - REC_CLOSE; -} - -/* - * __ham_copypage_recover -- - * Recovery function for copypage. - * - * PUBLIC: int __ham_copypage_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_copypage_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_copypage_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - u_int32_t flags; - int cmp_n, cmp_p, ret; - - pagep = NULL; - COMPQUIET(info, NULL); - - REC_PRINT(__ham_copypage_print); - REC_INTRO(__ham_copypage_read, 1); - - flags = 0; - - /* This is the bucket page. */ - if ((ret = mpf->get(mpf, &argp->pgno, 0, &pagep)) != 0) { - if (DB_UNDO(op)) { - /* - * We are undoing and the page doesn't exist. That - * is equivalent to having a pagelsn of 0, so we - * would not have to undo anything. In this case, - * don't bother creating a page. 
- */ - ret = 0; - goto donext; - } else if ((ret = mpf->get(mpf, - &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; - } - - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); - CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn); - - if (cmp_p == 0 && DB_REDO(op)) { - /* Need to redo update described. */ - memcpy(pagep, argp->page.data, argp->page.size); - PGNO(pagep) = argp->pgno; - PREV_PGNO(pagep) = PGNO_INVALID; - LSN(pagep) = *lsnp; - flags = DB_MPOOL_DIRTY; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Need to undo update described. */ - P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID, - argp->next_pgno, 0, P_HASH); - LSN(pagep) = argp->pagelsn; - flags = DB_MPOOL_DIRTY; - } - if ((ret = mpf->put(mpf, pagep, flags)) != 0) - goto out; - pagep = NULL; - -donext: /* Now fix up the "next" page. */ - if ((ret = mpf->get(mpf, &argp->next_pgno, 0, &pagep)) != 0) { - if (DB_UNDO(op)) { - /* - * We are undoing and the page doesn't exist. That - * is equivalent to having a pagelsn of 0, so we - * would not have to undo anything. In this case, - * don't bother creating a page. - */ - ret = 0; - goto do_nn; - } else if ((ret = mpf->get(mpf, - &argp->next_pgno, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; - } - - /* For REDO just update the LSN. For UNDO copy page back. */ - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->nextlsn); - CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->nextlsn); - flags = 0; - if (cmp_p == 0 && DB_REDO(op)) { - LSN(pagep) = *lsnp; - flags = DB_MPOOL_DIRTY; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Need to undo update described. */ - memcpy(pagep, argp->page.data, argp->page.size); - flags = DB_MPOOL_DIRTY; - } - if ((ret = mpf->put(mpf, pagep, flags)) != 0) - goto out; - pagep = NULL; - - /* Now fix up the next's next page. 
*/ -do_nn: if (argp->nnext_pgno == PGNO_INVALID) - goto done; - - if ((ret = mpf->get(mpf, &argp->nnext_pgno, 0, &pagep)) != 0) { - if (DB_UNDO(op)) { - /* - * We are undoing and the page doesn't exist. That - * is equivalent to having a pagelsn of 0, so we - * would not have to undo anything. In this case, - * don't bother creating a page. - */ - goto done; - } else if ((ret = mpf->get(mpf, - &argp->nnext_pgno, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; - } - - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->nnextlsn); - CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->nnextlsn); - - flags = 0; - if (cmp_p == 0 && DB_REDO(op)) { - /* Need to redo update described. */ - PREV_PGNO(pagep) = argp->pgno; - LSN(pagep) = *lsnp; - flags = DB_MPOOL_DIRTY; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Need to undo update described. */ - PREV_PGNO(pagep) = argp->next_pgno; - LSN(pagep) = argp->nnextlsn; - flags = DB_MPOOL_DIRTY; - } - if ((ret = mpf->put(mpf, pagep, flags)) != 0) - goto out; - pagep = NULL; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL) - (void)mpf->put(mpf, pagep, 0); - REC_CLOSE; -} - -/* - * __ham_metagroup_recover -- - * Recovery function for metagroup. 
- * - * PUBLIC: int __ham_metagroup_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_metagroup_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_metagroup_args *argp; - HASH_CURSOR *hcp; - DB *file_dbp; - DBMETA *mmeta; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - db_pgno_t pgno; - u_int32_t flags, mmeta_flags; - int cmp_n, cmp_p, did_recover, groupgrow, ret; - - COMPQUIET(info, NULL); - mmeta_flags = 0; - mmeta = NULL; - REC_PRINT(__ham_metagroup_print); - REC_INTRO(__ham_metagroup_read, 1); - - /* - * This logs the virtual create of pages pgno to pgno + bucket - * Since the mpool page-allocation is not really able to be - * transaction protected, we can never undo it. Even in an abort, - * we have to allocate these pages to the hash table if they - * were actually created. In particular, during disaster - * recovery the metapage may be before this point if we - * are rolling backward. If the file has not been extended - * then the metapage could not have been updated. - * The log record contains: - * bucket: new bucket being allocated. - * pgno: page number of the new bucket. - * if bucket is a power of 2, then we allocated a whole batch of - * pages; if it's not, then we simply allocated one new page. - */ - groupgrow = (u_int32_t)(1 << __db_log2(argp->bucket + 1)) == - argp->bucket + 1; - pgno = argp->pgno; - if (argp->newalloc) - pgno += argp->bucket; - - if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; - - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); - CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->pagelsn); - - flags = 0; - if ((cmp_p == 0 && DB_REDO(op)) || (cmp_n == 0 && DB_UNDO(op))) { - /* - * We need to make sure that we redo the allocation of the - * pages. 
- */ - if (DB_REDO(op)) - pagep->lsn = *lsnp; - else - pagep->lsn = argp->pagelsn; - flags = DB_MPOOL_DIRTY; - } - if ((ret = mpf->put(mpf, pagep, flags)) != 0) - goto out; - - /* Now we have to update the meta-data page. */ - hcp = (HASH_CURSOR *)dbc->internal; - if ((ret = __ham_get_meta(dbc)) != 0) - goto out; - cmp_n = log_compare(lsnp, &hcp->hdr->dbmeta.lsn); - cmp_p = log_compare(&hcp->hdr->dbmeta.lsn, &argp->metalsn); - CHECK_LSN(op, cmp_p, &hcp->hdr->dbmeta.lsn, &argp->metalsn); - did_recover = 0; - if (cmp_p == 0 && DB_REDO(op)) { - /* Redo the actual updating of bucket counts. */ - ++hcp->hdr->max_bucket; - if (groupgrow) { - hcp->hdr->low_mask = hcp->hdr->high_mask; - hcp->hdr->high_mask = - (argp->bucket + 1) | hcp->hdr->low_mask; - } - hcp->hdr->dbmeta.lsn = *lsnp; - did_recover = 1; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Undo the actual updating of bucket counts. */ - --hcp->hdr->max_bucket; - if (groupgrow) { - hcp->hdr->high_mask = hcp->hdr->low_mask; - hcp->hdr->low_mask = hcp->hdr->high_mask >> 1; - } - hcp->hdr->dbmeta.lsn = argp->metalsn; - did_recover = 1; - } - - /* - * Now we need to fix up the spares array. Each entry in the - * spares array indicates the beginning page number for the - * indicated doubling. We need to fill this in whenever the - * spares array is invalid, since we never reclaim pages from - * the spares array and we have to allocate the pages to the - * spares array in both the redo and undo cases. - */ - if (argp->newalloc && - hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] == PGNO_INVALID) { - hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] = - argp->pgno - argp->bucket - 1; - did_recover = 1; - } - - /* - * Finally, we need to potentially fix up the last_pgno field - * in the master meta-data page (which may or may not be the - * same as the hash header page). 
- */ - if (argp->mmpgno != argp->mpgno) { - if ((ret = - mpf->get(mpf, &argp->mmpgno, 0, (PAGE **)&mmeta)) != 0) - goto out; - mmeta_flags = 0; - cmp_n = log_compare(lsnp, &mmeta->lsn); - cmp_p = log_compare(&mmeta->lsn, &argp->mmetalsn); - if (cmp_p == 0 && DB_REDO(op)) { - mmeta->lsn = *lsnp; - mmeta_flags = DB_MPOOL_DIRTY; - } else if (cmp_n == 0 && DB_UNDO(op)) { - mmeta->lsn = argp->mmetalsn; - mmeta_flags = DB_MPOOL_DIRTY; - } - } else - mmeta = (DBMETA *)hcp->hdr; - - if (argp->newalloc) { - if (mmeta->last_pgno < pgno) - mmeta->last_pgno = pgno; - mmeta_flags = DB_MPOOL_DIRTY; - } - - if (argp->mmpgno != argp->mpgno && - (ret = mpf->put(mpf, mmeta, mmeta_flags)) != 0) - goto out; - mmeta = NULL; - - if (did_recover) - F_SET(hcp, H_DIRTY); - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (mmeta != NULL) - (void)mpf->put(mpf, mmeta, 0); - if (dbc != NULL) - (void)__ham_release_meta(dbc); - if (ret == ENOENT && op == DB_TXN_BACKWARD_ALLOC) - ret = 0; - - REC_CLOSE; -} - -/* - * __ham_groupalloc_recover -- - * Recover the batch creation of a set of pages for a new database. - * - * PUBLIC: int __ham_groupalloc_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_groupalloc_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_groupalloc_args *argp; - DBMETA *mmeta; - DB_MPOOLFILE *mpf; - DB *file_dbp; - DBC *dbc; - PAGE *pagep; - db_pgno_t pgno; - int cmp_n, cmp_p, modified, ret; - - mmeta = NULL; - modified = 0; - REC_PRINT(__ham_groupalloc_print); - REC_INTRO(__ham_groupalloc_read, 0); - - pgno = PGNO_BASE_MD; - if ((ret = mpf->get(mpf, &pgno, 0, &mmeta)) != 0) { - if (DB_REDO(op)) { - /* Page should have existed. 
*/ - __db_pgerr(file_dbp, pgno, ret); - goto out; - } else { - ret = 0; - goto done; - } - } - - cmp_n = log_compare(lsnp, &LSN(mmeta)); - cmp_p = log_compare(&LSN(mmeta), &argp->meta_lsn); - CHECK_LSN(op, cmp_p, &LSN(mmeta), &argp->meta_lsn); - - /* - * Basically, we used mpool to allocate a chunk of pages. - * We need to either add those to a free list (in the undo - * case) or initialize them (in the redo case). - * - * If we are redoing and this is a hash subdatabase, it's possible - * that the pages were never allocated, so we'd better check for - * that and handle it here. - */ - if (DB_REDO(op)) { - if ((ret = __ham_alloc_pages(file_dbp, argp, lsnp)) != 0) - goto out; - if (cmp_p == 0) { - LSN(mmeta) = *lsnp; - modified = 1; - } - } else if (DB_UNDO(op)) { - /* - * Reset the last page back to its preallocation state. - */ - pgno = argp->start_pgno + argp->num - 1; - if ((ret = mpf->get(mpf, &pgno, 0, &pagep)) == 0) { - - if (log_compare(&pagep->lsn, lsnp) == 0) - ZERO_LSN(pagep->lsn); - - if ((ret = mpf->put(mpf, pagep, DB_MPOOL_DIRTY)) != 0) - goto out; - } else if (ret != DB_PAGE_NOTFOUND) - goto out; - /* - * Always put the pages into the limbo list and free them later. - */ - if ((ret = __db_add_limbo(dbenv, - info, argp->fileid, argp->start_pgno, argp->num)) != 0) - goto out; - if (cmp_n == 0) { - LSN(mmeta) = argp->meta_lsn; - modified = 1; - } - } - -done: if (ret == 0) - *lsnp = argp->prev_lsn; - -out: if (mmeta != NULL) - (void)mpf->put(mpf, mmeta, modified ? DB_MPOOL_DIRTY : 0); - - if (ret == ENOENT && op == DB_TXN_BACKWARD_ALLOC) - ret = 0; - REC_CLOSE; -} - -/* - * __ham_alloc_pages -- - * - * Called during redo of a file create. We create new pages in the file - * using the MPOOL_NEW_GROUP flag. We then log the meta-data page with a - * __crdel_metasub message. 
If we manage to crash without the newly written - * pages getting to disk (I'm not sure this can happen anywhere except our - * test suite?!), then we need to go through and recreate the final pages. - * Hash normally has holes in its files and handles them appropriately. - */ -static int -__ham_alloc_pages(dbp, argp, lsnp) - DB *dbp; - __ham_groupalloc_args *argp; - DB_LSN *lsnp; -{ - DB_MPOOLFILE *mpf; - PAGE *pagep; - db_pgno_t pgno; - int ret; - - mpf = dbp->mpf; - - /* Read the last page of the allocation. */ - pgno = argp->start_pgno + argp->num - 1; - - /* If the page exists, and it has been initialized, then we're done. */ - if ((ret = mpf->get(mpf, &pgno, 0, &pagep)) == 0) { - if (NUM_ENT(pagep) == 0 && IS_ZERO_LSN(pagep->lsn)) - goto reinit_page; - if ((ret = mpf->put(mpf, pagep, 0)) != 0) - return (ret); - return (0); - } - - /* Had to create the page. */ - if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0) { - __db_pgerr(dbp, pgno, ret); - return (ret); - } - -reinit_page: - /* Initialize the newly allocated page. */ - P_INIT(pagep, dbp->pgsize, pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - pagep->lsn = *lsnp; - - if ((ret = mpf->put(mpf, pagep, DB_MPOOL_DIRTY)) != 0) - return (ret); - - return (0); -} - -/* - * __ham_curadj_recover -- - * Undo cursor adjustments if a subtransaction fails. 
- * - * PUBLIC: int __ham_curadj_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_curadj_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_curadj_args *argp; - DB_MPOOLFILE *mpf; - DB *file_dbp; - DBC *dbc; - int ret; - HASH_CURSOR *hcp; - - COMPQUIET(info, NULL); - REC_PRINT(__ham_curadj_print); - REC_INTRO(__ham_curadj_read, 0); - - if (op != DB_TXN_ABORT) - goto done; - - /* - * Undo the adjustment by reinitializing the cursor - * to look like the one that was used to do the adjustment, - * then invert the add so that we undo the adjustment. - */ - hcp = (HASH_CURSOR *)dbc->internal; - hcp->pgno = argp->pgno; - hcp->indx = argp->indx; - hcp->dup_off = argp->dup_off; - hcp->order = argp->order; - if (!argp->add) - F_SET(hcp, H_DELETED); - (void)__ham_c_update(dbc, argp->len, !argp->add, argp->is_dup); - -done: *lsnp = argp->prev_lsn; -out: REC_CLOSE; -} - -/* - * __ham_chgpg_recover -- - * Undo cursor adjustments if a subtransaction fails. 
- * - * PUBLIC: int __ham_chgpg_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_chgpg_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_chgpg_args *argp; - BTREE_CURSOR *opdcp; - DB_MPOOLFILE *mpf; - DB *file_dbp, *ldbp; - DBC *dbc; - int ret; - DBC *cp; - HASH_CURSOR *lcp; - u_int32_t order, indx; - - COMPQUIET(info, NULL); - REC_PRINT(__ham_chgpg_print); - REC_INTRO(__ham_chgpg_read, 0); - - if (op != DB_TXN_ABORT) - goto done; - - /* Overloaded fields for DB_HAM_DEL*PG */ - indx = argp->old_indx; - order = argp->new_indx; - - MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); - for (ldbp = __dblist_get(dbenv, file_dbp->adj_fileid); - ldbp != NULL && ldbp->adj_fileid == file_dbp->adj_fileid; - ldbp = LIST_NEXT(ldbp, dblistlinks)) { - MUTEX_THREAD_LOCK(dbenv, file_dbp->mutexp); - - for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; - cp = TAILQ_NEXT(cp, links)) { - lcp = (HASH_CURSOR *)cp->internal; - - switch (argp->mode) { - case DB_HAM_DELFIRSTPG: - if (lcp->pgno != argp->new_pgno) - break; - if (lcp->indx != indx || - !F_ISSET(lcp, H_DELETED) || - lcp->order >= order) { - lcp->pgno = argp->old_pgno; - if (lcp->indx == indx) - lcp->order -= order; - } - break; - case DB_HAM_DELMIDPG: - case DB_HAM_DELLASTPG: - if (lcp->pgno == argp->new_pgno && - lcp->indx == indx && - F_ISSET(lcp, H_DELETED) && - lcp->order >= order) { - lcp->pgno = argp->old_pgno; - lcp->order -= order; - lcp->indx = 0; - } - break; - case DB_HAM_CHGPG: - /* - * If we're doing a CHGPG, we're undoing - * the move of a non-deleted item to a - * new page. Any cursors with the deleted - * flag set do not belong to this item; - * don't touch them. 
- */ - if (F_ISSET(lcp, H_DELETED)) - break; - /* FALLTHROUGH */ - case DB_HAM_SPLIT: - if (lcp->pgno == argp->new_pgno && - lcp->indx == argp->new_indx) { - lcp->indx = argp->old_indx; - lcp->pgno = argp->old_pgno; - } - break; - case DB_HAM_DUP: - if (lcp->opd == NULL) - break; - opdcp = (BTREE_CURSOR *)lcp->opd->internal; - if (opdcp->pgno != argp->new_pgno || - opdcp->indx != argp->new_indx) - break; - - if (F_ISSET(opdcp, C_DELETED)) - F_SET(lcp, H_DELETED); - /* - * We can't close a cursor while we have the - * dbp mutex locked, since c_close reacquires - * it. It should be safe to drop the mutex - * here, though, since newly opened cursors - * are put only at the end of the tailq and - * the cursor we're adjusting can't be closed - * under us. - */ - MUTEX_THREAD_UNLOCK(dbenv, file_dbp->mutexp); - if ((ret = lcp->opd->c_close(lcp->opd)) != 0) - goto out; - MUTEX_THREAD_LOCK(dbenv, file_dbp->mutexp); - lcp->opd = NULL; - break; - } - } - MUTEX_THREAD_UNLOCK(dbenv, file_dbp->mutexp); - } - MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); - -done: *lsnp = argp->prev_lsn; -out: REC_CLOSE; -} diff --git a/bdb/hash/hash_reclaim.c b/bdb/hash/hash_reclaim.c deleted file mode 100644 index ac90ffff08a..00000000000 --- a/bdb/hash/hash_reclaim.c +++ /dev/null @@ -1,111 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2002 - * Sleepycat Software. All rights reserved. - */ - -#include "db_config.h" - -#ifndef lint -static const char revid[] = "$Id: hash_reclaim.c,v 11.12 2002/03/28 19:49:43 bostic Exp $"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <string.h> -#endif - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/hash.h" - -/* - * __ham_reclaim -- - * Reclaim the pages from a subdatabase and return them to the - * parent free list. For now, we link each freed page on the list - * separately. 
If people really store hash databases in subdatabases - * and do a lot of creates and deletes, this is going to be a problem, - * because hash needs chunks of contiguous storage. We may eventually - * need to go to a model where we maintain the free list with chunks of - * contiguous pages as well. - * - * PUBLIC: int __ham_reclaim __P((DB *, DB_TXN *txn)); - */ -int -__ham_reclaim(dbp, txn) - DB *dbp; - DB_TXN *txn; -{ - DBC *dbc; - HASH_CURSOR *hcp; - int ret; - - /* Open up a cursor that we'll use for traversing. */ - if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0) - return (ret); - hcp = (HASH_CURSOR *)dbc->internal; - - if ((ret = __ham_get_meta(dbc)) != 0) - goto err; - - if ((ret = __ham_traverse(dbc, - DB_LOCK_WRITE, __db_reclaim_callback, dbc, 1)) != 0) - goto err; - if ((ret = dbc->c_close(dbc)) != 0) - goto err; - if ((ret = __ham_release_meta(dbc)) != 0) - goto err; - return (0); - -err: if (hcp->hdr != NULL) - (void)__ham_release_meta(dbc); - (void)dbc->c_close(dbc); - return (ret); -} - -/* - * __ham_truncate -- - * Reclaim the pages from a subdatabase and return them to the - * parent free list. - * - * PUBLIC: int __ham_truncate __P((DB *, DB_TXN *txn, u_int32_t *)); - */ -int -__ham_truncate(dbp, txn, countp) - DB *dbp; - DB_TXN *txn; - u_int32_t *countp; -{ - DBC *dbc; - HASH_CURSOR *hcp; - db_trunc_param trunc; - int ret; - - /* Open up a cursor that we'll use for traversing. 
*/ - if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0) - return (ret); - hcp = (HASH_CURSOR *)dbc->internal; - - if ((ret = __ham_get_meta(dbc)) != 0) - goto err; - - trunc.count = 0; - trunc.dbc = dbc; - - if ((ret = __ham_traverse(dbc, - DB_LOCK_WRITE, __db_truncate_callback, &trunc, 1)) != 0) - goto err; - if ((ret = __ham_release_meta(dbc)) != 0) - goto err; - if ((ret = dbc->c_close(dbc)) != 0) - goto err; - *countp = trunc.count; - return (0); - -err: if (hcp->hdr != NULL) - (void)__ham_release_meta(dbc); - (void)dbc->c_close(dbc); - return (ret); -} diff --git a/bdb/hash/hash_stat.c b/bdb/hash/hash_stat.c deleted file mode 100644 index f9ee1d099cb..00000000000 --- a/bdb/hash/hash_stat.c +++ /dev/null @@ -1,372 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2002 - * Sleepycat Software. All rights reserved. - */ - -#include "db_config.h" - -#ifndef lint -static const char revid[] = "$Id: hash_stat.c,v 11.48 2002/08/06 06:11:28 bostic Exp $"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <string.h> -#endif - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" - -static int __ham_stat_callback __P((DB *, PAGE *, void *, int *)); - -/* - * __ham_stat -- - * Gather/print the hash statistics - * - * PUBLIC: int __ham_stat __P((DB *, void *, u_int32_t)); - */ -int -__ham_stat(dbp, spp, flags) - DB *dbp; - void *spp; - u_int32_t flags; -{ - DBC *dbc; - DB_ENV *dbenv; - DB_HASH_STAT *sp; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - PAGE *h; - db_pgno_t pgno; - int ret; - - dbenv = dbp->dbenv; - - PANIC_CHECK(dbenv); - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->stat"); - - mpf = dbp->mpf; - sp = NULL; - - /* Check for invalid flags. 
*/ - if ((ret = __db_statchk(dbp, flags)) != 0) - return (ret); - - if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) - return (ret); - hcp = (HASH_CURSOR *)dbc->internal; - - if ((ret = __ham_get_meta(dbc)) != 0) - goto err; - - /* Allocate and clear the structure. */ - if ((ret = __os_umalloc(dbenv, sizeof(*sp), &sp)) != 0) - goto err; - memset(sp, 0, sizeof(*sp)); - /* Copy the fields that we have. */ - sp->hash_nkeys = hcp->hdr->dbmeta.key_count; - sp->hash_ndata = hcp->hdr->dbmeta.record_count; - sp->hash_pagesize = dbp->pgsize; - sp->hash_buckets = hcp->hdr->max_bucket + 1; - sp->hash_magic = hcp->hdr->dbmeta.magic; - sp->hash_version = hcp->hdr->dbmeta.version; - sp->hash_metaflags = hcp->hdr->dbmeta.flags; - sp->hash_ffactor = hcp->hdr->ffactor; - - if (flags == DB_FAST_STAT || flags == DB_CACHED_COUNTS) - goto done; - - /* Walk the free list, counting pages. */ - for (sp->hash_free = 0, pgno = hcp->hdr->dbmeta.free; - pgno != PGNO_INVALID;) { - ++sp->hash_free; - - if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0) - goto err; - - pgno = h->next_pgno; - (void)mpf->put(mpf, h, 0); - } - - /* Now traverse the rest of the table. */ - sp->hash_nkeys = 0; - sp->hash_ndata = 0; - if ((ret = __ham_traverse(dbc, - DB_LOCK_READ, __ham_stat_callback, sp, 0)) != 0) - goto err; - - if (!F_ISSET(dbp, DB_AM_RDONLY)) { - if ((ret = __ham_dirty_meta(dbc)) != 0) - goto err; - hcp->hdr->dbmeta.key_count = sp->hash_nkeys; - hcp->hdr->dbmeta.record_count = sp->hash_ndata; - } - -done: - if ((ret = __ham_release_meta(dbc)) != 0) - goto err; - if ((ret = dbc->c_close(dbc)) != 0) - goto err; - - *(DB_HASH_STAT **)spp = sp; - return (0); - -err: if (sp != NULL) - __os_ufree(dbenv, sp); - if (hcp->hdr != NULL) - (void)__ham_release_meta(dbc); - (void)dbc->c_close(dbc); - return (ret); - -} - -/* - * __ham_traverse - * Traverse an entire hash table. We use the callback so that we - * can use this both for stat collection and for deallocation. 
- * - * PUBLIC: int __ham_traverse __P((DBC *, db_lockmode_t, - * PUBLIC: int (*)(DB *, PAGE *, void *, int *), void *, int)); - */ -int -__ham_traverse(dbc, mode, callback, cookie, look_past_max) - DBC *dbc; - db_lockmode_t mode; - int (*callback) __P((DB *, PAGE *, void *, int *)); - void *cookie; - int look_past_max; -{ - DB *dbp; - DBC *opd; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - HKEYDATA *hk; - db_pgno_t pgno, opgno; - int did_put, i, ret, t_ret; - u_int32_t bucket, spares_entry; - - dbp = dbc->dbp; - opd = NULL; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - ret = 0; - - /* - * In a perfect world, we could simply read each page in the file - * and look at its page type to tally the information necessary. - * Unfortunately, the bucket locking that hash tables do to make - * locking easy, makes this a pain in the butt. We have to traverse - * duplicate, overflow and big pages from the bucket so that we - * don't access anything that isn't properly locked. - * - */ - for (bucket = 0;; bucket++) { - /* - * We put the loop exit condition check here, because - * it made for a really vile extended ?: that made SCO's - * compiler drop core. - * - * If look_past_max is not set, we can stop at max_bucket; - * if it is set, we need to include pages that are part of - * the current doubling but beyond the highest bucket we've - * split into, as well as pages from a "future" doubling - * that may have been created within an aborted - * transaction. To do this, keep looping (and incrementing - * bucket) until the corresponding spares array entries - * cease to be defined. 
- */ - if (look_past_max) { - spares_entry = __db_log2(bucket + 1); - if (spares_entry >= NCACHED || - hcp->hdr->spares[spares_entry] == 0) - break; - } else { - if (bucket > hcp->hdr->max_bucket) - break; - } - - hcp->bucket = bucket; - hcp->pgno = pgno = BUCKET_TO_PAGE(hcp, bucket); - for (ret = __ham_get_cpage(dbc, mode); ret == 0; - ret = __ham_next_cpage(dbc, pgno, 0)) { - - /* - * If we are cleaning up pages past the max_bucket, - * then they may be on the free list and have their - * next pointers set, but the should be ignored. In - * fact, we really ought to just skip anybody who is - * not a valid page. - */ - if (TYPE(hcp->page) == P_INVALID) - break; - pgno = NEXT_PGNO(hcp->page); - - /* - * Go through each item on the page checking for - * duplicates (in which case we have to count the - * duplicate pages) or big key/data items (in which - * case we have to count those pages). - */ - for (i = 0; i < NUM_ENT(hcp->page); i++) { - hk = (HKEYDATA *)P_ENTRY(dbp, hcp->page, i); - switch (HPAGE_PTYPE(hk)) { - case H_OFFDUP: - memcpy(&opgno, HOFFDUP_PGNO(hk), - sizeof(db_pgno_t)); - if ((ret = __db_c_newopd(dbc, - opgno, NULL, &opd)) != 0) - return (ret); - if ((ret = __bam_traverse(opd, - DB_LOCK_READ, opgno, - callback, cookie)) - != 0) - goto err; - if ((ret = opd->c_close(opd)) != 0) - return (ret); - opd = NULL; - break; - case H_OFFPAGE: - /* - * We are about to get a big page - * which will use the same spot that - * the current page uses, so we need - * to restore the current page before - * looking at it again. - */ - memcpy(&opgno, HOFFPAGE_PGNO(hk), - sizeof(db_pgno_t)); - if ((ret = __db_traverse_big(dbp, - opgno, callback, cookie)) != 0) - goto err; - break; - case H_KEYDATA: - break; - } - } - - /* Call the callback on main pages. 
*/ - if ((ret = callback(dbp, - hcp->page, cookie, &did_put)) != 0) - goto err; - - if (did_put) - hcp->page = NULL; - if (pgno == PGNO_INVALID) - break; - } - if (ret != 0) - goto err; - - if (STD_LOCKING(dbc)) - (void)dbp->dbenv->lock_put(dbp->dbenv, &hcp->lock); - - if (hcp->page != NULL) { - if ((ret = mpf->put(mpf, hcp->page, 0)) != 0) - return (ret); - hcp->page = NULL; - } - - } -err: if (opd != NULL && - (t_ret = opd->c_close(opd)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} - -static int -__ham_stat_callback(dbp, pagep, cookie, putp) - DB *dbp; - PAGE *pagep; - void *cookie; - int *putp; -{ - DB_HASH_STAT *sp; - DB_BTREE_STAT bstat; - db_indx_t indx, len, off, tlen, top; - u_int8_t *hk; - int ret; - - *putp = 0; - sp = cookie; - - switch (pagep->type) { - case P_INVALID: - /* - * Hash pages may be wholly zeroed; this is not a bug. - * Obviously such pages have no data, so we can just proceed. - */ - break; - case P_HASH: - /* - * We count the buckets and the overflow pages - * separately and tally their bytes separately - * as well. We need to figure out if this page - * is a bucket. - */ - if (PREV_PGNO(pagep) == PGNO_INVALID) - sp->hash_bfree += P_FREESPACE(dbp, pagep); - else { - sp->hash_overflows++; - sp->hash_ovfl_free += P_FREESPACE(dbp, pagep); - } - top = NUM_ENT(pagep); - /* Correct for on-page duplicates and deleted items. 
*/ - for (indx = 0; indx < top; indx += P_INDX) { - switch (*H_PAIRDATA(dbp, pagep, indx)) { - case H_OFFDUP: - case H_OFFPAGE: - break; - case H_KEYDATA: - sp->hash_ndata++; - break; - case H_DUPLICATE: - tlen = LEN_HDATA(dbp, pagep, 0, indx); - hk = H_PAIRDATA(dbp, pagep, indx); - for (off = 0; off < tlen; - off += len + 2 * sizeof (db_indx_t)) { - sp->hash_ndata++; - memcpy(&len, - HKEYDATA_DATA(hk) - + off, sizeof(db_indx_t)); - } - } - } - sp->hash_nkeys += H_NUMPAIRS(pagep); - break; - case P_IBTREE: - case P_IRECNO: - case P_LBTREE: - case P_LRECNO: - case P_LDUP: - /* - * These are all btree pages; get a correct - * cookie and call them. Then add appropriate - * fields into our stat structure. - */ - memset(&bstat, 0, sizeof(bstat)); - bstat.bt_dup_pgfree = 0; - bstat.bt_int_pgfree = 0; - bstat.bt_leaf_pgfree = 0; - bstat.bt_ndata = 0; - if ((ret = __bam_stat_callback(dbp, pagep, &bstat, putp)) != 0) - return (ret); - sp->hash_dup++; - sp->hash_dup_free += bstat.bt_leaf_pgfree + - bstat.bt_dup_pgfree + bstat.bt_int_pgfree; - sp->hash_ndata += bstat.bt_ndata; - break; - case P_OVERFLOW: - sp->hash_bigpages++; - sp->hash_big_bfree += P_OVFLSPACE(dbp, dbp->pgsize, pagep); - break; - default: - return (__db_pgfmt(dbp->dbenv, pagep->pgno)); - } - - return (0); -} diff --git a/bdb/hash/hash_upgrade.c b/bdb/hash/hash_upgrade.c deleted file mode 100644 index 2dd21d7b644..00000000000 --- a/bdb/hash/hash_upgrade.c +++ /dev/null @@ -1,266 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2002 - * Sleepycat Software. All rights reserved. 
- */ -#include "db_config.h" - -#ifndef lint -static const char revid[] = "$Id: hash_upgrade.c,v 11.32 2002/08/06 05:34:58 bostic Exp $"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <limits.h> -#include <string.h> -#endif - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/hash.h" -#include "dbinc/db_upgrade.h" - -/* - * __ham_30_hashmeta -- - * Upgrade the database from version 4/5 to version 6. - * - * PUBLIC: int __ham_30_hashmeta __P((DB *, char *, u_int8_t *)); - */ -int -__ham_30_hashmeta(dbp, real_name, obuf) - DB *dbp; - char *real_name; - u_int8_t *obuf; -{ - DB_ENV *dbenv; - HASHHDR *oldmeta; - HMETA30 newmeta; - u_int32_t *o_spares, *n_spares; - u_int32_t fillf, maxb, nelem; - int i, max_entry, ret; - - dbenv = dbp->dbenv; - memset(&newmeta, 0, sizeof(newmeta)); - - oldmeta = (HASHHDR *)obuf; - - /* - * The first 32 bytes are similar. The only change is the version - * and that we removed the ovfl_point and have the page type now. - */ - - newmeta.dbmeta.lsn = oldmeta->lsn; - newmeta.dbmeta.pgno = oldmeta->pgno; - newmeta.dbmeta.magic = oldmeta->magic; - newmeta.dbmeta.version = 6; - newmeta.dbmeta.pagesize = oldmeta->pagesize; - newmeta.dbmeta.type = P_HASHMETA; - - /* Move flags */ - newmeta.dbmeta.flags = oldmeta->flags; - - /* Copy the free list, which has changed its name but works the same. */ - newmeta.dbmeta.free = oldmeta->last_freed; - - /* Copy: max_bucket, high_mask, low-mask, ffactor, nelem, h_charkey */ - newmeta.max_bucket = oldmeta->max_bucket; - newmeta.high_mask = oldmeta->high_mask; - newmeta.low_mask = oldmeta->low_mask; - newmeta.ffactor = oldmeta->ffactor; - newmeta.nelem = oldmeta->nelem; - newmeta.h_charkey = oldmeta->h_charkey; - - /* - * There was a bug in 2.X versions where the nelem could go negative. - * In general, this is considered "bad." If it does go negative - * (that is, very large and positive), we'll die trying to dump and - * load this database. 
So, let's see if we can fix it here. - */ - nelem = newmeta.nelem; - fillf = newmeta.ffactor; - maxb = newmeta.max_bucket; - - if ((fillf != 0 && fillf * maxb < 2 * nelem) || - (fillf == 0 && nelem > 0x8000000)) - newmeta.nelem = 0; - - /* - * We now have to convert the spares array. The old spares array - * contained the total number of extra pages allocated prior to - * the bucket that begins the next doubling. The new spares array - * contains the page number of the first bucket in the next doubling - * MINUS the bucket number of that bucket. - */ - o_spares = oldmeta->spares; - n_spares = newmeta.spares; - max_entry = __db_log2(maxb + 1); /* highest spares entry in use */ - n_spares[0] = 1; - for (i = 1; i < NCACHED && i <= max_entry; i++) - n_spares[i] = 1 + o_spares[i - 1]; - - /* Replace the unique ID. */ - if ((ret = __os_fileid(dbenv, real_name, 1, newmeta.dbmeta.uid)) != 0) - return (ret); - - /* Overwrite the original. */ - memcpy(oldmeta, &newmeta, sizeof(newmeta)); - - return (0); -} - -/* - * __ham_30_sizefix -- - * Make sure that all hash pages belonging to the current - * hash doubling are within the bounds of the file. - * - * PUBLIC: int __ham_30_sizefix __P((DB *, DB_FH *, char *, u_int8_t *)); - */ -int -__ham_30_sizefix(dbp, fhp, realname, metabuf) - DB *dbp; - DB_FH *fhp; - char *realname; - u_int8_t *metabuf; -{ - u_int8_t buf[DB_MAX_PGSIZE]; - DB_ENV *dbenv; - HMETA30 *meta; - db_pgno_t last_actual, last_desired; - int ret; - size_t nw; - u_int32_t pagesize; - - dbenv = dbp->dbenv; - memset(buf, 0, DB_MAX_PGSIZE); - - meta = (HMETA30 *)metabuf; - pagesize = meta->dbmeta.pagesize; - - /* - * Get the last page number. To do this, we'll need dbp->pgsize - * to be set right, so slam it into place. - */ - dbp->pgsize = pagesize; - if ((ret = __db_lastpgno(dbp, realname, fhp, &last_actual)) != 0) - return (ret); - - /* - * The last bucket in the doubling is equal to high_mask; calculate - * the page number that implies. 
- */ - last_desired = BS_TO_PAGE(meta->high_mask, meta->spares); - - /* - * If last_desired > last_actual, we need to grow the file. Write - * a zeroed page where last_desired would go. - */ - if (last_desired > last_actual) { - if ((ret = __os_seek(dbenv, - fhp, pagesize, last_desired, 0, 0, DB_OS_SEEK_SET)) != 0) - return (ret); - if ((ret = __os_write(dbenv, fhp, buf, pagesize, &nw)) != 0) - return (ret); - } - - return (0); -} - -/* - * __ham_31_hashmeta -- - * Upgrade the database from version 6 to version 7. - * - * PUBLIC: int __ham_31_hashmeta - * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); - */ -int -__ham_31_hashmeta(dbp, real_name, flags, fhp, h, dirtyp) - DB *dbp; - char *real_name; - u_int32_t flags; - DB_FH *fhp; - PAGE *h; - int *dirtyp; -{ - HMETA31 *newmeta; - HMETA30 *oldmeta; - - COMPQUIET(dbp, NULL); - COMPQUIET(real_name, NULL); - COMPQUIET(fhp, NULL); - - newmeta = (HMETA31 *)h; - oldmeta = (HMETA30 *)h; - - /* - * Copy the fields down the page. - * The fields may overlap so start at the bottom and use memmove(). - */ - memmove(newmeta->spares, oldmeta->spares, sizeof(oldmeta->spares)); - newmeta->h_charkey = oldmeta->h_charkey; - newmeta->nelem = oldmeta->nelem; - newmeta->ffactor = oldmeta->ffactor; - newmeta->low_mask = oldmeta->low_mask; - newmeta->high_mask = oldmeta->high_mask; - newmeta->max_bucket = oldmeta->max_bucket; - memmove(newmeta->dbmeta.uid, - oldmeta->dbmeta.uid, sizeof(oldmeta->dbmeta.uid)); - newmeta->dbmeta.flags = oldmeta->dbmeta.flags; - newmeta->dbmeta.record_count = 0; - newmeta->dbmeta.key_count = 0; - ZERO_LSN(newmeta->dbmeta.unused3); - - /* Update the version. */ - newmeta->dbmeta.version = 7; - - /* Upgrade the flags. */ - if (LF_ISSET(DB_DUPSORT)) - F_SET(&newmeta->dbmeta, DB_HASH_DUPSORT); - - *dirtyp = 1; - return (0); -} - -/* - * __ham_31_hash -- - * Upgrade the database hash leaf pages. 
- * - * PUBLIC: int __ham_31_hash - * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); - */ -int -__ham_31_hash(dbp, real_name, flags, fhp, h, dirtyp) - DB *dbp; - char *real_name; - u_int32_t flags; - DB_FH *fhp; - PAGE *h; - int *dirtyp; -{ - HKEYDATA *hk; - db_pgno_t pgno, tpgno; - db_indx_t indx; - int ret; - - COMPQUIET(flags, 0); - - ret = 0; - for (indx = 0; indx < NUM_ENT(h); indx += 2) { - hk = (HKEYDATA *)H_PAIRDATA(dbp, h, indx); - if (HPAGE_PTYPE(hk) == H_OFFDUP) { - memcpy(&pgno, HOFFDUP_PGNO(hk), sizeof(db_pgno_t)); - tpgno = pgno; - if ((ret = __db_31_offdup(dbp, real_name, fhp, - LF_ISSET(DB_DUPSORT) ? 1 : 0, &tpgno)) != 0) - break; - if (pgno != tpgno) { - *dirtyp = 1; - memcpy(HOFFDUP_PGNO(hk), - &tpgno, sizeof(db_pgno_t)); - } - } - } - - return (ret); -} diff --git a/bdb/hash/hash_verify.c b/bdb/hash/hash_verify.c deleted file mode 100644 index e6f5a2b0d65..00000000000 --- a/bdb/hash/hash_verify.c +++ /dev/null @@ -1,1079 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1999-2002 - * Sleepycat Software. All rights reserved. - * - * $Id: hash_verify.c,v 1.53 2002/08/06 05:35:02 bostic Exp $ - */ - -#include "db_config.h" - -#ifndef lint -static const char revid[] = "$Id: hash_verify.c,v 1.53 2002/08/06 05:35:02 bostic Exp $"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <string.h> -#endif - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_verify.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" - -static int __ham_dups_unsorted __P((DB *, u_int8_t *, u_int32_t)); -static int __ham_vrfy_bucket __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t, - u_int32_t)); -static int __ham_vrfy_item __P((DB *, - VRFY_DBINFO *, db_pgno_t, PAGE *, u_int32_t, u_int32_t)); - -/* - * __ham_vrfy_meta -- - * Verify the hash-specific part of a metadata page. 
- * - * Note that unlike btree, we don't save things off, because we - * will need most everything again to verify each page and the - * amount of state here is significant. - * - * PUBLIC: int __ham_vrfy_meta __P((DB *, VRFY_DBINFO *, HMETA *, - * PUBLIC: db_pgno_t, u_int32_t)); - */ -int -__ham_vrfy_meta(dbp, vdp, m, pgno, flags) - DB *dbp; - VRFY_DBINFO *vdp; - HMETA *m; - db_pgno_t pgno; - u_int32_t flags; -{ - HASH *hashp; - VRFY_PAGEINFO *pip; - int i, ret, t_ret, isbad; - u_int32_t pwr, mbucket; - u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); - - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - return (ret); - isbad = 0; - - hashp = dbp->h_internal; - - if (hashp != NULL && hashp->h_hash != NULL) - hfunc = hashp->h_hash; - else - hfunc = __ham_func5; - - /* - * If we haven't already checked the common fields in pagezero, - * check them. - */ - if (!F_ISSET(pip, VRFY_INCOMPLETE) && - (ret = __db_vrfy_meta(dbp, vdp, &m->dbmeta, pgno, flags)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - - /* h_charkey */ - if (!LF_ISSET(DB_NOORDERCHK)) - if (m->h_charkey != hfunc(dbp, CHARKEY, sizeof(CHARKEY))) { - EPRINT((dbp->dbenv, -"Page %lu: database has different custom hash function; reverify with DB_NOORDERCHK set", - (u_long)pgno)); - /* - * Return immediately; this is probably a sign - * of user error rather than database corruption, so - * we want to avoid extraneous errors. - */ - isbad = 1; - goto err; - } - - /* max_bucket must be less than the last pgno. */ - if (m->max_bucket > vdp->last_pgno) { - EPRINT((dbp->dbenv, - "Page %lu: Impossible max_bucket %lu on meta page", - (u_long)pgno, (u_long)m->max_bucket)); - /* - * Most other fields depend somehow on max_bucket, so - * we just return--there will be lots of extraneous - * errors. 
- */ - isbad = 1; - goto err; - } - - /* - * max_bucket, high_mask and low_mask: high_mask must be one - * less than the next power of two above max_bucket, and - * low_mask must be one less than the power of two below it. - * - * - */ - pwr = (m->max_bucket == 0) ? 1 : 1 << __db_log2(m->max_bucket + 1); - if (m->high_mask != pwr - 1) { - EPRINT((dbp->dbenv, - "Page %lu: incorrect high_mask %lu, should be %lu", - (u_long)pgno, (u_long)m->high_mask, (u_long)pwr - 1)); - isbad = 1; - } - pwr >>= 1; - if (m->low_mask != pwr - 1) { - EPRINT((dbp->dbenv, - "Page %lu: incorrect low_mask %lu, should be %lu", - (u_long)pgno, (u_long)m->low_mask, (u_long)pwr - 1)); - isbad = 1; - } - - /* ffactor: no check possible. */ - pip->h_ffactor = m->ffactor; - - /* - * nelem: just make sure it's not astronomical for now. This is the - * same check that hash_upgrade does, since there was a bug in 2.X - * which could make nelem go "negative". - */ - if (m->nelem > 0x80000000) { - EPRINT((dbp->dbenv, - "Page %lu: suspiciously high nelem of %lu", - (u_long)pgno, (u_long)m->nelem)); - isbad = 1; - pip->h_nelem = 0; - } else - pip->h_nelem = m->nelem; - - /* flags */ - if (F_ISSET(&m->dbmeta, DB_HASH_DUP)) - F_SET(pip, VRFY_HAS_DUPS); - if (F_ISSET(&m->dbmeta, DB_HASH_DUPSORT)) - F_SET(pip, VRFY_HAS_DUPSORT); - /* XXX: Why is the DB_HASH_SUBDB flag necessary? */ - - /* spares array */ - for (i = 0; m->spares[i] != 0 && i < NCACHED; i++) { - /* - * We set mbucket to the maximum bucket that would use a given - * spares entry; we want to ensure that it's always less - * than last_pgno. - */ - mbucket = (1 << i) - 1; - if (BS_TO_PAGE(mbucket, m->spares) > vdp->last_pgno) { - EPRINT((dbp->dbenv, - "Page %lu: spares array entry %d is invalid", - (u_long)pgno, i)); - isbad = 1; - } - } - -err: if ((t_ret = - __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) - ret = t_ret; - return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); -} - -/* - * __ham_vrfy -- - * Verify hash page. 
- * - * PUBLIC: int __ham_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, - * PUBLIC: u_int32_t)); - */ -int -__ham_vrfy(dbp, vdp, h, pgno, flags) - DB *dbp; - VRFY_DBINFO *vdp; - PAGE *h; - db_pgno_t pgno; - u_int32_t flags; -{ - VRFY_PAGEINFO *pip; - u_int32_t ent, himark, inpend; - db_indx_t *inp; - int isbad, ret, t_ret; - - isbad = 0; - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - return (ret); - - /* Sanity check our flags and page type. */ - if ((ret = __db_fchk(dbp->dbenv, "__ham_vrfy", - flags, DB_AGGRESSIVE | DB_NOORDERCHK | DB_SALVAGE)) != 0) - goto err; - - if (TYPE(h) != P_HASH) { - TYPE_ERR_PRINT(dbp->dbenv, "__ham_vrfy", pgno, TYPE(h)); - DB_ASSERT(0); - ret = EINVAL; - goto err; - } - - /* Verify and save off fields common to all PAGEs. */ - if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - - /* - * Verify inp[]. Each offset from 0 to NUM_ENT(h) must be lower - * than the previous one, higher than the current end of the inp array, - * and lower than the page size. - * - * In any case, we return immediately if things are bad, as it would - * be unsafe to proceed. - */ - inp = P_INP(dbp, h); - for (ent = 0, himark = dbp->pgsize, - inpend = (u_int32_t)((u_int8_t *)inp - (u_int8_t *)h); - ent < NUM_ENT(h); ent++) - if (inp[ent] >= himark) { - EPRINT((dbp->dbenv, - "Page %lu: item %lu is out of order or nonsensical", - (u_long)pgno, (u_long)ent)); - isbad = 1; - goto err; - } else if (inpend >= himark) { - EPRINT((dbp->dbenv, - "Page %lu: entries array collided with data", - (u_long)pgno)); - isbad = 1; - goto err; - - } else { - himark = inp[ent]; - inpend += sizeof(db_indx_t); - if ((ret = __ham_vrfy_item( - dbp, vdp, pgno, h, ent, flags)) != 0) - goto err; - } - -err: if ((t_ret = - __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) - ret = t_ret; - return (ret == 0 && isbad == 1 ? 
DB_VERIFY_BAD : ret); -} - -/* - * __ham_vrfy_item -- - * Given a hash page and an offset, sanity-check the item itself, - * and save off any overflow items or off-page dup children as necessary. - */ -static int -__ham_vrfy_item(dbp, vdp, pgno, h, i, flags) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t pgno; - PAGE *h; - u_int32_t i, flags; -{ - HOFFPAGE hop; - HOFFDUP hod; - VRFY_CHILDINFO child; - VRFY_PAGEINFO *pip; - db_indx_t offset, len, dlen, elen; - int ret, t_ret; - u_int8_t *databuf; - - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - return (ret); - - switch (HPAGE_TYPE(dbp, h, i)) { - case H_KEYDATA: - /* Nothing to do here--everything but the type field is data */ - break; - case H_DUPLICATE: - /* Are we a datum or a key? Better be the former. */ - if (i % 2 == 0) { - EPRINT((dbp->dbenv, - "Page %lu: hash key stored as duplicate item %lu", - (u_long)pip->pgno, (u_long)i)); - } - /* - * Dups are encoded as a series within a single HKEYDATA, - * in which each dup is surrounded by a copy of its length - * on either side (so that the series can be walked in either - * direction. We loop through this series and make sure - * each dup is reasonable. - * - * Note that at this point, we've verified item i-1, so - * it's safe to use LEN_HKEYDATA (which looks at inp[i-1]). - */ - len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i); - databuf = HKEYDATA_DATA(P_ENTRY(dbp, h, i)); - for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) { - memcpy(&dlen, databuf + offset, sizeof(db_indx_t)); - - /* Make sure the length is plausible. */ - if (offset + DUP_SIZE(dlen) > len) { - EPRINT((dbp->dbenv, - "Page %lu: duplicate item %lu has bad length", - (u_long)pip->pgno, (u_long)i)); - ret = DB_VERIFY_BAD; - goto err; - } - - /* - * Make sure the second copy of the length is the - * same as the first. 
- */ - memcpy(&elen, - databuf + offset + dlen + sizeof(db_indx_t), - sizeof(db_indx_t)); - if (elen != dlen) { - EPRINT((dbp->dbenv, - "Page %lu: duplicate item %lu has two different lengths", - (u_long)pip->pgno, (u_long)i)); - ret = DB_VERIFY_BAD; - goto err; - } - } - F_SET(pip, VRFY_HAS_DUPS); - if (!LF_ISSET(DB_NOORDERCHK) && - __ham_dups_unsorted(dbp, databuf, len)) - F_SET(pip, VRFY_DUPS_UNSORTED); - break; - case H_OFFPAGE: - /* Offpage item. Make sure pgno is sane, save off. */ - memcpy(&hop, P_ENTRY(dbp, h, i), HOFFPAGE_SIZE); - if (!IS_VALID_PGNO(hop.pgno) || hop.pgno == pip->pgno || - hop.pgno == PGNO_INVALID) { - EPRINT((dbp->dbenv, - "Page %lu: offpage item %lu has bad pgno %lu", - (u_long)pip->pgno, (u_long)i, (u_long)hop.pgno)); - ret = DB_VERIFY_BAD; - goto err; - } - memset(&child, 0, sizeof(VRFY_CHILDINFO)); - child.pgno = hop.pgno; - child.type = V_OVERFLOW; - child.tlen = hop.tlen; /* This will get checked later. */ - if ((ret = __db_vrfy_childput(vdp, pip->pgno, &child)) != 0) - goto err; - break; - case H_OFFDUP: - /* Offpage duplicate item. Same drill. */ - memcpy(&hod, P_ENTRY(dbp, h, i), HOFFDUP_SIZE); - if (!IS_VALID_PGNO(hod.pgno) || hod.pgno == pip->pgno || - hod.pgno == PGNO_INVALID) { - EPRINT((dbp->dbenv, - "Page %lu: offpage item %lu has bad page number", - (u_long)pip->pgno, (u_long)i)); - ret = DB_VERIFY_BAD; - goto err; - } - memset(&child, 0, sizeof(VRFY_CHILDINFO)); - child.pgno = hod.pgno; - child.type = V_DUPLICATE; - if ((ret = __db_vrfy_childput(vdp, pip->pgno, &child)) != 0) - goto err; - F_SET(pip, VRFY_HAS_DUPS); - break; - default: - EPRINT((dbp->dbenv, - "Page %lu: item %i has bad type", - (u_long)pip->pgno, (u_long)i)); - ret = DB_VERIFY_BAD; - break; - } - -err: if ((t_ret = - __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} - -/* - * __ham_vrfy_structure -- - * Verify the structure of a hash database. 
- * - * PUBLIC: int __ham_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, - * PUBLIC: u_int32_t)); - */ -int -__ham_vrfy_structure(dbp, vdp, meta_pgno, flags) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t meta_pgno; - u_int32_t flags; -{ - DB *pgset; - DB_MPOOLFILE *mpf; - HMETA *m; - PAGE *h; - VRFY_PAGEINFO *pip; - int isbad, p, ret, t_ret; - db_pgno_t pgno; - u_int32_t bucket, spares_entry; - - mpf = dbp->mpf; - pgset = vdp->pgset; - h = NULL; - ret = isbad = 0; - - if ((ret = __db_vrfy_pgset_get(pgset, meta_pgno, &p)) != 0) - return (ret); - if (p != 0) { - EPRINT((dbp->dbenv, - "Page %lu: Hash meta page referenced twice", - (u_long)meta_pgno)); - return (DB_VERIFY_BAD); - } - if ((ret = __db_vrfy_pgset_inc(pgset, meta_pgno)) != 0) - return (ret); - - /* Get the meta page; we'll need it frequently. */ - if ((ret = mpf->get(mpf, &meta_pgno, 0, &m)) != 0) - return (ret); - - /* Loop through bucket by bucket. */ - for (bucket = 0; bucket <= m->max_bucket; bucket++) - if ((ret = - __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - - /* - * There may be unused hash pages corresponding to buckets - * that have been allocated but not yet used. These may be - * part of the current doubling above max_bucket, or they may - * correspond to buckets that were used in a transaction - * that then aborted. - * - * Loop through them, as far as the spares array defines them, - * and make sure they're all empty. - * - * Note that this should be safe, since we've already verified - * that the spares array is sane. - */ - for (bucket = m->max_bucket + 1; spares_entry = __db_log2(bucket + 1), - spares_entry < NCACHED && m->spares[spares_entry] != 0; bucket++) { - pgno = BS_TO_PAGE(bucket, m->spares); - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - goto err; - - /* It's okay if these pages are totally zeroed; unmark it. 
*/ - F_CLR(pip, VRFY_IS_ALLZEROES); - - /* It's also OK if this page is simply invalid. */ - if (pip->type == P_INVALID) { - if ((ret = __db_vrfy_putpageinfo(dbp->dbenv, - vdp, pip)) != 0) - goto err; - continue; - } - - if (pip->type != P_HASH) { - EPRINT((dbp->dbenv, - "Page %lu: hash bucket %lu maps to non-hash page", - (u_long)pgno, (u_long)bucket)); - isbad = 1; - } else if (pip->entries != 0) { - EPRINT((dbp->dbenv, - "Page %lu: non-empty page in unused hash bucket %lu", - (u_long)pgno, (u_long)bucket)); - isbad = 1; - } else { - if ((ret = __db_vrfy_pgset_get(pgset, pgno, &p)) != 0) - goto err; - if (p != 0) { - EPRINT((dbp->dbenv, - "Page %lu: above max_bucket referenced", - (u_long)pgno)); - isbad = 1; - } else { - if ((ret = - __db_vrfy_pgset_inc(pgset, pgno)) != 0) - goto err; - if ((ret = __db_vrfy_putpageinfo(dbp->dbenv, - vdp, pip)) != 0) - goto err; - continue; - } - } - - /* If we got here, it's an error. */ - (void)__db_vrfy_putpageinfo(dbp->dbenv, vdp, pip); - goto err; - } - -err: if ((t_ret = mpf->put(mpf, m, 0)) != 0) - return (t_ret); - if (h != NULL && (t_ret = mpf->put(mpf, h, 0)) != 0) - return (t_ret); - return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD: ret); -} - -/* - * __ham_vrfy_bucket -- - * Verify a given bucket. - */ -static int -__ham_vrfy_bucket(dbp, vdp, m, bucket, flags) - DB *dbp; - VRFY_DBINFO *vdp; - HMETA *m; - u_int32_t bucket, flags; -{ - HASH *hashp; - VRFY_CHILDINFO *child; - VRFY_PAGEINFO *mip, *pip; - int ret, t_ret, isbad, p; - db_pgno_t pgno, next_pgno; - DBC *cc; - u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); - - isbad = 0; - pip = NULL; - cc = NULL; - - hashp = dbp->h_internal; - if (hashp != NULL && hashp->h_hash != NULL) - hfunc = hashp->h_hash; - else - hfunc = __ham_func5; - - if ((ret = __db_vrfy_getpageinfo(vdp, PGNO(m), &mip)) != 0) - return (ret); - - /* Calculate the first pgno for this bucket. 
*/ - pgno = BS_TO_PAGE(bucket, m->spares); - - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - goto err; - - /* Make sure we got a plausible page number. */ - if (pgno > vdp->last_pgno || pip->type != P_HASH) { - EPRINT((dbp->dbenv, - "Page %lu: impossible first page in bucket %lu", - (u_long)pgno, (u_long)bucket)); - /* Unsafe to continue. */ - isbad = 1; - goto err; - } - - if (pip->prev_pgno != PGNO_INVALID) { - EPRINT((dbp->dbenv, - "Page %lu: first page in hash bucket %lu has a prev_pgno", - (u_long)pgno, (u_long)bucket)); - isbad = 1; - } - - /* - * Set flags for dups and sorted dups. - */ - flags |= F_ISSET(mip, VRFY_HAS_DUPS) ? ST_DUPOK : 0; - flags |= F_ISSET(mip, VRFY_HAS_DUPSORT) ? ST_DUPSORT : 0; - - /* Loop until we find a fatal bug, or until we run out of pages. */ - for (;;) { - /* Provide feedback on our progress to the application. */ - if (!LF_ISSET(DB_SALVAGE)) - __db_vrfy_struct_feedback(dbp, vdp); - - if ((ret = __db_vrfy_pgset_get(vdp->pgset, pgno, &p)) != 0) - goto err; - if (p != 0) { - EPRINT((dbp->dbenv, - "Page %lu: hash page referenced twice", - (u_long)pgno)); - isbad = 1; - /* Unsafe to continue. */ - goto err; - } else if ((ret = __db_vrfy_pgset_inc(vdp->pgset, pgno)) != 0) - goto err; - - /* - * Hash pages that nothing has ever hashed to may never - * have actually come into existence, and may appear to be - * entirely zeroed. This is acceptable, and since there's - * no real way for us to know whether this has actually - * occurred, we clear the "wholly zeroed" flag on every - * hash page. A wholly zeroed page, by nature, will appear - * to have no flags set and zero entries, so should - * otherwise verify correctly. - */ - F_CLR(pip, VRFY_IS_ALLZEROES); - - /* If we have dups, our meta page had better know about it. 
*/ - if (F_ISSET(pip, VRFY_HAS_DUPS) && - !F_ISSET(mip, VRFY_HAS_DUPS)) { - EPRINT((dbp->dbenv, - "Page %lu: duplicates present in non-duplicate database", - (u_long)pgno)); - isbad = 1; - } - - /* - * If the database has sorted dups, this page had better - * not have unsorted ones. - */ - if (F_ISSET(mip, VRFY_HAS_DUPSORT) && - F_ISSET(pip, VRFY_DUPS_UNSORTED)) { - EPRINT((dbp->dbenv, - "Page %lu: unsorted dups in sorted-dup database", - (u_long)pgno)); - isbad = 1; - } - - /* Walk overflow chains and offpage dup trees. */ - if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0) - goto err; - for (ret = __db_vrfy_ccset(cc, pip->pgno, &child); ret == 0; - ret = __db_vrfy_ccnext(cc, &child)) - if (child->type == V_OVERFLOW) { - if ((ret = __db_vrfy_ovfl_structure(dbp, vdp, - child->pgno, child->tlen, flags)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - } else if (child->type == V_DUPLICATE) { - if ((ret = __db_vrfy_duptype(dbp, - vdp, child->pgno, flags)) != 0) { - isbad = 1; - continue; - } - if ((ret = __bam_vrfy_subtree(dbp, vdp, - child->pgno, NULL, NULL, - flags | ST_RECNUM | ST_DUPSET | ST_TOPLEVEL, - NULL, NULL, NULL)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - } - if ((ret = __db_vrfy_ccclose(cc)) != 0) - goto err; - cc = NULL; - - /* If it's safe to check that things hash properly, do so. */ - if (isbad == 0 && !LF_ISSET(DB_NOORDERCHK) && - (ret = __ham_vrfy_hashing(dbp, pip->entries, - m, bucket, pgno, flags, hfunc)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - - next_pgno = pip->next_pgno; - ret = __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip); - - pip = NULL; - if (ret != 0) - goto err; - - if (next_pgno == PGNO_INVALID) - break; /* End of the bucket. */ - - /* We already checked this, but just in case... 
*/ - if (!IS_VALID_PGNO(next_pgno)) { - DB_ASSERT(0); - EPRINT((dbp->dbenv, - "Page %lu: hash page has bad next_pgno", - (u_long)pgno)); - isbad = 1; - goto err; - } - - if ((ret = __db_vrfy_getpageinfo(vdp, next_pgno, &pip)) != 0) - goto err; - - if (pip->prev_pgno != pgno) { - EPRINT((dbp->dbenv, - "Page %lu: hash page has bad prev_pgno", - (u_long)next_pgno)); - isbad = 1; - } - pgno = next_pgno; - } - -err: if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0) - ret = t_ret; - if (mip != NULL && ((t_ret = - __db_vrfy_putpageinfo(dbp->dbenv, vdp, mip)) != 0) && ret == 0) - ret = t_ret; - if (pip != NULL && ((t_ret = - __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0) && ret == 0) - ret = t_ret; - return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); -} - -/* - * __ham_vrfy_hashing -- - * Verify that all items on a given hash page hash correctly. - * - * PUBLIC: int __ham_vrfy_hashing __P((DB *, - * PUBLIC: u_int32_t, HMETA *, u_int32_t, db_pgno_t, u_int32_t, - * PUBLIC: u_int32_t (*) __P((DB *, const void *, u_int32_t)))); - */ -int -__ham_vrfy_hashing(dbp, nentries, m, thisbucket, pgno, flags, hfunc) - DB *dbp; - u_int32_t nentries; - HMETA *m; - u_int32_t thisbucket; - db_pgno_t pgno; - u_int32_t flags; - u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); -{ - DBT dbt; - DB_MPOOLFILE *mpf; - PAGE *h; - db_indx_t i; - int ret, t_ret, isbad; - u_int32_t hval, bucket; - - mpf = dbp->mpf; - ret = isbad = 0; - - memset(&dbt, 0, sizeof(DBT)); - F_SET(&dbt, DB_DBT_REALLOC); - - if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0) - return (ret); - - for (i = 0; i < nentries; i += 2) { - /* - * We've already verified the page integrity and that of any - * overflow chains linked off it; it is therefore safe to use - * __db_ret. It's also not all that much slower, since we have - * to copy every hash item to deal with alignment anyway; we - * can tweak this a bit if this proves to be a bottleneck, - * but for now, take the easy route. 
- */ - if ((ret = __db_ret(dbp, h, i, &dbt, NULL, NULL)) != 0) - goto err; - hval = hfunc(dbp, dbt.data, dbt.size); - - bucket = hval & m->high_mask; - if (bucket > m->max_bucket) - bucket = bucket & m->low_mask; - - if (bucket != thisbucket) { - EPRINT((dbp->dbenv, - "Page %lu: item %lu hashes incorrectly", - (u_long)pgno, (u_long)i)); - isbad = 1; - } - } - -err: if (dbt.data != NULL) - __os_ufree(dbp->dbenv, dbt.data); - if ((t_ret = mpf->put(mpf, h, 0)) != 0) - return (t_ret); - - return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); -} - -/* - * __ham_salvage -- - * Safely dump out anything that looks like a key on an alleged - * hash page. - * - * PUBLIC: int __ham_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *, - * PUBLIC: void *, int (*)(void *, const void *), u_int32_t)); - */ -int -__ham_salvage(dbp, vdp, pgno, h, handle, callback, flags) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t pgno; - PAGE *h; - void *handle; - int (*callback) __P((void *, const void *)); - u_int32_t flags; -{ - DBT dbt, unkdbt; - db_pgno_t dpgno; - int ret, err_ret, t_ret; - u_int32_t himark, tlen; - u_int8_t *hk; - void *buf; - u_int32_t dlen, len, i; - - memset(&dbt, 0, sizeof(DBT)); - dbt.flags = DB_DBT_REALLOC; - - memset(&unkdbt, 0, sizeof(DBT)); - unkdbt.size = (u_int32_t)strlen("UNKNOWN") + 1; - unkdbt.data = "UNKNOWN"; - - err_ret = 0; - - /* - * Allocate a buffer for overflow items. Start at one page; - * __db_safe_goff will realloc as needed. - */ - if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &buf)) != 0) - return (ret); - - himark = dbp->pgsize; - for (i = 0;; i++) { - /* If we're not aggressive, break when we hit NUM_ENT(h). */ - if (!LF_ISSET(DB_AGGRESSIVE) && i >= NUM_ENT(h)) - break; - - /* Verify the current item. */ - ret = __db_vrfy_inpitem(dbp, - h, pgno, i, 0, flags, &himark, NULL); - /* If this returned a fatality, it's time to break. 
*/ - if (ret == DB_VERIFY_FATAL) - break; - - if (ret == 0) { - hk = P_ENTRY(dbp, h, i); - len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i); - if ((u_int32_t)(hk + len - (u_int8_t *)h) > - dbp->pgsize) { - /* - * Item is unsafely large; either continue - * or set it to the whole page, depending on - * aggressiveness. - */ - if (!LF_ISSET(DB_AGGRESSIVE)) - continue; - len = dbp->pgsize - - (u_int32_t)(hk - (u_int8_t *)h); - err_ret = DB_VERIFY_BAD; - } - switch (HPAGE_PTYPE(hk)) { - default: - if (!LF_ISSET(DB_AGGRESSIVE)) - break; - err_ret = DB_VERIFY_BAD; - /* FALLTHROUGH */ - case H_KEYDATA: -keydata: memcpy(buf, HKEYDATA_DATA(hk), len); - dbt.size = len; - dbt.data = buf; - if ((ret = __db_prdbt(&dbt, - 0, " ", handle, callback, 0, vdp)) != 0) - err_ret = ret; - break; - case H_OFFPAGE: - if (len < HOFFPAGE_SIZE) { - err_ret = DB_VERIFY_BAD; - continue; - } - memcpy(&dpgno, - HOFFPAGE_PGNO(hk), sizeof(dpgno)); - if ((ret = __db_safe_goff(dbp, vdp, - dpgno, &dbt, &buf, flags)) != 0) { - err_ret = ret; - (void)__db_prdbt(&unkdbt, 0, " ", - handle, callback, 0, vdp); - break; - } - if ((ret = __db_prdbt(&dbt, - 0, " ", handle, callback, 0, vdp)) != 0) - err_ret = ret; - break; - case H_OFFDUP: - if (len < HOFFPAGE_SIZE) { - err_ret = DB_VERIFY_BAD; - continue; - } - memcpy(&dpgno, - HOFFPAGE_PGNO(hk), sizeof(dpgno)); - /* UNKNOWN iff pgno is bad or we're a key. */ - if (!IS_VALID_PGNO(dpgno) || (i % 2 == 0)) { - if ((ret = __db_prdbt(&unkdbt, 0, " ", - handle, callback, 0, vdp)) != 0) - err_ret = ret; - } else if ((ret = __db_salvage_duptree(dbp, - vdp, dpgno, &dbt, handle, callback, - flags | SA_SKIPFIRSTKEY)) != 0) - err_ret = ret; - break; - case H_DUPLICATE: - /* - * We're a key; printing dups will seriously - * foul the output. If we're being aggressive, - * pretend this is a key and let the app. - * programmer sort out the mess. - */ - if (i % 2 == 0) { - err_ret = ret; - if (LF_ISSET(DB_AGGRESSIVE)) - goto keydata; - break; - } - - /* Too small to have any data. 
*/ - if (len < - HKEYDATA_SIZE(2 * sizeof(db_indx_t))) { - err_ret = DB_VERIFY_BAD; - continue; - } - - /* Loop until we hit the total length. */ - for (tlen = 0; tlen + sizeof(db_indx_t) < len; - tlen += dlen) { - tlen += sizeof(db_indx_t); - memcpy(&dlen, hk, sizeof(db_indx_t)); - /* - * If dlen is too long, print all the - * rest of the dup set in a chunk. - */ - if (dlen + tlen > len) - dlen = len - tlen; - memcpy(buf, hk + tlen, dlen); - dbt.size = dlen; - dbt.data = buf; - if ((ret = __db_prdbt(&dbt, 0, " ", - handle, callback, 0, vdp)) != 0) - err_ret = ret; - tlen += sizeof(db_indx_t); - } - break; - } - } - } - - __os_free(dbp->dbenv, buf); - if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0) - return (t_ret); - return ((ret == 0 && err_ret != 0) ? err_ret : ret); -} - -/* - * __ham_meta2pgset -- - * Return the set of hash pages corresponding to the given - * known-good meta page. - * - * PUBLIC: int __ham_meta2pgset __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t, - * PUBLIC: DB *)); - */ -int __ham_meta2pgset(dbp, vdp, hmeta, flags, pgset) - DB *dbp; - VRFY_DBINFO *vdp; - HMETA *hmeta; - u_int32_t flags; - DB *pgset; -{ - DB_MPOOLFILE *mpf; - PAGE *h; - db_pgno_t pgno; - u_int32_t bucket, totpgs; - int ret, val; - - /* - * We don't really need flags, but leave them for consistency with - * __bam_meta2pgset. - */ - COMPQUIET(flags, 0); - - DB_ASSERT(pgset != NULL); - - mpf = dbp->mpf; - totpgs = 0; - - /* - * Loop through all the buckets, pushing onto pgset the corresponding - * page(s) for each one. - */ - for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) { - pgno = BS_TO_PAGE(bucket, hmeta->spares); - - /* - * We know the initial pgno is safe because the spares array has - * been verified. - * - * Safely walk the list of pages in this bucket. - */ - for (;;) { - if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0) - return (ret); - if (TYPE(h) == P_HASH) { - - /* - * Make sure we don't go past the end of - * pgset. 
- */ - if (++totpgs > vdp->last_pgno) { - (void)mpf->put(mpf, h, 0); - return (DB_VERIFY_BAD); - } - if ((ret = - __db_vrfy_pgset_inc(pgset, pgno)) != 0) { - (void)mpf->put(mpf, h, 0); - return (ret); - } - - pgno = NEXT_PGNO(h); - } else - pgno = PGNO_INVALID; - - if ((ret = mpf->put(mpf, h, 0)) != 0) - return (ret); - - /* If the new pgno is wonky, go onto the next bucket. */ - if (!IS_VALID_PGNO(pgno) || - pgno == PGNO_INVALID) - break; - - /* - * If we've touched this page before, we have a cycle; - * go on to the next bucket. - */ - if ((ret = __db_vrfy_pgset_get(pgset, pgno, &val)) != 0) - return (ret); - if (val != 0) - break; - } - } - return (0); -} - -/* - * __ham_dups_unsorted -- - * Takes a known-safe hash duplicate set and its total length. - * Returns 1 if there are out-of-order duplicates in this set, - * 0 if there are not. - */ -static int -__ham_dups_unsorted(dbp, buf, len) - DB *dbp; - u_int8_t *buf; - u_int32_t len; -{ - DBT a, b; - db_indx_t offset, dlen; - int (*func) __P((DB *, const DBT *, const DBT *)); - - memset(&a, 0, sizeof(DBT)); - memset(&b, 0, sizeof(DBT)); - - func = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare; - - /* - * Loop through the dup set until we hit the end or we find - * a pair of dups that's out of order. b is always the current - * dup, a the one before it. - */ - for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) { - memcpy(&dlen, buf + offset, sizeof(db_indx_t)); - b.data = buf + offset + sizeof(db_indx_t); - b.size = dlen; - - if (a.data != NULL && func(dbp, &a, &b) > 0) - return (1); - - a.data = b.data; - a.size = b.size; - } - - return (0); -} |