diff options
| author | Lorry Tar Creator <lorry-tar-importer@baserock.org> | 2015-02-17 17:25:57 +0000 |
|---|---|---|
| committer | <> | 2015-03-17 16:26:24 +0000 |
| commit | 780b92ada9afcf1d58085a83a0b9e6bc982203d1 (patch) | |
| tree | 598f8b9fa431b228d29897e798de4ac0c1d3d970 /src/btree/bt_split.c | |
| parent | 7a2660ba9cc2dc03a69ddfcfd95369395cc87444 (diff) | |
| download | berkeleydb-master.tar.gz | |
Diffstat (limited to 'src/btree/bt_split.c')
| -rw-r--r-- | src/btree/bt_split.c | 118 |
1 files changed, 86 insertions, 32 deletions
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 8299c69a..f7719dc4 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. */ /* * Copyright (c) 1990, 1993, 1994, 1995, 1996 @@ -63,7 +63,7 @@ __bam_split(dbc, arg, root_pgnop) db_pgno_t *root_pgnop; { BTREE_CURSOR *cp; - DB_LOCK metalock, next_lock; + DB_LOCK meta_lock, next_lock; enum { UP, DOWN } dir; db_pgno_t pgno, next_pgno, root_pgno; int exact, level, ret; @@ -72,17 +72,16 @@ __bam_split(dbc, arg, root_pgnop) LOCK_CHECK_OFF(dbc->thread_info); cp = (BTREE_CURSOR *)dbc->internal; + LOCK_INIT(meta_lock); LOCK_INIT(next_lock); next_pgno = PGNO_INVALID; /* - * First get a lock on the metadata page, we will have to allocate + * First get a lock on the metadata page; we will have to allocate * pages and cannot get a lock while we have the search tree pinned. */ - pgno = PGNO_BASE_MD; - if ((ret = __db_lget(dbc, - 0, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) + if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, &meta_lock)) != 0) goto err; root_pgno = BAM_ROOT_PGNO(dbc); @@ -189,7 +188,7 @@ no_split: /* Once we've split the leaf page, we're done. */ if (root_pgnop != NULL) *root_pgnop = BAM_ROOT_PGNO(dbc); err: -done: (void)__LPUT(dbc, metalock); +done: (void)__LPUT(dbc, meta_lock); (void)__TLPUT(dbc, next_lock); if (F_ISSET(dbc, DBC_OPD)) @@ -685,6 +684,7 @@ __bam_broot(dbc, rootp, split, lp, rp) DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data)); DB_SET_DBT(data, &bo, BOVERFLOW_SIZE); break; + case B_BLOB: case B_DUPLICATE: default: goto pgfmt; @@ -772,7 +772,30 @@ __ram_root(dbc, rootp, lp, rp) /* * __bam_pinsert -- - * Insert a new key into a parent page, completing the split. + * + * Construct a internal index item and place it in the parent page. It is + * primarily used by __bam_page() to add a new page into the tree. The sole + * other use is by __bam_pupdate() after a reverse split or compact has + * removed pages underneath it, in order to replace the parent's key/nrecs + * to match the new subtree. + * + * Parameters: + * parent - the page from the cursor stack to be modifed. The next entry + * in the stack (i.e., the next lower level in the tree) contains + * the key of the new item. The indx field must have been set + * when searching down the tree, to point to the new/replaced + * parent item. + * split - the indx in the cursor stack of the 'source' of the new item. + * lchild - the left child page is used *only* when attempting to use + * prefix key compression on a leaf (data) page. + * rchild - right child page. The source of the pgno of the new item. + * flags - BPI_REPLACE | BPI_NORENCUM + * BPI_NOLOGGING + * + * The pgno of the item always comes from rchild, which often is the same + * as parent[1].page. The key for DB_BTREE comes from the next lower page + * in the stack under parent, not from either lchild or rchild parameter -- + * though often rchild is a copy of parent[1].page. * * PUBLIC: int __bam_pinsert * PUBLIC: __P((DBC *, EPG *, u_int32_t, PAGE *, PAGE *, int)); @@ -867,12 +890,27 @@ __bam_pinsert(dbc, parent, split, lchild, rchild, flags) size = BINTERNAL_SIZE(child_bi->len); break; case B_OVERFLOW: - /* Reuse the overflow key. */ + /* Copy the overflow key. */ child_bo = (BOVERFLOW *)child_bi->data; memset(&bo, 0, sizeof(bo)); bo.type = B_OVERFLOW; bo.tlen = child_bo->tlen; - bo.pgno = child_bo->pgno; + if (LF_ISSET(BPI_REPLACE)) { + /* + * Replace (compact or reverse split) needs to + * copy in case the data item gets removed. + */ + memset(&hdr, 0, sizeof(hdr)); + if ((ret = __db_goff(dbc, &hdr, + child_bo->tlen, child_bo->pgno, + &hdr.data, &hdr.size)) == 0) + ret = __db_poff(dbc, &hdr, &bo.pgno); + if (hdr.data != NULL) + __os_free(dbp->env, hdr.data); + if (ret != 0) + return (ret); + } else + bo.pgno = child_bo->pgno; bi.len = BOVERFLOW_SIZE; B_TSET(bi.type, B_OVERFLOW); bi.pgno = rchild->pgno; @@ -881,6 +919,7 @@ __bam_pinsert(dbc, parent, split, lchild, rchild, flags) DB_SET_DBT(data, &bo, BOVERFLOW_SIZE); size = BINTERNAL_SIZE(BOVERFLOW_SIZE); break; + case B_BLOB: case B_DUPLICATE: default: goto pgfmt; @@ -982,8 +1021,8 @@ noprefix: if (P_FREESPACE(dbp, ppage) + oldsize < nbytes) DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data)); DB_SET_DBT(data, &bo, BOVERFLOW_SIZE); size = BINTERNAL_SIZE(BOVERFLOW_SIZE); - break; + case B_BLOB: case B_DUPLICATE: default: goto pgfmt; @@ -1153,23 +1192,32 @@ __bam_psplit(dbc, cp, lp, rp, splitret) nbytes += BINTERNAL_SIZE(BOVERFLOW_SIZE); break; case P_LBTREE: - if (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) == - B_KEYDATA) - nbytes += BKEYDATA_SIZE(GET_BKEYDATA(dbp, - pp, off)->len); - else + switch (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type)) { + case B_KEYDATA: + nbytes += BKEYDATA_SIZE( + GET_BKEYDATA(dbp, pp, off)->len); + break; + case B_BLOB: + nbytes += BBLOB_SIZE; + break; + default: nbytes += BOVERFLOW_SIZE; - + } ++off; /* FALLTHROUGH */ case P_LDUP: case P_LRECNO: - if (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) == - B_KEYDATA) - nbytes += BKEYDATA_SIZE(GET_BKEYDATA(dbp, - pp, off)->len); - else + switch (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type)) { + case B_KEYDATA: + nbytes += BKEYDATA_SIZE( + GET_BKEYDATA(dbp, pp, off)->len); + break; + case B_BLOB: + nbytes += BBLOB_SIZE; + break; + default: nbytes += BOVERFLOW_SIZE; + } break; case P_IRECNO: nbytes += RINTERNAL_SIZE; @@ -1269,7 +1317,7 @@ __bam_copy(dbp, pp, cp, nxt, stop) PAGE *pp, *cp; u_int32_t nxt, stop; { - BINTERNAL internal; + BINTERNAL *bi, internal; db_indx_t *cinp, nbytes, off, *pinp; cinp = P_INP(dbp, cp); @@ -1302,12 +1350,17 @@ __bam_copy(dbp, pp, cp, nxt, stop) /* FALLTHROUGH */ case P_LDUP: case P_LRECNO: - if (B_TYPE(GET_BKEYDATA(dbp, pp, nxt)->type) == - B_KEYDATA) - nbytes = BKEYDATA_SIZE(GET_BKEYDATA(dbp, - pp, nxt)->len); - else + switch (B_TYPE(GET_BKEYDATA(dbp, pp, nxt)->type)) { + case B_KEYDATA: + nbytes = BKEYDATA_SIZE( + GET_BKEYDATA(dbp, pp, nxt)->len); + break; + case B_BLOB: + nbytes = BBLOB_SIZE; + break; + default: nbytes = BOVERFLOW_SIZE; + } break; case P_IRECNO: nbytes = RINTERNAL_SIZE; @@ -1316,17 +1369,18 @@ __bam_copy(dbp, pp, cp, nxt, stop) return (__db_pgfmt(dbp->env, pp->pgno)); } cinp[off] = HOFFSET(cp) -= nbytes; + /* Minimize the first key on an IBTREE page; it isn't valid. */ + bi = GET_BINTERNAL(dbp, pp, nxt); if (off == 0 && nxt != 0 && TYPE(pp) == P_IBTREE) { internal.len = 0; UMRW_SET(internal.unused); internal.type = B_KEYDATA; - internal.pgno = GET_BINTERNAL(dbp, pp, nxt)->pgno; - internal.nrecs = GET_BINTERNAL(dbp, pp, nxt)->nrecs; + internal.pgno = bi->pgno; + internal.nrecs = bi->nrecs; memcpy(P_ENTRY(dbp, cp, off), &internal, nbytes); } else - memcpy(P_ENTRY(dbp, cp, off), - P_ENTRY(dbp, pp, nxt), nbytes); + memcpy(P_ENTRY(dbp, cp, off), bi, nbytes); } return (0); } |
