diff options
Diffstat (limited to 'src/db/db_cam.c')
| -rw-r--r-- | src/db/db_cam.c | 367 |
1 files changed, 360 insertions, 7 deletions
diff --git a/src/db/db_cam.c b/src/db/db_cam.c index 6ee8b579..1a330bdb 100644 --- a/src/db/db_cam.c +++ b/src/db/db_cam.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2000, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -11,6 +11,7 @@ #include "db_int.h" #include "dbinc/db_page.h" #include "dbinc/btree.h" +#include "dbinc/fop.h" #include "dbinc/hash.h" #include "dbinc/heap.h" #include "dbinc/lock.h" @@ -83,6 +84,9 @@ __dbc_close(dbc) DB *dbp; DBC *opd; DBC_INTERNAL *cp; +#ifdef DIAGNOSTIC + DB_THREAD_INFO *ip; +#endif DB_TXN *txn; ENV *env; int ret, t_ret; @@ -149,6 +153,14 @@ __dbc_close(dbc) ret = t_ret; F_CLR(dbc, DBC_FAMILY); } +#ifdef DIAGNOSTIC + if (dbc->locker != NULL) { + ENV_GET_THREAD_INFO(env, ip); + if (ip != NULL) + ip->dbth_locker = dbc->locker->prev_locker; + dbc->locker->prev_locker = INVALID_ROFF; + } +#endif if ((txn = dbc->txn) != NULL) txn->cursors--; @@ -510,6 +522,305 @@ __dbc_idel(dbc, flags) return (ret); } +/* + * __dbc_db_stream -- + * + * DBC->db_stream + * + * PUBLIC: int __dbc_db_stream __P((DBC *, DB_STREAM **, u_int32_t)); + */ +int +__dbc_db_stream(dbc, dbsp, flags) + DBC *dbc; + DB_STREAM **dbsp; + u_int32_t flags; +{ + ENV *env; + int ret; + u_int32_t oflags; + + env = dbc->env; + oflags = flags; + + if ((ret = __db_fchk( + env, "DBC->db_stream", flags, + DB_STREAM_READ | DB_STREAM_WRITE | DB_STREAM_SYNC_WRITE)) != 0) + return (ret); + + if (DB_IS_READONLY(dbc->dbp)) { + LF_SET(DB_STREAM_READ); + oflags |= DB_STREAM_READ; + } + if (LF_ISSET(DB_STREAM_READ) && LF_ISSET(DB_STREAM_WRITE)) { + ret = EINVAL; + __db_errx(env, DB_STR("0750", + "Error, cannot set both DB_STREAM_WRITE and DB_STREAM_READ.")); + goto err; + } + + if (oflags & DB_STREAM_READ) + LF_SET(DB_FOP_READONLY); + else + LF_SET(DB_FOP_WRITE); + if (oflags & DB_STREAM_SYNC_WRITE) + LF_SET(DB_FOP_SYNC_WRITE); + + ret = 
__db_stream_init(dbc, dbsp, flags); + +err: return (ret); +} + +/* + * __dbc_get_blob_id -- + * + * Returns the blob id stored in the data record to which the cursor currently + * points. Returns EINVAL if the cursor does not point to a blob record. + * + * PUBLIC: int __dbc_get_blob_id __P((DBC *, db_seq_t *)); + */ +int +__dbc_get_blob_id(dbc, blob_id) + DBC *dbc; + db_seq_t *blob_id; +{ + DBT key, data; + BBLOB bl; + HBLOB hbl; + HEAPBLOBHDR bhdr; + int ret; + + if (dbc->dbtype != DB_BTREE && + dbc->dbtype != DB_HEAP && dbc->dbtype != DB_HASH) { + return (EINVAL); + } + + ret = 0; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + /* Get the blob database record instead of the blob. */ + data.flags |= DB_DBT_BLOB_REC; + + /* + * It would be great if there was a more efficient way to do this, but + * the complexities of getting a page from a database, especially + * when taking into account things like partitions and compression, + * make that more trouble than it is worth. 
+ */ + if ((ret = __dbc_get(dbc, &key, &data, DB_CURRENT)) != 0) + goto err; + + switch (dbc->dbtype) { + case DB_BTREE: + if (data.size != BBLOB_SIZE) { + ret = EINVAL; + goto err; + } + memcpy(&bl, data.data, BBLOB_SIZE); + if (B_TYPE(bl.type) != B_BLOB) { + ret = EINVAL; + goto err; + } + *blob_id = (db_seq_t)bl.id; + break; + case DB_HEAP: + if (data.size != HEAPBLOBREC_SIZE) { + ret = EINVAL; + goto err; + } + memcpy(&bhdr, data.data, HEAPBLOBREC_SIZE); + if (!F_ISSET(&bhdr.std_hdr, HEAP_RECBLOB)) { + ret = EINVAL; + goto err; + } + *blob_id = (db_seq_t)bhdr.id; + break; + case DB_HASH: + if (data.size != HBLOB_SIZE) { + ret = EINVAL; + goto err; + } + memcpy(&hbl, data.data, HBLOB_SIZE); + if (HPAGE_PTYPE(&hbl) != H_BLOB) { + ret = EINVAL; + goto err; + } + *blob_id = (db_seq_t)hbl.id; + break; + default: + ret = EINVAL; + goto err; + } + +err: return (ret); +} + +/* + * __dbc_get_blob_size -- + * + * Returns the blob file size stored in the data record to which the cursor + * currently points. Returns EINVAL if the cursor does not point to a blob + * record. + * + * PUBLIC: int __dbc_get_blob_size __P((DBC *, off_t *)); + */ +int +__dbc_get_blob_size(dbc, size) + DBC *dbc; + off_t *size; +{ + DBT key, data; + ENV *env; + BBLOB bl; + HBLOB hbl; + HEAPBLOBHDR bhdr; + int ret; + + if (dbc->dbtype != DB_BTREE && + dbc->dbtype != DB_HEAP && dbc->dbtype != DB_HASH) { + return (EINVAL); + } + + env = dbc->env; + ret = 0; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + /* Get the blob database record instead of the blob. */ + data.flags |= DB_DBT_BLOB_REC; + + /* + * It would be great if there was a more efficient way to do this, but + * the complexities of getting a page from a database, especially + * when taking into account things like partitions and compression, + * make that more trouble than it is worth. 
+ */ + if ((ret = __dbc_get(dbc, &key, &data, DB_CURRENT)) != 0) + goto err; + + switch (dbc->dbtype) { + case DB_BTREE: + if (data.size != BBLOB_SIZE) { + ret = EINVAL; + goto err; + } + memcpy(&bl, data.data, BBLOB_SIZE); + if (B_TYPE(bl.type) != B_BLOB) { + ret = EINVAL; + goto err; + } + GET_BLOB_SIZE(env, bl, *size, ret); + break; + case DB_HEAP: + if (data.size != HEAPBLOBREC_SIZE) { + ret = EINVAL; + goto err; + } + memcpy(&bhdr, data.data, HEAPBLOBREC_SIZE); + if (!F_ISSET(&bhdr.std_hdr, HEAP_RECBLOB)) { + ret = EINVAL; + goto err; + } + GET_BLOB_SIZE(env, bhdr, *size, ret); + break; + case DB_HASH: + if (data.size != HBLOB_SIZE) { + ret = EINVAL; + goto err; + } + memcpy(&hbl, data.data, HBLOB_SIZE); + if (HPAGE_PTYPE(&hbl) != H_BLOB) { + ret = EINVAL; + goto err; + } + GET_BLOB_SIZE(env, hbl, *size, ret); + break; + default: + ret = EINVAL; + goto err; + } + +err: return (ret); +} + +/* + * __dbc_set_blob_size -- + * + * Sets the blob file size in the data record to which the cursor + * currently points. Returns EINVAL if the cursor does not point to a blob + * record. + * + * PUBLIC: int __dbc_set_blob_size __P((DBC *, off_t)); + */ +int +__dbc_set_blob_size(dbc, size) + DBC *dbc; + off_t size; +{ + DBT key, data; + BBLOB *bl; + HBLOB *hbl; + HEAPBLOBHDR *bhdr; + int ret; + + if (dbc->dbtype != DB_BTREE && + dbc->dbtype != DB_HEAP && dbc->dbtype != DB_HASH) { + return (EINVAL); + } + + ret = 0; + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + /* Get the blob database record instead of the blob. */ + data.flags |= DB_DBT_BLOB_REC; + + /* + * It would be great if there was a more efficient way to do this, but + * the complexities of getting a page from a database, especially + * when taking into account things like partitions and compression, + * make that more trouble than it is worth. 
+ */ + if ((ret = __dbc_get(dbc, &key, &data, DB_CURRENT)) != 0) + goto err; + + /* + * Validate the record length before looking at its type field, so a + * record shorter than the blob header is rejected with EINVAL instead + * of being read past its end. + */ + switch (dbc->dbtype) { + case DB_BTREE: + bl = (BBLOB *)data.data; + if (bl == NULL || + data.size != BBLOB_SIZE || B_TYPE(bl->type) != B_BLOB) { + ret = EINVAL; + goto err; + } + SET_BLOB_SIZE(bl, size, BBLOB); + break; + case DB_HEAP: + bhdr = (HEAPBLOBHDR *)data.data; + if (bhdr == NULL || + data.size != HEAPBLOBREC_SIZE || + !F_ISSET(&bhdr->std_hdr, HEAP_RECBLOB)) { + ret = EINVAL; + goto err; + } + SET_BLOB_SIZE(bhdr, size, HEAPBLOBHDR); + break; + case DB_HASH: + hbl = data.data; + if (hbl == NULL || + data.size != HBLOB_SIZE || HPAGE_PTYPE(hbl) != H_BLOB) { + ret = EINVAL; + goto err; + } + SET_BLOB_SIZE((HBLOB *)hbl, size, HBLOB); + break; + default: + ret = EINVAL; + goto err; + } + + /* Write the updated blob record back at the cursor's current position. */ + if ((ret = __dbc_put(dbc, &key, &data, DB_CURRENT)) != 0) + goto err; + +err: return (ret); +} + #ifdef HAVE_COMPRESSION /* * __dbc_bulk_del -- @@ -632,6 +943,12 @@ __dbc_idup(dbc_orig, dbcp, flags) int_n->stream_off = int_orig->stream_off; int_n->stream_curr_pgno = int_orig->stream_curr_pgno; +#ifdef HAVE_PARTITION + if (DB_IS_PARTITIONED(dbp)) { + if ((ret = __partc_dup(dbc_orig, dbc_n)) != 0) + goto err; + } else +#endif switch (dbc_orig->dbtype) { case DB_QUEUE: if ((ret = __qamc_dup(dbc_orig, dbc_n)) != 0) @@ -859,7 +1176,11 @@ __dbc_iget(dbc, key, data, flags) * we acquire a write lock in the primary tree and no locks in the * off-page dup tree. If the DB_RMW flag was specified and the get * operation is done in an off-page duplicate tree, call the primary - * cursor's upgrade routine first. + * cursor's upgrade routine first. We fetch the primary tree's data + * page to follow the buffer latching order rules for btrees: latch from + * the top of the main tree down, even when also searching OPD trees. + * Deadlocks could otherwise occur if we need to fetch the main page + * while an OPD page is latched.
[#22532] */ cp = dbc->internal; if (cp->opd != NULL && @@ -868,6 +1189,10 @@ __dbc_iget(dbc, key, data, flags) flags == DB_PREV || flags == DB_PREV_DUP)) { if (tmp_rmw && (ret = dbc->am_writelock(dbc)) != 0) goto err; + if (cp->page == NULL && (ret = __memp_fget(mpf, &cp->pgno, + dbc->thread_info, dbc->txn, 0, &cp->page)) != 0) + goto err; + if (F_ISSET(dbc, DBC_TRANSIENT)) opd = cp->opd; else if ((ret = __dbc_idup(cp->opd, &opd, DB_POSITION)) != 0) @@ -1660,7 +1985,7 @@ __dbc_put_secondaries(dbc, tskeyp, &oldpkey, rmw | DB_SET); if (ret == 0) { cmp = __bam_defcmp(sdbp, - &oldpkey, pkey); + &oldpkey, pkey, NULL); __os_ufree(env, oldpkey.data); /* * If the secondary key is unchanged, @@ -1868,7 +2193,7 @@ __dbc_put_primary(dbc, key, data, flags) olddata.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM; ret = __dbc_get(dbc, key, &olddata, DB_SET); if (ret == 0) { - ret = DB_KEYEXIST; + ret = DBC_ERR(dbc, DB_KEYEXIST); goto done; } else if (ret != DB_NOTFOUND && ret != DB_KEYEMPTY) goto err; @@ -2100,7 +2425,7 @@ __dbc_iput(dbc, key, data, flags) if (dbc->dbtype == DB_HASH && F_ISSET( ((BTREE_CURSOR *)(dbc->internal->opd->internal)), C_DELETED)) { - ret = DB_NOTFOUND; + ret = DBC_ERR(dbc, DB_NOTFOUND); goto err; } @@ -2228,7 +2553,7 @@ __dbc_del_oldskey(sdbp, dbc, skey, pkey, olddata) */ for (i = 0, tskeyp = skey; i < nskey; i++, tskeyp++) if (((BTREE *)sdbp->bt_internal)->bt_compare(sdbp, - toldskeyp, tskeyp) == 0) { + toldskeyp, tskeyp, NULL) == 0) { nsame++; F_CLR(tskeyp, DB_DBT_ISSET); break; @@ -2382,12 +2707,14 @@ __dbc_cleanup(dbc, dbc_n, failed) * cursors. 
*/ if (!failed && ret == 0) { + MUTEX_LOCK(dbp->env, dbp->mutex); if (opd != NULL) opd->internal->pdbc = dbc; if (internal->opd != NULL) internal->opd->internal->pdbc = dbc_n; dbc->internal = dbc_n->internal; dbc_n->internal = internal; + MUTEX_UNLOCK(dbp->env, dbp->mutex); } /* @@ -3501,6 +3828,32 @@ __db_check_skeyset(sdbp, skeyp) for (key2 = key1 + 1; key2 < last_key; key2++) DB_ASSERT(env, ((BTREE *)sdbp->bt_internal)->bt_compare(sdbp, - key1, key2) != 0); + key1, key2, NULL) != 0); +} +#endif + +#ifdef HAVE_ERROR_HISTORY +/* + * __dbc_diags + * Save the context which triggers the "first notice" of an error code; + * i.e., its creation. It doesn't touch anything when err == 0. + * + * PUBLIC: int __dbc_diags __P((DBC *, int)); + */ + int + __dbc_diags(dbc, err) + DBC *dbc; + int err; +{ + DB_MSGBUF *mb; + + if (err != 0 && dbc->env != NULL && + (mb = __db_deferred_get()) != NULL) { + (void)__db_remember_context(dbc->env, mb, err); + __db_msgadd(dbc->env, mb, "DB: %s:%s\n" , + dbc->dbp->fname == NULL ? "in-mem" : dbc->dbp->fname, + dbc->dbp->dname == NULL ? "" : dbc->dbp->fname); + } + return (err); } #endif |
