summaryrefslogtreecommitdiff
path: root/src/btree/bt_compare.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/btree/bt_compare.c')
-rw-r--r--src/btree/bt_compare.c105
1 files changed, 85 insertions, 20 deletions
diff --git a/src/btree/bt_compare.c b/src/btree/bt_compare.c
index 5c009071..8923c5fa 100644
--- a/src/btree/bt_compare.c
+++ b/src/btree/bt_compare.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
@@ -49,27 +49,39 @@
/*
* __bam_cmp --
- * Compare a key to a given record.
+ * Compare a key to a given record. We always start the comparison
+ * at an offset and update the offset with longest matching count
+ * after the comparison.
*
* PUBLIC: int __bam_cmp __P((DBC *, const DBT *, PAGE *, u_int32_t,
- * PUBLIC: int (*)(DB *, const DBT *, const DBT *), int *));
+ * PUBLIC: int (*)(DB *, const DBT *, const DBT *, size_t *),
+ * PUBLIC: int *, size_t *));
*/
int
-__bam_cmp(dbc, dbt, h, indx, func, cmpp)
+__bam_cmp(dbc, dbt, h, indx, func, cmpp, locp)
DBC *dbc;
const DBT *dbt;
PAGE *h;
u_int32_t indx;
- int (*func)__P((DB *, const DBT *, const DBT *));
+ int (*func)__P((DB *, const DBT *, const DBT *, size_t *));
int *cmpp;
+ size_t *locp;
{
+ BBLOB bl;
BINTERNAL *bi;
BKEYDATA *bk;
BOVERFLOW *bo;
DB *dbp;
DBT pg_dbt;
+ off_t blob_size;
+ int ret;
+ db_seq_t blob_id;
dbp = dbc->dbp;
+ ret = 0;
+
+ /* Assert that the func is non-Null. */
+ DB_ASSERT(dbp->env, func != NULL);
/*
* Returns:
@@ -91,11 +103,49 @@ __bam_cmp(dbc, dbt, h, indx, func, cmpp)
bk = GET_BKEYDATA(dbp, h, indx);
if (B_TYPE(bk->type) == B_OVERFLOW)
bo = (BOVERFLOW *)bk;
- else {
+ else if (B_TYPE(bk->type) == B_BLOB) {
+ /*
+ * This is very slow, but since blobs cannot be
+ * in databases with duplicates or be keys, it should
+ * only happen when using DB_GET_BOTH or DB_SET.
+ */
+ memcpy(&bl, bk, BBLOB_SIZE);
+ memset(&pg_dbt, 0, sizeof(DBT));
+ GET_BLOB_SIZE(dbc->env, bl, blob_size, ret);
+ if (ret != 0)
+ return (ret);
+ if (blob_size > UINT32_MAX)
+ pg_dbt.size = UINT32_MAX;
+ else
+ pg_dbt.size = (u_int32_t)blob_size;
+ blob_id = (db_seq_t)bl.id;
+ pg_dbt.flags = DB_DBT_USERMEM;
+ if ((ret = __os_malloc(
+ dbc->env, pg_dbt.size, &pg_dbt.data)) != 0)
+ return (ret);
+ pg_dbt.ulen = pg_dbt.size;
+ if ((ret = __blob_get(dbc,
+ &pg_dbt, blob_id, blob_size, NULL, NULL)) != 0) {
+ __os_free(dbc->env, pg_dbt.data);
+ return (ret);
+ }
+ *cmpp = func(dbp, dbt, &pg_dbt, locp);
+ /*
+ * There is no way to directly compare a blob file that
+ * is greater in size than UINT32_MAX, so instead we
+ * compare the data up to UINT32_MAX, and if they are
+ * equal return that the blob is larger, since it is
+ * longer than the input data.
+ */
+ if (*cmpp == 0 && (blob_size > UINT32_MAX))
+ *cmpp = -1;
+ __os_free(dbc->env, pg_dbt.data);
+ return (0);
+ } else {
pg_dbt.app_data = NULL;
pg_dbt.data = bk->data;
pg_dbt.size = bk->len;
- *cmpp = func(dbp, dbt, &pg_dbt);
+ *cmpp = func(dbp, dbt, &pg_dbt, locp);
return (0);
}
break;
@@ -123,13 +173,14 @@ __bam_cmp(dbc, dbt, h, indx, func, cmpp)
}
bi = GET_BINTERNAL(dbp, h, indx);
- if (B_TYPE(bi->type) == B_OVERFLOW)
+ if (B_TYPE(bi->type) == B_OVERFLOW) {
+ DB_ASSERT(dbp->env, bi->len == BOVERFLOW_SIZE);
bo = (BOVERFLOW *)(bi->data);
- else {
+ } else {
pg_dbt.app_data = NULL;
pg_dbt.data = bi->data;
pg_dbt.size = bi->len;
- *cmpp = func(dbp, dbt, &pg_dbt);
+ *cmpp = func(dbp, dbt, &pg_dbt, locp);
return (0);
}
break;
@@ -141,42 +192,56 @@ __bam_cmp(dbc, dbt, h, indx, func, cmpp)
* Overflow.
*/
return (__db_moff(dbc, dbt, bo->pgno, bo->tlen,
- func == __bam_defcmp ? NULL : func, cmpp));
+ func == __bam_defcmp ? NULL : func, cmpp, locp));
}
/*
* __bam_defcmp --
- * Default comparison routine.
+ * Keep track of how far along in the two keys we find matching
+ * characters, and use that as an offset into the keys to begin
+ * future comparisons. This will save us the overhead of always
+ * starting the comparisons on the first character.
*
- * PUBLIC: int __bam_defcmp __P((DB *, const DBT *, const DBT *));
+ * PUBLIC: int __bam_defcmp __P((DB *, const DBT *, const DBT *, size_t *));
*/
int
-__bam_defcmp(dbp, a, b)
+__bam_defcmp(dbp, a, b, locp)
DB *dbp;
const DBT *a, *b;
+ size_t *locp;
{
- size_t len;
+ size_t len, i, start;
u_int8_t *p1, *p2;
COMPQUIET(dbp, NULL);
-
+ start = (locp == NULL ? 0 : *locp);
/*
* Returns:
* < 0 if a is < b
* = 0 if a is = b
* > 0 if a is > b
*
+ * We start the comparison from 'locp' and store the last match
+ * location in 'locp'.
+ *
* XXX
* If a size_t doesn't fit into a long, or if the difference between
* any two characters doesn't fit into an int, this routine can lose.
* What we need is a signed integral type that's guaranteed to be at
* least as large as a size_t, and there is no such thing.
*/
+ p1 = (u_int8_t *)a->data + start;
+ p2 = (u_int8_t *)b->data + start;
len = a->size > b->size ? b->size : a->size;
- for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2)
- if (*p1 != *p2)
- return ((long)*p1 - (long)*p2);
- return ((long)a->size - (long)b->size);
+ for (i = start; i < len; ++p1, ++p2, ++i)
+ if (*p1 != *p2) {
+ if (locp != NULL)
+ *locp = i;
+ return (*p1 < *p2 ? -1 : 1);
+ }
+ if (locp != NULL)
+ *locp = len;
+ return (a->size == b->size ? 0 : (a->size < b->size ? -1 : 1));
}
/*