diff options
Diffstat (limited to 'src/btree/bt_compare.c')
| -rw-r--r-- | src/btree/bt_compare.c | 105 |
1 files changed, 85 insertions, 20 deletions
diff --git a/src/btree/bt_compare.c b/src/btree/bt_compare.c index 5c009071..8923c5fa 100644 --- a/src/btree/bt_compare.c +++ b/src/btree/bt_compare.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2015 Oracle and/or its affiliates. All rights reserved. */ /* * Copyright (c) 1990, 1993, 1994, 1995, 1996 @@ -49,27 +49,39 @@ /* * __bam_cmp -- - * Compare a key to a given record. + * Compare a key to a given record. We always start the comparison + * at an offset and update the offset with longest matching count + * after the comparison. * * PUBLIC: int __bam_cmp __P((DBC *, const DBT *, PAGE *, u_int32_t, - * PUBLIC: int (*)(DB *, const DBT *, const DBT *), int *)); + * PUBLIC: int (*)(DB *, const DBT *, const DBT *, size_t *), + * PUBLIC: int *, size_t *)); */ int -__bam_cmp(dbc, dbt, h, indx, func, cmpp) +__bam_cmp(dbc, dbt, h, indx, func, cmpp, locp) DBC *dbc; const DBT *dbt; PAGE *h; u_int32_t indx; - int (*func)__P((DB *, const DBT *, const DBT *)); + int (*func)__P((DB *, const DBT *, const DBT *, size_t *)); int *cmpp; + size_t *locp; { + BBLOB bl; BINTERNAL *bi; BKEYDATA *bk; BOVERFLOW *bo; DB *dbp; DBT pg_dbt; + off_t blob_size; + int ret; + db_seq_t blob_id; dbp = dbc->dbp; + ret = 0; + + /* Assert that the func is non-Null. */ + DB_ASSERT(dbp->env, func != NULL); /* * Returns: @@ -91,11 +103,49 @@ __bam_cmp(dbc, dbt, h, indx, func, cmpp) bk = GET_BKEYDATA(dbp, h, indx); if (B_TYPE(bk->type) == B_OVERFLOW) bo = (BOVERFLOW *)bk; - else { + else if (B_TYPE(bk->type) == B_BLOB) { + /* + * This is very slow, but since blobs cannot be + * in databases with duplicates or be keys, it should + * only happen when using DB_GET_BOTH or DB_SET. + */ + memcpy(&bl, bk, BBLOB_SIZE); + memset(&pg_dbt, 0, sizeof(DBT)); + GET_BLOB_SIZE(dbc->env, bl, blob_size, ret); + if (ret != 0) + return (ret); + if (blob_size > UINT32_MAX) + pg_dbt.size = UINT32_MAX; + else + pg_dbt.size = (u_int32_t)blob_size; + blob_id = (db_seq_t)bl.id; + pg_dbt.flags = DB_DBT_USERMEM; + if ((ret = __os_malloc( + dbc->env, pg_dbt.size, &pg_dbt.data)) != 0) + return (ret); + pg_dbt.ulen = pg_dbt.size; + if ((ret = __blob_get(dbc, + &pg_dbt, blob_id, blob_size, NULL, NULL)) != 0) { + __os_free(dbc->env, pg_dbt.data); + return (ret); + } + *cmpp = func(dbp, dbt, &pg_dbt, locp); + /* + * There is no way to directly compare a blob file that + * is greater in size than UINT32_MAX, so instead we + * compare the data up to UINT32_MAX, and if they are + * equal return that the blob is larger, since it is + * longer than the input data. + */ + if (*cmpp == 0 && (blob_size > UINT32_MAX)) + *cmpp = -1; + __os_free(dbc->env, pg_dbt.data); + return (0); + } else { pg_dbt.app_data = NULL; pg_dbt.data = bk->data; pg_dbt.size = bk->len; - *cmpp = func(dbp, dbt, &pg_dbt); + *cmpp = func(dbp, dbt, &pg_dbt, locp); return (0); } break; @@ -123,13 +173,14 @@ __bam_cmp(dbc, dbt, h, indx, func, cmpp) } bi = GET_BINTERNAL(dbp, h, indx); - if (B_TYPE(bi->type) == B_OVERFLOW) + if (B_TYPE(bi->type) == B_OVERFLOW) { + DB_ASSERT(dbp->env, bi->len == BOVERFLOW_SIZE); bo = (BOVERFLOW *)(bi->data); - else { + } else { pg_dbt.app_data = NULL; pg_dbt.data = bi->data; pg_dbt.size = bi->len; - *cmpp = func(dbp, dbt, &pg_dbt); + *cmpp = func(dbp, dbt, &pg_dbt, locp); return (0); } break; @@ -141,42 +192,56 @@ __bam_cmp(dbc, dbt, h, indx, func, cmpp) * Overflow. */ return (__db_moff(dbc, dbt, bo->pgno, bo->tlen, - func == __bam_defcmp ? NULL : func, cmpp)); + func == __bam_defcmp ? NULL : func, cmpp, locp)); } /* * __bam_defcmp -- - * Default comparison routine. + * Keep track of how far along in the two keys we find matching + * characters, and use that as an offset into the keys to begin + * future comparisons. This will save us the overhead of always + * starting the comparisons on the first character. * - * PUBLIC: int __bam_defcmp __P((DB *, const DBT *, const DBT *)); + * PUBLIC: int __bam_defcmp __P((DB *, const DBT *, const DBT *, size_t *)); */ int -__bam_defcmp(dbp, a, b) +__bam_defcmp(dbp, a, b, locp) DB *dbp; const DBT *a, *b; + size_t *locp; { - size_t len; + size_t len, i, start; u_int8_t *p1, *p2; COMPQUIET(dbp, NULL); - + start = (locp == NULL ? 0 : *locp); /* * Returns: * < 0 if a is < b * = 0 if a is = b * > 0 if a is > b * + * We start the comparison from 'locp' and store the last match + * location in 'locp'. + * * XXX * If a size_t doesn't fit into a long, or if the difference between * any two characters doesn't fit into an int, this routine can lose. * What we need is a signed integral type that's guaranteed to be at * least as large as a size_t, and there is no such thing. */ + p1 = (u_int8_t *)a->data + start; + p2 = (u_int8_t *)b->data + start; len = a->size > b->size ? b->size : a->size; - for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2) - if (*p1 != *p2) - return ((long)*p1 - (long)*p2); - return ((long)a->size - (long)b->size); + for (i = start; i < len; ++p1, ++p2, ++i) + if (*p1 != *p2) { + if (locp != NULL) + *locp = i; + return (*p1 < *p2 ? -1 : 1); + } + if (locp != NULL) + *locp = len; + return (a->size == b->size ? 0 : (a->size < b->size ? -1 : 1)); } /* |
