diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2008-04-14 17:05:34 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2008-04-14 17:05:34 +0000 |
commit | 9b5c8d45f62bd3d243a40cc84deb93893f2f5122 (patch) | |
tree | 2d75607f7bdb96cfa1d73a3a36a4b3328118ff08 /src/backend/access | |
parent | 10be77c173211a75718f50fe6061862f6a0cb4a2 (diff) | |
download | postgresql-9b5c8d45f62bd3d243a40cc84deb93893f2f5122.tar.gz |
Push index operator lossiness determination down to GIST/GIN opclass
"consistent" functions, and remove pg_amop.opreqcheck, as per recent
discussion. The main immediate benefit of this is that we no longer need
8.3's ugly hack of requiring @@@ rather than @@ to test weight-using tsquery
searches on GIN indexes. In future it should be possible to optimize some
other queries better than is done now, by detecting at runtime whether the
index match is exact or not.
Tom Lane, after an idea of Heikki's, and with some help from Teodor.
Diffstat (limited to 'src/backend/access')
-rw-r--r-- | src/backend/access/gin/ginarrayproc.c | 41 | ||||
-rw-r--r-- | src/backend/access/gin/ginget.c | 67 | ||||
-rw-r--r-- | src/backend/access/gist/gistget.c | 33 | ||||
-rw-r--r-- | src/backend/access/gist/gistproc.c | 21 |
4 files changed, 116 insertions, 46 deletions
diff --git a/src/backend/access/gin/ginarrayproc.c b/src/backend/access/gin/ginarrayproc.c index c453cda525..c85399d632 100644 --- a/src/backend/access/gin/ginarrayproc.c +++ b/src/backend/access/gin/ginarrayproc.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginarrayproc.c,v 1.12 2008/01/01 19:45:46 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginarrayproc.c,v 1.13 2008/04/14 17:05:33 tgl Exp $ *------------------------------------------------------------------------- */ #include "postgres.h" @@ -95,8 +95,9 @@ ginarrayconsistent(PG_FUNCTION_ARGS) bool *check = (bool *) PG_GETARG_POINTER(0); StrategyNumber strategy = PG_GETARG_UINT16(1); ArrayType *query = PG_GETARG_ARRAYTYPE_P(2); - int res, - i, + bool *recheck = (bool *) PG_GETARG_POINTER(3); + bool res; + int i, nentries; /* ARRAYCHECK was already done by previous ginarrayextract call */ @@ -104,25 +105,51 @@ ginarrayconsistent(PG_FUNCTION_ARGS) switch (strategy) { case GinOverlapStrategy: + /* result is not lossy */ + *recheck = false; + /* at least one element in check[] is true, so result = true */ + res = true; + break; case GinContainedStrategy: + /* we will need recheck */ + *recheck = true; /* at least one element in check[] is true, so result = true */ - res = TRUE; + res = true; break; case GinContainsStrategy: + /* result is not lossy */ + *recheck = false; + /* must have all elements in check[] true */ + nentries = ArrayGetNItems(ARR_NDIM(query), ARR_DIMS(query)); + res = true; + for (i = 0; i < nentries; i++) + { + if (!check[i]) + { + res = false; + break; + } + } + break; case GinEqualStrategy: + /* we will need recheck */ + *recheck = true; + /* must have all elements in check[] true */ nentries = ArrayGetNItems(ARR_NDIM(query), ARR_DIMS(query)); - res = TRUE; + res = true; for (i = 0; i < nentries; i++) + { if (!check[i]) { - res = FALSE; + res = false; break; } + } break; default: elog(ERROR, "ginarrayconsistent: unknown strategy number: %d", strategy); - res = FALSE; + res = false; } PG_RETURN_BOOL(res); diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index af38717340..26abaa76af 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.12 2008/04/13 19:18:13 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.13 2008/04/14 17:05:33 tgl Exp $ *------------------------------------------------------------------------- */ @@ -343,10 +343,12 @@ entryGetItem(Relation index, GinScanEntry entry) /* * Sets key->curItem to new found heap item pointer for one scan key - * returns isFinished! + * Returns isFinished, ie TRUE means we did NOT get a new item pointer! + * Also, *keyrecheck is set true if recheck is needed for this scan key. */ static bool -keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx, GinScanKey key) +keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx, + GinScanKey key, bool *keyrecheck) { uint32 i; GinScanEntry entry; @@ -391,31 +393,36 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx, GinScanKey return TRUE; } - if (key->nentries == 1) - { - /* we can do not call consistentFn !! */ - key->entryRes[0] = TRUE; - return FALSE; - } + /* + * if key->nentries == 1 then the consistentFn should always succeed, + * but we must call it anyway to find out the recheck status. + */ /* setting up array for consistentFn */ for (i = 0; i < key->nentries; i++) { entry = key->scanEntry + i; - if (entry->isFinished == FALSE && compareItemPointers(&entry->curItem, &key->curItem) == 0) + if (entry->isFinished == FALSE && + compareItemPointers(&entry->curItem, &key->curItem) == 0) key->entryRes[i] = TRUE; else key->entryRes[i] = FALSE; } + /* + * Initialize *keyrecheck in case the consistentFn doesn't know it + * should set it. The safe assumption in that case is to force + * recheck. + */ + *keyrecheck = true; + oldCtx = MemoryContextSwitchTo(tempCtx); - res = DatumGetBool(FunctionCall3( - &ginstate->consistentFn, + res = DatumGetBool(FunctionCall4(&ginstate->consistentFn, PointerGetDatum(key->entryRes), UInt16GetDatum(key->strategy), - key->query - )); + key->query, + PointerGetDatum(keyrecheck))); MemoryContextSwitchTo(oldCtx); MemoryContextReset(tempCtx); } while (!res); @@ -430,24 +437,32 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx, GinScanKey static bool scanGetItem(IndexScanDesc scan, ItemPointerData *item, bool *recheck) { - uint32 i; GinScanOpaque so = (GinScanOpaque) scan->opaque; - - /* XXX for the moment, treat all GIN operators as lossy */ - *recheck = true; + uint32 i; + bool keyrecheck; + + /* + * We return recheck = true if any of the keyGetItem calls return + * keyrecheck = true. Note that because the second loop might advance + * some keys, this could theoretically be too conservative. In practice + * though, we expect that a consistentFn's recheck result will depend + * only on the operator and the query, so for any one key it should + * stay the same regardless of advancing to new items. So it's not + * worth working harder. + */ + *recheck = false; ItemPointerSetMin(item); for (i = 0; i < so->nkeys; i++) { GinScanKey key = so->keys + i; - if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx, key) == FALSE) - { - if (compareItemPointers(item, &key->curItem) < 0) - *item = key->curItem; - } - else + if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx, + key, &keyrecheck)) return FALSE; /* finished one of keys */ + if (compareItemPointers(item, &key->curItem) < 0) + *item = key->curItem; + *recheck |= keyrecheck; } for (i = 1; i <= so->nkeys; i++) @@ -462,8 +477,10 @@ scanGetItem(IndexScanDesc scan, ItemPointerData *item, bool *recheck) break; else if (cmp > 0) { - if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx, key) == TRUE) + if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx, + key, &keyrecheck)) return FALSE; /* finished one of keys */ + *recheck |= keyrecheck; } else { /* returns to begin */ diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index 8e92139582..7a5177218e 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.71 2008/04/13 19:18:14 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.72 2008/04/14 17:05:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -151,7 +151,6 @@ gistnext(IndexScanDesc scan, ScanDirection dir, TIDBitmap *tbm) GISTScanOpaque so; GISTSearchStack *stk; IndexTuple it; - bool recheck; GISTPageOpaque opaque; bool resetoffset = false; int64 ntids = 0; @@ -257,8 +256,6 @@ gistnext(IndexScanDesc scan, ScanDirection dir, TIDBitmap *tbm) for (;;) { n = gistfindnext(scan, n, dir); - /* XXX for the moment, treat all GIST operators as lossy */ - recheck = true; if (!OffsetNumberIsValid(n)) { @@ -304,11 +301,11 @@ gistnext(IndexScanDesc scan, ScanDirection dir, TIDBitmap *tbm) it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); ntids++; if (tbm != NULL) - tbm_add_tuples(tbm, &it->t_tid, 1, recheck); + tbm_add_tuples(tbm, &it->t_tid, 1, scan->xs_recheck); else { scan->xs_ctup.t_self = it->t_tid; - scan->xs_recheck = recheck; + /* scan->xs_recheck is already set */ LockBuffer(so->curbuf, GIST_UNLOCK); return ntids; /* always 1 */ @@ -345,6 +342,10 @@ gistnext(IndexScanDesc scan, ScanDirection dir, TIDBitmap *tbm) /* * gistindex_keytest() -- does this index tuple satisfy the scan key(s)? * + * On success return for a leaf tuple, scan->xs_recheck is set to indicate + * whether recheck is needed. We recheck if any of the consistent() functions + * request it. + * * We must decompress the key in the IndexTuple before passing it to the * sk_func (and we have previously overwritten the sk_func to use the * user-defined Consistent method, so we actually are invoking that). @@ -371,6 +372,8 @@ gistindex_keytest(IndexTuple tuple, IncrIndexProcessed(); + scan->xs_recheck = false; + /* * Tuple doesn't restore after crash recovery because of incomplete insert */ @@ -382,6 +385,7 @@ gistindex_keytest(IndexTuple tuple, Datum datum; bool isNull; Datum test; + bool recheck; GISTENTRY de; datum = index_getattr(tuple, @@ -408,7 +412,6 @@ gistindex_keytest(IndexTuple tuple, } else { - gistdentryinit(giststate, key->sk_attno - 1, &de, datum, r, p, offset, FALSE, isNull); @@ -416,21 +419,28 @@ gistindex_keytest(IndexTuple tuple, /* * Call the Consistent function to evaluate the test. The * arguments are the index datum (as a GISTENTRY*), the comparison - * datum, and the comparison operator's strategy number and - * subtype from pg_amop. + * datum, the comparison operator's strategy number and + * subtype from pg_amop, and the recheck flag. * * (Presently there's no need to pass the subtype since it'll * always be zero, but might as well pass it for possible future * use.) + * + * We initialize the recheck flag to true (the safest assumption) + * in case the Consistent function forgets to set it. */ - test = FunctionCall4(&key->sk_func, + recheck = true; + + test = FunctionCall5(&key->sk_func, PointerGetDatum(&de), key->sk_argument, Int32GetDatum(key->sk_strategy), - ObjectIdGetDatum(key->sk_subtype)); + ObjectIdGetDatum(key->sk_subtype), + PointerGetDatum(&recheck)); if (!DatumGetBool(test)) return false; + scan->xs_recheck |= recheck; } keySize--; @@ -444,6 +454,7 @@ gistindex_keytest(IndexTuple tuple, * Return the offset of the first index entry that is consistent with * the search key after offset 'n' in the current page. If there are * no more consistent entries, return InvalidOffsetNumber. + * On success, scan->xs_recheck is set correctly, too. * Page should be locked.... */ static OffsetNumber diff --git a/src/backend/access/gist/gistproc.c b/src/backend/access/gist/gistproc.c index 7653776c36..ec23385f5c 100644 --- a/src/backend/access/gist/gistproc.c +++ b/src/backend/access/gist/gistproc.c @@ -10,7 +10,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gist/gistproc.c,v 1.13 2008/01/01 19:45:46 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gist/gistproc.c,v 1.14 2008/04/14 17:05:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -86,6 +86,11 @@ gist_box_consistent(PG_FUNCTION_ARGS) GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); BOX *query = PG_GETARG_BOX_P(1); StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + /* Oid subtype = PG_GETARG_OID(3); */ + bool *recheck = (bool *) PG_GETARG_POINTER(4); + + /* All cases served by this function are exact */ + *recheck = false; if (DatumGetBoxP(entry->key) == NULL || query == NULL) PG_RETURN_BOOL(FALSE); @@ -723,13 +728,18 @@ gist_poly_consistent(PG_FUNCTION_ARGS) GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); POLYGON *query = PG_GETARG_POLYGON_P(1); StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + /* Oid subtype = PG_GETARG_OID(3); */ + bool *recheck = (bool *) PG_GETARG_POINTER(4); bool result; + /* All cases served by this function are inexact */ + *recheck = true; + if (DatumGetBoxP(entry->key) == NULL || query == NULL) PG_RETURN_BOOL(FALSE); /* - * Since the operators are marked lossy anyway, we can just use + * Since the operators require recheck anyway, we can just use * rtree_internal_consistent even at leaf nodes. (This works in part * because the index entries are bounding boxes not polygons.) */ @@ -794,14 +804,19 @@ gist_circle_consistent(PG_FUNCTION_ARGS) GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); CIRCLE *query = PG_GETARG_CIRCLE_P(1); StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + /* Oid subtype = PG_GETARG_OID(3); */ + bool *recheck = (bool *) PG_GETARG_POINTER(4); BOX bbox; bool result; + /* All cases served by this function are inexact */ + *recheck = true; + if (DatumGetBoxP(entry->key) == NULL || query == NULL) PG_RETURN_BOOL(FALSE); /* - * Since the operators are marked lossy anyway, we can just use + * Since the operators require recheck anyway, we can just use * rtree_internal_consistent even at leaf nodes. (This works in part * because the index entries are bounding boxes not circles.) */ |