/*-------------------------------------------------------------------------
 *
 * rangetypes_gist.c
 *	  GiST support for range types.
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/rangetypes_gist.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/gist.h"
#include "access/stratnum.h"
#include "utils/datum.h"
#include "utils/float.h"
#include "utils/fmgrprotos.h"
#include "utils/multirangetypes.h"
#include "utils/rangetypes.h"

/*
 * Range class properties used to segregate different classes of ranges in
 * GiST.  Each unique combination of properties is a class.  CLS_EMPTY cannot
 * be combined with anything else.
 */
#define CLS_NORMAL			0	/* Ordinary finite range (no bits set) */
#define CLS_LOWER_INF		1	/* Lower bound is infinity */
#define CLS_UPPER_INF		2	/* Upper bound is infinity */
#define CLS_CONTAIN_EMPTY	4	/* Contains underlying empty ranges */
#define CLS_EMPTY			8	/* Special class for empty ranges */

#define CLS_COUNT			9	/* # of classes; includes all combinations of
								 * properties.  CLS_EMPTY doesn't combine with
								 * anything else, so it's only 2^3 + 1. */

/*
 * Minimum accepted ratio of split for items of the same class.  If the items
 * are of different classes, we will separate along those lines regardless of
 * the ratio.
 */
#define LIMIT_RATIO		0.3

/* Constants for fixed penalty values */
#define INFINITE_BOUND_PENALTY			2.0
#define CONTAIN_EMPTY_PENALTY			1.0
#define DEFAULT_SUBTYPE_DIFF_PENALTY	1.0

/*
 * Per-item data for range_gist_single_sorting_split.
 */
typedef struct
{
	int			index;
	RangeBound	bound;
} SingleBoundSortItem;

/* place on left or right side of split? */
typedef enum
{
	SPLIT_LEFT = 0,				/* makes initialization to SPLIT_LEFT easier */
	SPLIT_RIGHT
} SplitLR;

/*
 * Context for range_gist_consider_split.
 */
typedef struct
{
	TypeCacheEntry *typcache;	/* typcache for range type */
	bool		has_subtype_diff;	/* does it have subtype_diff? */
	int			entries_count;	/* total number of entries being split */

	/* Information about currently selected split follows */
	bool		first;			/* true if no split was selected yet */
	RangeBound *left_upper;		/* upper bound of left interval */
	RangeBound *right_lower;	/* lower bound of right interval */
	float4		ratio;			/* split ratio */
	float4		overlap;		/* overlap between left and right predicate */
	int			common_left;	/* # common entries destined for each side */
	int			common_right;
} ConsiderSplitContext;

/*
 * Bounds extracted from a non-empty range, for use in
 * range_gist_double_sorting_split.
 */
typedef struct
{
	RangeBound	lower;
	RangeBound	upper;
} NonEmptyRange;

/*
 * Represents information about an entry that can be placed in either group
 * without affecting overlap over selected axis ("common entry").
 */
typedef struct
{
	/* Index of entry in the initial array */
	int			index;
	/* Delta between closeness of range to each of the two groups */
	double		delta;
} CommonEntry;

/* Helper macros to place an entry in the left or right group during split */
/* Note direct access to variables v, typcache, left_range, right_range */
#define PLACE_LEFT(range, off)					\
	do {										\
		if (v->spl_nleft > 0)					\
			left_range = range_super_union(typcache, left_range, range); \
		else									\
			left_range = (range);				\
		v->spl_left[v->spl_nleft++] = (off);	\
	} while(0)

#define PLACE_RIGHT(range, off)					\
	do {										\
		if (v->spl_nright > 0)					\
			right_range = range_super_union(typcache, right_range, range); \
		else									\
			right_range = (range);				\
		v->spl_right[v->spl_nright++] = (off);	\
	} while(0)

/* Copy a RangeType datum (hardwires typbyval and typlen for ranges...) */
#define rangeCopy(r) \
	((RangeType *) DatumGetPointer(datumCopy(PointerGetDatum(r), \
											 false, -1)))

static RangeType *range_super_union(TypeCacheEntry *typcache, RangeType *r1,
									RangeType *r2);
static bool range_gist_consistent_int_range(TypeCacheEntry *typcache,
											StrategyNumber strategy,
											const RangeType *key,
											const RangeType *query);
static bool range_gist_consistent_int_multirange(TypeCacheEntry *typcache,
												 StrategyNumber strategy,
												 const RangeType *key,
												 const MultirangeType *query);
static bool range_gist_consistent_int_element(TypeCacheEntry *typcache,
											  StrategyNumber strategy,
											  const RangeType *key,
											  Datum query);
static bool range_gist_consistent_leaf_range(TypeCacheEntry *typcache,
											 StrategyNumber strategy,
											 const RangeType *key,
											 const RangeType *query);
static bool range_gist_consistent_leaf_multirange(TypeCacheEntry *typcache,
												  StrategyNumber strategy,
												  const RangeType *key,
												  const MultirangeType *query);
static bool range_gist_consistent_leaf_element(TypeCacheEntry *typcache,
											   StrategyNumber strategy,
											   const RangeType *key,
											   Datum query);
static void range_gist_fallback_split(TypeCacheEntry *typcache,
									  GistEntryVector *entryvec,
									  GIST_SPLITVEC *v);
static void range_gist_class_split(TypeCacheEntry *typcache,
								   GistEntryVector *entryvec,
								   GIST_SPLITVEC *v,
								   SplitLR *classes_groups);
static void range_gist_single_sorting_split(TypeCacheEntry *typcache,
											GistEntryVector *entryvec,
											GIST_SPLITVEC *v,
											bool use_upper_bound);
static void range_gist_double_sorting_split(TypeCacheEntry *typcache,
											GistEntryVector *entryvec,
											GIST_SPLITVEC *v);
static void range_gist_consider_split(ConsiderSplitContext *context,
									  RangeBound *right_lower, int min_left_count,
									  RangeBound *left_upper, int max_left_count);
static int	get_gist_range_class(RangeType *range);
static int	single_bound_cmp(const void *a, const void *b, void *arg);
static int	interval_cmp_lower(const void *a, const void *b, void *arg);
static int	interval_cmp_upper(const void *a, const void *b, void *arg);
static int	common_entry_cmp(const void *i1, const void *i2);
static float8 call_subtype_diff(TypeCacheEntry *typcache,
								Datum val1, Datum val2);
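
/*
 * Note on how the consistent functions are reached: each indexable range
 * operator is bound to one of the RANGESTRAT_* strategy numbers (for
 * example, the overlaps operator "&&" arrives as RANGESTRAT_OVERLAPS), and
 * the consistent function is called once per index key.  For a leaf key the
 * test can be exact; for an internal key, which is a bounding range for a
 * whole subtree, the test only has to prove that no descendant can match
 * before the subtree is skipped, so it errs on the side of returning true.
 */
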
/* GiST query consistency check */
Datum
range_gist_consistent(PG_FUNCTION_ARGS)
{
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
	Datum		query = PG_GETARG_DATUM(1);
	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
	bool		result;
	Oid			subtype = PG_GETARG_OID(3);
	bool	   *recheck = (bool *) PG_GETARG_POINTER(4);
	RangeType  *key = DatumGetRangeTypeP(entry->key);
	TypeCacheEntry *typcache;

	/* All operators served by this function are exact */
	*recheck = false;

	typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key));

	/*
	 * Perform consistent checking using function corresponding to key type
	 * (leaf or internal) and query subtype (range, multirange, or element).
	 * Note that invalid subtype means that query type matches key type
	 * (range).
	 */
	if (GIST_LEAF(entry))
	{
		if (!OidIsValid(subtype) || subtype == ANYRANGEOID)
			result = range_gist_consistent_leaf_range(typcache, strategy, key,
													  DatumGetRangeTypeP(query));
		else if (subtype == ANYMULTIRANGEOID)
			result = range_gist_consistent_leaf_multirange(typcache, strategy, key,
														   DatumGetMultirangeTypeP(query));
		else
			result = range_gist_consistent_leaf_element(typcache, strategy,
														key, query);
	}
	else
	{
		if (!OidIsValid(subtype) || subtype == ANYRANGEOID)
			result = range_gist_consistent_int_range(typcache, strategy, key,
													 DatumGetRangeTypeP(query));
		else if (subtype == ANYMULTIRANGEOID)
			result = range_gist_consistent_int_multirange(typcache, strategy, key,
														  DatumGetMultirangeTypeP(query));
		else
			result = range_gist_consistent_int_element(typcache, strategy,
													   key, query);
	}

	PG_RETURN_BOOL(result);
}

/*
 * GiST compress method for multiranges: multirange is approximated as union
 * range with no gaps.
 */
Datum
multirange_gist_compress(PG_FUNCTION_ARGS)
{
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);

	if (entry->leafkey)
	{
		MultirangeType *mr = DatumGetMultirangeTypeP(entry->key);
		RangeType  *r;
		TypeCacheEntry *typcache;
		GISTENTRY  *retval = palloc(sizeof(GISTENTRY));

		typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
		r = multirange_get_union_range(typcache->rngtype, mr);

		gistentryinit(*retval, RangeTypePGetDatum(r),
					  entry->rel, entry->page, entry->offset, false);

		PG_RETURN_POINTER(retval);
	}

	PG_RETURN_POINTER(entry);
}
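
/*
 * A consequence of the union-range approximation above: the index can return
 * false positives.  For instance, the multirange '{[1,3), [8,10)}' is indexed
 * as '[1,10)', which overlaps '[4,5)' even though the original multirange
 * does not.  That is why the consistent function below always asks the
 * executor to recheck the operator against the heap tuple.
 */
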
/* GiST query consistency check for multiranges */
Datum
multirange_gist_consistent(PG_FUNCTION_ARGS)
{
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
	Datum		query = PG_GETARG_DATUM(1);
	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
	bool		result;
	Oid			subtype = PG_GETARG_OID(3);
	bool	   *recheck = (bool *) PG_GETARG_POINTER(4);
	RangeType  *key = DatumGetRangeTypeP(entry->key);
	TypeCacheEntry *typcache;

	/*
	 * All operators served by this function are inexact because multirange
	 * is approximated by union range with no gaps.
	 */
	*recheck = true;

	typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key));

	/*
	 * Perform consistent checking using function corresponding to key type
	 * (leaf or internal) and query subtype (range, multirange, or element).
	 * Note that invalid subtype means that query type matches key type
	 * (multirange).
	 */
	if (GIST_LEAF(entry))
	{
		if (!OidIsValid(subtype) || subtype == ANYMULTIRANGEOID)
			result = range_gist_consistent_leaf_multirange(typcache, strategy, key,
														   DatumGetMultirangeTypeP(query));
		else if (subtype == ANYRANGEOID)
			result = range_gist_consistent_leaf_range(typcache, strategy, key,
													  DatumGetRangeTypeP(query));
		else
			result = range_gist_consistent_leaf_element(typcache, strategy,
														key, query);
	}
	else
	{
		if (!OidIsValid(subtype) || subtype == ANYMULTIRANGEOID)
			result = range_gist_consistent_int_multirange(typcache, strategy, key,
														  DatumGetMultirangeTypeP(query));
		else if (subtype == ANYRANGEOID)
			result = range_gist_consistent_int_range(typcache, strategy, key,
													 DatumGetRangeTypeP(query));
		else
			result = range_gist_consistent_int_element(typcache, strategy,
													   key, query);
	}

	PG_RETURN_BOOL(result);
}

/* form union range */
Datum
range_gist_union(PG_FUNCTION_ARGS)
{
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
	GISTENTRY  *ent = entryvec->vector;
	RangeType  *result_range;
	TypeCacheEntry *typcache;
	int			i;

	result_range = DatumGetRangeTypeP(ent[0].key);

	typcache = range_get_typcache(fcinfo, RangeTypeGetOid(result_range));

	for (i = 1; i < entryvec->n; i++)
	{
		result_range = range_super_union(typcache, result_range,
										 DatumGetRangeTypeP(ent[i].key));
	}

	PG_RETURN_RANGE_P(result_range);
}

/*
 * We store ranges as ranges in GiST indexes, so we do not need
 * compress, decompress, or fetch functions.  Note this implies a limit
 * on the size of range values that can be indexed.
 */

/*
 * GiST page split penalty function.
 *
 * The penalty function has the following goals (in order from most to least
 * important):
 * - Keep normal ranges separate
 * - Avoid broadening the class of the original predicate
 * - Avoid broadening (as determined by subtype_diff) the original predicate
 * - Favor adding ranges to narrower original predicates
 */
Datum
range_gist_penalty(PG_FUNCTION_ARGS)
{
	GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
	GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
	float	   *penalty = (float *) PG_GETARG_POINTER(2);
	RangeType  *orig = DatumGetRangeTypeP(origentry->key);
	RangeType  *new = DatumGetRangeTypeP(newentry->key);
	TypeCacheEntry *typcache;
	bool		has_subtype_diff;
	RangeBound	orig_lower,
				new_lower,
				orig_upper,
				new_upper;
	bool		orig_empty,
				new_empty;

	if (RangeTypeGetOid(orig) != RangeTypeGetOid(new))
		elog(ERROR, "range types do not match");

	typcache = range_get_typcache(fcinfo, RangeTypeGetOid(orig));

	has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);

	range_deserialize(typcache, orig, &orig_lower, &orig_upper, &orig_empty);
	range_deserialize(typcache, new, &new_lower, &new_upper, &new_empty);

	/*
	 * Distinct branches for handling distinct classes of ranges.  Note that
	 * penalty values only need to be commensurate within the same class of
	 * new range.
	 */
	if (new_empty)
	{
		/* Handle insertion of empty range */
		if (orig_empty)
		{
			/*
			 * The best case is to insert it to empty original range.
			 * Insertion here means no broadening of original range.  Also
			 * original range is the most narrow.
			 */
			*penalty = 0.0;
		}
		else if (RangeIsOrContainsEmpty(orig))
		{
			/*
			 * The second case is to insert empty range into range which
			 * contains at least one underlying empty range.  There is still
			 * no broadening of original range, but original range is not as
			 * narrow as possible.
			 */
			*penalty = CONTAIN_EMPTY_PENALTY;
		}
		else if (orig_lower.infinite && orig_upper.infinite)
		{
			/*
			 * Original range requires broadening.  (-inf; +inf) is most far
			 * from normal range in this case.
			 */
			*penalty = 2 * CONTAIN_EMPTY_PENALTY;
		}
		else if (orig_lower.infinite || orig_upper.infinite)
		{
			/*
			 * (-inf, x) or (x, +inf) original ranges are closer to normal
			 * ranges, so it's worse to mix it with empty ranges.
			 */
			*penalty = 3 * CONTAIN_EMPTY_PENALTY;
		}
		else
		{
			/*
			 * The least preferred case is broadening of normal range.
			 */
			*penalty = 4 * CONTAIN_EMPTY_PENALTY;
		}
	}
	else if (new_lower.infinite && new_upper.infinite)
	{
		/* Handle insertion of (-inf, +inf) range */
		if (orig_lower.infinite && orig_upper.infinite)
		{
			/*
			 * Best case is inserting to (-inf, +inf) original range.
			 */
			*penalty = 0.0;
		}
		else if (orig_lower.infinite || orig_upper.infinite)
		{
			/*
			 * When original range is (-inf, x) or (x, +inf) it requires
			 * broadening of original range (extension of one bound to
			 * infinity).
			 */
			*penalty = INFINITE_BOUND_PENALTY;
		}
		else
		{
			/*
			 * Insertion to normal original range is least preferred.
			 */
			*penalty = 2 * INFINITE_BOUND_PENALTY;
		}

		if (RangeIsOrContainsEmpty(orig))
		{
			/*
			 * Original range is narrower when it doesn't contain empty
			 * ranges.  Add additional penalty otherwise.
			 */
			*penalty += CONTAIN_EMPTY_PENALTY;
		}
	}
	else if (new_lower.infinite)
	{
		/* Handle insertion of (-inf, x) range */
		if (!orig_empty && orig_lower.infinite)
		{
			if (orig_upper.infinite)
			{
				/*
				 * (-inf, +inf) range won't be extended by insertion of
				 * (-inf, x) range.  It's a less desirable case than
				 * insertion to (-inf, y) original range without extension,
				 * because in that case original range is narrower.  But we
				 * can't express that in single float value.
				 */
				*penalty = 0.0;
			}
			else
			{
				if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0)
				{
					/*
					 * Get extension of original range using subtype_diff.
					 * Use constant if subtype_diff unavailable.
					 */
					if (has_subtype_diff)
						*penalty = call_subtype_diff(typcache,
													 new_upper.val,
													 orig_upper.val);
					else
						*penalty = DEFAULT_SUBTYPE_DIFF_PENALTY;
				}
				else
				{
					/* No extension of original range */
					*penalty = 0.0;
				}
			}
		}
		else
		{
			/*
			 * If lower bound of original range is not -inf, then extension
			 * of it is infinity.
			 */
			*penalty = get_float4_infinity();
		}
	}
	else if (new_upper.infinite)
	{
		/* Handle insertion of (x, +inf) range */
		if (!orig_empty && orig_upper.infinite)
		{
			if (orig_lower.infinite)
			{
				/*
				 * (-inf, +inf) range won't be extended by insertion of (x,
				 * +inf) range.  It's a less desirable case than insertion to
				 * (y, +inf) original range without extension, because in
				 * that case original range is narrower.  But we can't
				 * express that in single float value.
				 */
				*penalty = 0.0;
			}
			else
			{
				if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0)
				{
					/*
					 * Get extension of original range using subtype_diff.
					 * Use constant if subtype_diff unavailable.
					 */
					if (has_subtype_diff)
						*penalty = call_subtype_diff(typcache,
													 orig_lower.val,
													 new_lower.val);
					else
						*penalty = DEFAULT_SUBTYPE_DIFF_PENALTY;
				}
				else
				{
					/* No extension of original range */
					*penalty = 0.0;
				}
			}
		}
		else
		{
			/*
			 * If upper bound of original range is not +inf, then extension
			 * of it is infinity.
			 */
			*penalty = get_float4_infinity();
		}
	}
	else
	{
		/* Handle insertion of normal (non-empty, non-infinite) range */
		if (orig_empty || orig_lower.infinite || orig_upper.infinite)
		{
			/*
			 * Avoid mixing normal ranges with infinite and empty ranges.
			 */
			*penalty = get_float4_infinity();
		}
		else
		{
			/*
			 * Calculate extension of original range by calling subtype_diff.
			 * Use constant if subtype_diff unavailable.
			 */
			float8		diff = 0.0;

			if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0)
			{
				if (has_subtype_diff)
					diff += call_subtype_diff(typcache,
											  orig_lower.val, new_lower.val);
				else
					diff += DEFAULT_SUBTYPE_DIFF_PENALTY;
			}
			if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0)
			{
				if (has_subtype_diff)
					diff += call_subtype_diff(typcache,
											  new_upper.val, orig_upper.val);
				else
					diff += DEFAULT_SUBTYPE_DIFF_PENALTY;
			}
			*penalty = diff;
		}
	}

	PG_RETURN_POINTER(penalty);
}

/*
 * The GiST PickSplit method for ranges
 *
 * Primarily, we try to segregate ranges of different classes.  If splitting
 * ranges of the same class, use the appropriate split method for that class.
 */
Datum
range_gist_picksplit(PG_FUNCTION_ARGS)
{
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
	TypeCacheEntry *typcache;
	OffsetNumber i;
	RangeType  *pred_left;
	int			nbytes;
	OffsetNumber maxoff;
	int			count_in_classes[CLS_COUNT];
	int			j;
	int			non_empty_classes_count = 0;
	int			biggest_class = -1;
	int			biggest_class_count = 0;
	int			total_count;

	/* use first item to look up range type's info */
	pred_left = DatumGetRangeTypeP(entryvec->vector[FirstOffsetNumber].key);
	typcache = range_get_typcache(fcinfo, RangeTypeGetOid(pred_left));

	maxoff = entryvec->n - 1;
	nbytes = (maxoff + 1) * sizeof(OffsetNumber);
	v->spl_left = (OffsetNumber *) palloc(nbytes);
	v->spl_right = (OffsetNumber *) palloc(nbytes);

	/*
	 * Get count distribution of range classes.
	 */
	memset(count_in_classes, 0, sizeof(count_in_classes));
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		RangeType  *range = DatumGetRangeTypeP(entryvec->vector[i].key);

		count_in_classes[get_gist_range_class(range)]++;
	}

	/*
	 * Count non-empty classes and find biggest class.
	 */
	total_count = maxoff;
	for (j = 0; j < CLS_COUNT; j++)
	{
		if (count_in_classes[j] > 0)
		{
			if (count_in_classes[j] > biggest_class_count)
			{
				biggest_class_count = count_in_classes[j];
				biggest_class = j;
			}
			non_empty_classes_count++;
		}
	}

	Assert(non_empty_classes_count > 0);

	if (non_empty_classes_count == 1)
	{
		/* One non-empty class, so split inside class */
		if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_NORMAL)
		{
			/* double sorting split for normal ranges */
			range_gist_double_sorting_split(typcache, entryvec, v);
		}
		else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_LOWER_INF)
		{
			/* upper bound sorting split for (-inf, x) ranges */
			range_gist_single_sorting_split(typcache, entryvec, v, true);
		}
		else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_UPPER_INF)
		{
			/* lower bound sorting split for (x, +inf) ranges */
			range_gist_single_sorting_split(typcache, entryvec, v, false);
		}
		else
		{
			/* trivial split for all (-inf, +inf) or all empty ranges */
			range_gist_fallback_split(typcache, entryvec, v);
		}
	}
	else
	{
		/*
		 * Class based split.
		 *
		 * To which side of the split should each class go?  Initialize them
		 * all to go to the left side.
		 */
		SplitLR		classes_groups[CLS_COUNT];

		memset(classes_groups, 0, sizeof(classes_groups));

		if (count_in_classes[CLS_NORMAL] > 0)
		{
			/* separate normal ranges if any */
			classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
		}
		else
		{
			/*----------
			 * Try to split classes in one of two ways:
			 *	1) containing infinities - not containing infinities
			 *	2) containing empty - not containing empty
			 *
			 * Select the way which balances the ranges between left and right
			 * the best.  If split in these ways is not possible, there are at
			 * most 3 classes, so just separate biggest class.
			 *----------
			 */
			int			infCount,
						nonInfCount;
			int			emptyCount,
						nonEmptyCount;

			nonInfCount =
				count_in_classes[CLS_NORMAL] +
				count_in_classes[CLS_CONTAIN_EMPTY] +
				count_in_classes[CLS_EMPTY];
			infCount = total_count - nonInfCount;

			nonEmptyCount =
				count_in_classes[CLS_NORMAL] +
				count_in_classes[CLS_LOWER_INF] +
				count_in_classes[CLS_UPPER_INF] +
				count_in_classes[CLS_LOWER_INF | CLS_UPPER_INF];
			emptyCount = total_count - nonEmptyCount;

			if (infCount > 0 && nonInfCount > 0 &&
				(abs(infCount - nonInfCount) <=
				 abs(emptyCount - nonEmptyCount)))
			{
				classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
				classes_groups[CLS_CONTAIN_EMPTY] = SPLIT_RIGHT;
				classes_groups[CLS_EMPTY] = SPLIT_RIGHT;
			}
			else if (emptyCount > 0 && nonEmptyCount > 0)
			{
				classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
				classes_groups[CLS_LOWER_INF] = SPLIT_RIGHT;
				classes_groups[CLS_UPPER_INF] = SPLIT_RIGHT;
				classes_groups[CLS_LOWER_INF | CLS_UPPER_INF] = SPLIT_RIGHT;
			}
			else
			{
				/*
				 * Either total_count == emptyCount or total_count ==
				 * infCount.
				 */
				classes_groups[biggest_class] = SPLIT_RIGHT;
			}
		}

		range_gist_class_split(typcache, entryvec, v, classes_groups);
	}

	PG_RETURN_POINTER(v);
}

/* equality comparator for GiST */
Datum
range_gist_same(PG_FUNCTION_ARGS)
{
	RangeType  *r1 = PG_GETARG_RANGE_P(0);
	RangeType  *r2 = PG_GETARG_RANGE_P(1);
	bool	   *result = (bool *) PG_GETARG_POINTER(2);

	/*
	 * range_eq will ignore the RANGE_CONTAIN_EMPTY flag, so we have to check
	 * that for ourselves.  More generally, if the entries have been properly
	 * normalized, then unequal flags bytes must mean unequal ranges ... so
	 * let's just test all the flag bits at once.
	 */
	if (range_get_flags(r1) != range_get_flags(r2))
		*result = false;
	else
	{
		TypeCacheEntry *typcache;

		typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));

		*result = range_eq_internal(typcache, r1, r2);
	}

	PG_RETURN_POINTER(result);
}

/*
 *----------------------------------------------------------
 * STATIC FUNCTIONS
 *----------------------------------------------------------
 */
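
/*
 * A concrete illustration of the "super union" below: the super-union of
 * '[1,3)' and '[5,8)' is '[1,8)'; the gap between the inputs is simply
 * absorbed, where range_union would raise an error.  Unioning in an empty
 * range keeps the bounds but sets the RANGE_CONTAIN_EMPTY flag, which is
 * what RangeIsOrContainsEmpty() tests when deciding whether an internal
 * page must be visited for contained-by searches.
 */
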
/*
 * Return the smallest range that contains r1 and r2
 *
 * This differs from regular range_union in two critical ways:
 * 1. It won't throw an error for non-adjacent r1 and r2, but just absorb
 * the intervening values into the result range.
 * 2. We track whether any empty range has been union'd into the result,
 * so that contained_by searches can be indexed.  Note that this means
 * that *all* unions formed within the GiST index must go through here.
 */
static RangeType *
range_super_union(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2)
{
	RangeType  *result;
	RangeBound	lower1,
				lower2;
	RangeBound	upper1,
				upper2;
	bool		empty1,
				empty2;
	char		flags1,
				flags2;
	RangeBound *result_lower;
	RangeBound *result_upper;

	range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
	range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
	flags1 = range_get_flags(r1);
	flags2 = range_get_flags(r2);

	if (empty1)
	{
		/* We can return r2 as-is if it already is or contains empty */
		if (flags2 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY))
			return r2;
		/* Else we'd better copy it (modify-in-place isn't safe) */
		r2 = rangeCopy(r2);
		range_set_contain_empty(r2);
		return r2;
	}
	if (empty2)
	{
		/* We can return r1 as-is if it already is or contains empty */
		if (flags1 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY))
			return r1;
		/* Else we'd better copy it (modify-in-place isn't safe) */
		r1 = rangeCopy(r1);
		range_set_contain_empty(r1);
		return r1;
	}

	if (range_cmp_bounds(typcache, &lower1, &lower2) <= 0)
		result_lower = &lower1;
	else
		result_lower = &lower2;

	if (range_cmp_bounds(typcache, &upper1, &upper2) >= 0)
		result_upper = &upper1;
	else
		result_upper = &upper2;

	/* optimization to avoid constructing a new range */
	if (result_lower == &lower1 && result_upper == &upper1 &&
		((flags1 & RANGE_CONTAIN_EMPTY) || !(flags2 & RANGE_CONTAIN_EMPTY)))
		return r1;
	if (result_lower == &lower2 && result_upper == &upper2 &&
		((flags2 & RANGE_CONTAIN_EMPTY) || !(flags1 & RANGE_CONTAIN_EMPTY)))
		return r2;

	result = make_range(typcache, result_lower, result_upper, false, NULL);

	if ((flags1 & RANGE_CONTAIN_EMPTY) || (flags2 & RANGE_CONTAIN_EMPTY))
		range_set_contain_empty(result);

	return result;
}

static bool
multirange_union_range_equal(TypeCacheEntry *typcache,
							 const RangeType *r,
							 const MultirangeType *mr)
{
	RangeBound	lower1,
				upper1,
				lower2,
				upper2,
				tmp;
	bool		empty;

	if (RangeIsEmpty(r) || MultirangeIsEmpty(mr))
		return (RangeIsEmpty(r) && MultirangeIsEmpty(mr));

	range_deserialize(typcache, r, &lower1, &upper1, &empty);
	Assert(!empty);
	multirange_get_bounds(typcache, mr, 0, &lower2, &tmp);
	multirange_get_bounds(typcache, mr, mr->rangeCount - 1, &tmp, &upper2);

	return (range_cmp_bounds(typcache, &lower1, &lower2) == 0 &&
			range_cmp_bounds(typcache, &upper1, &upper2) == 0);
}

/*
 * GiST consistent test on an index internal page with range query
 */
static bool
range_gist_consistent_int_range(TypeCacheEntry *typcache,
								StrategyNumber strategy,
								const RangeType *key,
								const RangeType *query)
{
	switch (strategy)
	{
		case RANGESTRAT_BEFORE:
			if (RangeIsEmpty(key) || RangeIsEmpty(query))
				return false;
			return (!range_overright_internal(typcache, key, query));
		case RANGESTRAT_OVERLEFT:
			if (RangeIsEmpty(key) || RangeIsEmpty(query))
				return false;
			return (!range_after_internal(typcache, key, query));
		case RANGESTRAT_OVERLAPS:
			return range_overlaps_internal(typcache, key, query);
		case RANGESTRAT_OVERRIGHT:
			if (RangeIsEmpty(key) || RangeIsEmpty(query))
				return false;
			return (!range_before_internal(typcache, key, query));
		case RANGESTRAT_AFTER:
			if (RangeIsEmpty(key) || RangeIsEmpty(query))
				return false;
			return (!range_overleft_internal(typcache, key, query));
		case RANGESTRAT_ADJACENT:
			if (RangeIsEmpty(key) || RangeIsEmpty(query))
				return false;
			if (range_adjacent_internal(typcache, key, query))
				return true;
			return range_overlaps_internal(typcache, key, query);
		case RANGESTRAT_CONTAINS:
			return range_contains_internal(typcache, key, query);
		case RANGESTRAT_CONTAINED_BY:

			/*
			 * Empty ranges are contained by anything, so if key is or
			 * contains any empty ranges, we must descend into it.
			 * Otherwise, descend only if key overlaps the query.
			 */
			if (RangeIsOrContainsEmpty(key))
				return true;
			return range_overlaps_internal(typcache, key, query);
		case RANGESTRAT_EQ:

			/*
			 * If query is empty, descend only if the key is or contains any
			 * empty ranges.  Otherwise, descend if key contains query.
			 */
			if (RangeIsEmpty(query))
				return RangeIsOrContainsEmpty(key);
			return range_contains_internal(typcache, key, query);
		default:
			elog(ERROR, "unrecognized range strategy: %d", strategy);
			return false;		/* keep compiler quiet */
	}
}

/*
 * GiST consistent test on an index internal page with multirange query
 */
static bool
range_gist_consistent_int_multirange(TypeCacheEntry *typcache,
									 StrategyNumber strategy,
									 const RangeType *key,
									 const MultirangeType *query)
{
	switch (strategy)
	{
		case RANGESTRAT_BEFORE:
			if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
				return false;
			return (!range_overright_multirange_internal(typcache, key,
														 query));
		case RANGESTRAT_OVERLEFT:
			if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
				return false;
			return (!range_after_multirange_internal(typcache, key, query));
		case RANGESTRAT_OVERLAPS:
			return range_overlaps_multirange_internal(typcache, key, query);
		case RANGESTRAT_OVERRIGHT:
			if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
				return false;
			return (!range_before_multirange_internal(typcache, key, query));
		case RANGESTRAT_AFTER:
			if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
				return false;
			return (!range_overleft_multirange_internal(typcache, key,
														query));
		case RANGESTRAT_ADJACENT:
			if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
				return false;
			if (range_adjacent_multirange_internal(typcache, key, query))
				return true;
			return range_overlaps_multirange_internal(typcache, key, query);
		case RANGESTRAT_CONTAINS:
			return range_contains_multirange_internal(typcache, key, query);
		case RANGESTRAT_CONTAINED_BY:

			/*
			 * Empty ranges are contained by anything, so if key is or
			 * contains any empty ranges, we must descend into it.
			 * Otherwise, descend only if key overlaps the query.
			 */
			if (RangeIsOrContainsEmpty(key))
				return true;
			return range_overlaps_multirange_internal(typcache, key, query);
		case RANGESTRAT_EQ:

			/*
			 * If query is empty, descend only if the key is or contains any
			 * empty ranges.  Otherwise, descend if key contains query.
			 */
			if (MultirangeIsEmpty(query))
				return RangeIsOrContainsEmpty(key);
			return range_contains_multirange_internal(typcache, key, query);
		default:
			elog(ERROR, "unrecognized range strategy: %d", strategy);
			return false;		/* keep compiler quiet */
	}
}

/*
 * GiST consistent test on an index internal page with element query
 */
static bool
range_gist_consistent_int_element(TypeCacheEntry *typcache,
								  StrategyNumber strategy,
								  const RangeType *key,
								  Datum query)
{
	switch (strategy)
	{
		case RANGESTRAT_CONTAINS_ELEM:
			return range_contains_elem_internal(typcache, key, query);
		default:
			elog(ERROR, "unrecognized range strategy: %d", strategy);
			return false;		/* keep compiler quiet */
	}
}

/*
 * GiST consistent test on an index leaf page with range query
 */
static bool
range_gist_consistent_leaf_range(TypeCacheEntry *typcache,
								 StrategyNumber strategy,
								 const RangeType *key,
								 const RangeType *query)
{
	switch (strategy)
	{
		case RANGESTRAT_BEFORE:
			return range_before_internal(typcache, key, query);
		case RANGESTRAT_OVERLEFT:
			return range_overleft_internal(typcache, key, query);
		case RANGESTRAT_OVERLAPS:
			return range_overlaps_internal(typcache, key, query);
		case RANGESTRAT_OVERRIGHT:
			return range_overright_internal(typcache, key, query);
		case RANGESTRAT_AFTER:
			return range_after_internal(typcache, key, query);
		case RANGESTRAT_ADJACENT:
			return range_adjacent_internal(typcache, key, query);
		case RANGESTRAT_CONTAINS:
			return range_contains_internal(typcache, key, query);
		case RANGESTRAT_CONTAINED_BY:
			return range_contained_by_internal(typcache, key, query);
		case RANGESTRAT_EQ:
			return range_eq_internal(typcache, key, query);
		default:
			elog(ERROR, "unrecognized range strategy: %d", strategy);
			return false;		/* keep compiler quiet */
	}
}

/*
 * GiST consistent test on an index leaf page with multirange query
 */
static bool
range_gist_consistent_leaf_multirange(TypeCacheEntry *typcache,
									  StrategyNumber strategy,
									  const RangeType *key,
									  const MultirangeType *query)
{
	switch (strategy)
	{
		case RANGESTRAT_BEFORE:
			return range_before_multirange_internal(typcache, key, query);
		case RANGESTRAT_OVERLEFT:
			return range_overleft_multirange_internal(typcache, key, query);
		case RANGESTRAT_OVERLAPS:
			return range_overlaps_multirange_internal(typcache, key, query);
		case RANGESTRAT_OVERRIGHT:
			return range_overright_multirange_internal(typcache, key, query);
		case RANGESTRAT_AFTER:
			return range_after_multirange_internal(typcache, key, query);
		case RANGESTRAT_ADJACENT:
			return range_adjacent_multirange_internal(typcache, key, query);
		case RANGESTRAT_CONTAINS:
			return range_contains_multirange_internal(typcache, key, query);
		case RANGESTRAT_CONTAINED_BY:
			return multirange_contains_range_internal(typcache, query, key);
		case RANGESTRAT_EQ:
			return multirange_union_range_equal(typcache, key, query);
		default:
			elog(ERROR, "unrecognized range strategy: %d", strategy);
			return false;		/* keep compiler quiet */
	}
}

/*
 * GiST consistent test on an index leaf page with element query
 */
static bool
range_gist_consistent_leaf_element(TypeCacheEntry *typcache,
								   StrategyNumber strategy,
								   const RangeType *key,
								   Datum query)
{
	switch (strategy)
	{
		case RANGESTRAT_CONTAINS_ELEM:
			return range_contains_elem_internal(typcache, key, query);
		default:
			elog(ERROR, "unrecognized range strategy: %d", strategy);
			return false;		/* keep compiler quiet */
	}
}
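
/*
 * All of the split routines below report their result the same way: the
 * offsets of the entries assigned to each side are appended to v->spl_left
 * and v->spl_right through the PLACE_LEFT/PLACE_RIGHT macros, which also
 * maintain left_range/right_range as running super-unions that end up as
 * v->spl_ldatum and v->spl_rdatum.
 */
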
/*
 * Trivial split: half of entries will be placed on one page
 * and the other half on the other page.
 */
static void
range_gist_fallback_split(TypeCacheEntry *typcache,
						  GistEntryVector *entryvec,
						  GIST_SPLITVEC *v)
{
	RangeType  *left_range = NULL;
	RangeType  *right_range = NULL;
	OffsetNumber i,
				maxoff,
				split_idx;

	maxoff = entryvec->n - 1;
	/* Split entries before this to left page, after to right: */
	split_idx = (maxoff - FirstOffsetNumber) / 2 + FirstOffsetNumber;

	v->spl_nleft = 0;
	v->spl_nright = 0;
	for (i = FirstOffsetNumber; i <= maxoff; i++)
	{
		RangeType  *range = DatumGetRangeTypeP(entryvec->vector[i].key);

		if (i < split_idx)
			PLACE_LEFT(range, i);
		else
			PLACE_RIGHT(range, i);
	}

	v->spl_ldatum = RangeTypePGetDatum(left_range);
	v->spl_rdatum = RangeTypePGetDatum(right_range);
}

/*
 * Split based on classes of ranges.
 *
 * See get_gist_range_class for class definitions.
 * classes_groups is an array of length CLS_COUNT indicating the side of the
 * split to which each class should go.
 */
static void
range_gist_class_split(TypeCacheEntry *typcache,
					   GistEntryVector *entryvec,
					   GIST_SPLITVEC *v,
					   SplitLR *classes_groups)
{
	RangeType  *left_range = NULL;
	RangeType  *right_range = NULL;
	OffsetNumber i,
				maxoff;

	maxoff = entryvec->n - 1;

	v->spl_nleft = 0;
	v->spl_nright = 0;
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		RangeType  *range = DatumGetRangeTypeP(entryvec->vector[i].key);
		int			class;

		/* Get class of range */
		class = get_gist_range_class(range);

		/* Place range to appropriate page */
		if (classes_groups[class] == SPLIT_LEFT)
			PLACE_LEFT(range, i);
		else
		{
			Assert(classes_groups[class] == SPLIT_RIGHT);
			PLACE_RIGHT(range, i);
		}
	}

	v->spl_ldatum = RangeTypePGetDatum(left_range);
	v->spl_rdatum = RangeTypePGetDatum(right_range);
}

/*
 * Sorting based split.  First half of entries according to the sort will be
 * placed to one page, and second half of entries will be placed to other
 * page.  use_upper_bound parameter indicates whether to use upper or lower
 * bound for sorting.
 */
static void
range_gist_single_sorting_split(TypeCacheEntry *typcache,
								GistEntryVector *entryvec,
								GIST_SPLITVEC *v,
								bool use_upper_bound)
{
	SingleBoundSortItem *sortItems;
	RangeType  *left_range = NULL;
	RangeType  *right_range = NULL;
	OffsetNumber i,
				maxoff,
				split_idx;

	maxoff = entryvec->n - 1;

	sortItems = (SingleBoundSortItem *)
		palloc(maxoff * sizeof(SingleBoundSortItem));

	/*
	 * Prepare auxiliary array and sort the values.
	 */
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		RangeType  *range = DatumGetRangeTypeP(entryvec->vector[i].key);
		RangeBound	bound2;
		bool		empty;

		sortItems[i - 1].index = i;
		/* Put appropriate bound into array */
		if (use_upper_bound)
			range_deserialize(typcache, range, &bound2,
							  &sortItems[i - 1].bound, &empty);
		else
			range_deserialize(typcache, range, &sortItems[i - 1].bound,
							  &bound2, &empty);
		Assert(!empty);
	}

	qsort_arg(sortItems, maxoff, sizeof(SingleBoundSortItem),
			  single_bound_cmp, typcache);

	split_idx = maxoff / 2;

	v->spl_nleft = 0;
	v->spl_nright = 0;

	for (i = 0; i < maxoff; i++)
	{
		int			idx = sortItems[i].index;
		RangeType  *range = DatumGetRangeTypeP(entryvec->vector[idx].key);

		if (i < split_idx)
			PLACE_LEFT(range, idx);
		else
			PLACE_RIGHT(range, idx);
	}

	v->spl_ldatum = RangeTypePGetDatum(left_range);
	v->spl_rdatum = RangeTypePGetDatum(right_range);
}

/*
 * Double sorting split algorithm.
 *
 * The algorithm considers dividing ranges into two groups.  The first (left)
 * group contains general left bound.  The second (right) group contains
 * general right bound.
 * The challenge is to find upper bound of left group and lower bound of
 * right group so that overlap of groups is minimal and ratio of distribution
 * is acceptable.  Algorithm finds for each lower bound of right group
 * minimal upper bound of left group, and for each upper bound of left group
 * maximal lower bound of right group.  For each found pair
 * range_gist_consider_split considers replacement of currently selected
 * split with the new one.
 *
 * After that, all the entries are divided into three groups:
 * 1) Entries which should be placed to the left group
 * 2) Entries which should be placed to the right group
 * 3) "Common entries" which can be placed to either group without affecting
 *	  amount of overlap.
 *
 * The common ranges are distributed by difference of distance from lower
 * bound of common range to lower bound of right group and distance from upper
 * bound of common range to upper bound of left group.
 *
 * For details see:
 * "A new double sorting-based node splitting algorithm for R-tree",
 * A. Korotkov
 * http://syrcose.ispras.ru/2011/files/SYRCoSE2011_Proceedings.pdf#page=36
 */
static void
range_gist_double_sorting_split(TypeCacheEntry *typcache,
								GistEntryVector *entryvec,
								GIST_SPLITVEC *v)
{
	ConsiderSplitContext context;
	OffsetNumber i,
				maxoff;
	RangeType  *left_range = NULL,
			   *right_range = NULL;
	int			common_entries_count;
	NonEmptyRange *by_lower,
			   *by_upper;
	CommonEntry *common_entries;
	int			nentries,
				i1,
				i2;
	RangeBound *right_lower,
			   *left_upper;

	memset(&context, 0, sizeof(ConsiderSplitContext));
	context.typcache = typcache;
	context.has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);

	maxoff = entryvec->n - 1;
	nentries = context.entries_count = maxoff - FirstOffsetNumber + 1;
	context.first = true;

	/* Allocate arrays for sorted range bounds */
	by_lower = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange));
	by_upper = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange));

	/* Fill arrays of bounds */
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		RangeType  *range = DatumGetRangeTypeP(entryvec->vector[i].key);
		bool		empty;

		range_deserialize(typcache, range,
						  &by_lower[i - FirstOffsetNumber].lower,
						  &by_lower[i - FirstOffsetNumber].upper,
						  &empty);
		Assert(!empty);
	}

	/*
	 * Make two arrays of range bounds: one sorted by lower bound and another
	 * sorted by upper bound.
	 */
	memcpy(by_upper, by_lower, nentries * sizeof(NonEmptyRange));
	qsort_arg(by_lower, nentries, sizeof(NonEmptyRange),
			  interval_cmp_lower, typcache);
	qsort_arg(by_upper, nentries, sizeof(NonEmptyRange),
			  interval_cmp_upper, typcache);

	/*----------
	 * The goal is to form a left and right range, so that every entry
	 * range is contained by either left or right interval (or both).
	 *
	 * For example, with the ranges (0,1), (1,3), (2,3), (2,4):
	 *
	 * 0 1 2 3 4
	 * +-+
	 *   +---+
	 *     +-+
	 *     +---+
	 *
	 * The left and right ranges are of the form (0,a) and (b,4).
	 * We first consider splits where b is the lower bound of an entry.
	 * We iterate through all entries, and for each b, calculate the
	 * smallest possible a.  Then we consider splits where a is the
	 * upper bound of an entry, and for each a, calculate the greatest
	 * possible b.
	 *
	 * In the above example, the first loop would consider splits:
	 *	 b=0: (0,1)-(0,4)
	 *	 b=1: (0,1)-(1,4)
	 *	 b=2: (0,3)-(2,4)
	 *
	 * And the second loop:
	 *	 a=1: (0,1)-(1,4)
	 *	 a=3: (0,3)-(2,4)
	 *	 a=4: (0,4)-(2,4)
	 *----------
	 */

	/*
	 * Iterate over lower bound of right group, finding smallest possible
	 * upper bound of left group.
	 */
	i1 = 0;
	i2 = 0;
	right_lower = &by_lower[i1].lower;
	left_upper = &by_upper[i2].lower;
	while (true)
	{
		/*
		 * Find next lower bound of right group.
		 */
		while (i1 < nentries &&
			   range_cmp_bounds(typcache, right_lower,
								&by_lower[i1].lower) == 0)
		{
			if (range_cmp_bounds(typcache, &by_lower[i1].upper,
								 left_upper) > 0)
				left_upper = &by_lower[i1].upper;
			i1++;
		}
		if (i1 >= nentries)
			break;
		right_lower = &by_lower[i1].lower;

		/*
		 * Find count of ranges which anyway should be placed to the left
		 * group.
		 */
		while (i2 < nentries &&
			   range_cmp_bounds(typcache, &by_upper[i2].upper,
								left_upper) <= 0)
			i2++;

		/*
		 * Consider found split to see if it's better than what we had.
		 */
		range_gist_consider_split(&context, right_lower, i1, left_upper, i2);
	}

	/*
	 * Iterate over upper bound of left group finding greatest possible lower
	 * bound of right group.
	 */
	i1 = nentries - 1;
	i2 = nentries - 1;
	right_lower = &by_lower[i1].upper;
	left_upper = &by_upper[i2].upper;
	while (true)
	{
		/*
		 * Find next upper bound of left group.
		 */
		while (i2 >= 0 &&
			   range_cmp_bounds(typcache, left_upper,
								&by_upper[i2].upper) == 0)
		{
			if (range_cmp_bounds(typcache, &by_upper[i2].lower,
								 right_lower) < 0)
				right_lower = &by_upper[i2].lower;
			i2--;
		}
		if (i2 < 0)
			break;
		left_upper = &by_upper[i2].upper;

		/*
		 * Find count of intervals which anyway should be placed to the right
		 * group.
		 */
		while (i1 >= 0 &&
			   range_cmp_bounds(typcache, &by_lower[i1].lower,
								right_lower) >= 0)
			i1--;

		/*
		 * Consider found split to see if it's better than what we had.
		 */
		range_gist_consider_split(&context, right_lower, i1 + 1,
								  left_upper, i2 + 1);
	}

	/*
	 * If we failed to find any acceptable splits, use trivial split.
	 */
	if (context.first)
	{
		range_gist_fallback_split(typcache, entryvec, v);
		return;
	}

	/*
	 * Ok, we have now selected bounds of the groups.  Now we have to
	 * distribute entries themselves.  At first we distribute entries which
	 * can be placed unambiguously and collect "common entries" to array.
	 */

	/* Allocate vectors for results */
	v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
	v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
	v->spl_nleft = 0;
	v->spl_nright = 0;

	/*
	 * Allocate an array for "common entries" - entries which can be placed
	 * to either group without affecting overlap along selected axis.
	 */
	common_entries_count = 0;
	common_entries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));

	/*
	 * Distribute entries which can be distributed unambiguously, and collect
	 * common entries.
	 */
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		RangeType  *range;
		RangeBound	lower,
					upper;
		bool		empty;

		/*
		 * Get upper and lower bounds along selected axis.
		 */
		range = DatumGetRangeTypeP(entryvec->vector[i].key);

		range_deserialize(typcache, range, &lower, &upper, &empty);

		if (range_cmp_bounds(typcache, &upper, context.left_upper) <= 0)
		{
			/* Fits in the left group */
			if (range_cmp_bounds(typcache, &lower, context.right_lower) >= 0)
			{
				/* Fits also in the right group, so "common entry" */
				common_entries[common_entries_count].index = i;
				if (context.has_subtype_diff)
				{
					/*
					 * delta = (lower - context.right_lower) -
					 * (context.left_upper - upper)
					 */
					common_entries[common_entries_count].delta =
						call_subtype_diff(typcache,
										  lower.val,
										  context.right_lower->val) -
						call_subtype_diff(typcache,
										  context.left_upper->val,
										  upper.val);
				}
				else
				{
					/* Without subtype_diff, take all deltas as zero */
					common_entries[common_entries_count].delta = 0;
				}
				common_entries_count++;
			}
			else
			{
				/* Doesn't fit to the right group, so join to the left group */
				PLACE_LEFT(range, i);
			}
		}
		else
		{
			/*
			 * Each entry should fit on either left or right group.  Since
			 * this entry didn't fit in the left group, it better fit in the
			 * right group.
			 */
			Assert(range_cmp_bounds(typcache, &lower,
									context.right_lower) >= 0);

			PLACE_RIGHT(range, i);
		}
	}

	/*
	 * Distribute "common entries", if any.
	 */
	if (common_entries_count > 0)
	{
		/*
		 * Sort "common entries" by calculated deltas in order to distribute
		 * the most ambiguous entries first.
		 */
		qsort(common_entries, common_entries_count, sizeof(CommonEntry),
			  common_entry_cmp);

		/*
		 * Distribute "common entries" between groups according to sorting.
		 */
		for (i = 0; i < common_entries_count; i++)
		{
			RangeType  *range;
			int			idx = common_entries[i].index;

			range = DatumGetRangeTypeP(entryvec->vector[idx].key);

			/*
			 * Check if we have to place this entry in either group to
			 * achieve LIMIT_RATIO.
			 */
			if (i < context.common_left)
				PLACE_LEFT(range, idx);
			else
				PLACE_RIGHT(range, idx);
		}
	}

	v->spl_ldatum = PointerGetDatum(left_range);
	v->spl_rdatum = PointerGetDatum(right_range);
}

/*
 * Consider replacement of currently selected split with a better one
 * during range_gist_double_sorting_split.
 */
static void
range_gist_consider_split(ConsiderSplitContext *context,
						  RangeBound *right_lower, int min_left_count,
						  RangeBound *left_upper, int max_left_count)
{
	int			left_count,
				right_count;
	float4		ratio,
				overlap;

	/*
	 * Calculate entries distribution ratio assuming most uniform
	 * distribution of common entries.
	 */
	if (min_left_count >= (context->entries_count + 1) / 2)
		left_count = min_left_count;
	else if (max_left_count <= context->entries_count / 2)
		left_count = max_left_count;
	else
		left_count = context->entries_count / 2;
	right_count = context->entries_count - left_count;

	/*
	 * Ratio of split: quotient between size of smaller group and total
	 * entries count.  This is necessarily 0.5 or less; if it's less than
	 * LIMIT_RATIO then we will never accept the new split.
	 */
	ratio = ((float4) Min(left_count, right_count)) /
		((float4) context->entries_count);

	if (ratio > LIMIT_RATIO)
	{
		bool		selectthis = false;

		/*
		 * The ratio is acceptable, so compare current split with previously
		 * selected one.  We search for minimal overlap (allowing negative
		 * values) and minimal ratio secondarily.  If subtype_diff is
		 * available, it's used for overlap measure.  Without subtype_diff we
		 * use number of "common entries" as an overlap measure.
		 */
		if (context->has_subtype_diff)
			overlap = call_subtype_diff(context->typcache,
										left_upper->val,
										right_lower->val);
		else
			overlap = max_left_count - min_left_count;

		/* If there is no previous selection, select this split */
		if (context->first)
			selectthis = true;
		else
		{
			/*
			 * Choose the new split if it has a smaller overlap, or same
			 * overlap but better ratio.
			 */
			if (overlap < context->overlap ||
				(overlap == context->overlap && ratio > context->ratio))
				selectthis = true;
		}

		if (selectthis)
		{
			/* save information about selected split */
			context->first = false;
			context->ratio = ratio;
			context->overlap = overlap;
			context->right_lower = right_lower;
			context->left_upper = left_upper;
			context->common_left = max_left_count - left_count;
			context->common_right = left_count - min_left_count;
		}
	}
}

/*
 * Find class number for range.
 *
 * The class number is a valid combination of the properties of the
 * range.  Note: the highest possible number is 8, because CLS_EMPTY
 * can't be combined with anything else.
 */
static int
get_gist_range_class(RangeType *range)
{
	int			classNumber;
	char		flags;

	flags = range_get_flags(range);
	if (flags & RANGE_EMPTY)
	{
		classNumber = CLS_EMPTY;
	}
	else
	{
		classNumber = 0;
		if (flags & RANGE_LB_INF)
			classNumber |= CLS_LOWER_INF;
		if (flags & RANGE_UB_INF)
			classNumber |= CLS_UPPER_INF;
		if (flags & RANGE_CONTAIN_EMPTY)
			classNumber |= CLS_CONTAIN_EMPTY;
	}
	return classNumber;
}

/*
 * Comparison function for range_gist_single_sorting_split.
 */
static int
single_bound_cmp(const void *a, const void *b, void *arg)
{
	SingleBoundSortItem *i1 = (SingleBoundSortItem *) a;
	SingleBoundSortItem *i2 = (SingleBoundSortItem *) b;
	TypeCacheEntry *typcache = (TypeCacheEntry *) arg;

	return range_cmp_bounds(typcache, &i1->bound, &i2->bound);
}

/*
 * Compare NonEmptyRanges by lower bound.
 */
static int
interval_cmp_lower(const void *a, const void *b, void *arg)
{
	NonEmptyRange *i1 = (NonEmptyRange *) a;
	NonEmptyRange *i2 = (NonEmptyRange *) b;
	TypeCacheEntry *typcache = (TypeCacheEntry *) arg;

	return range_cmp_bounds(typcache, &i1->lower, &i2->lower);
}

/*
 * Compare NonEmptyRanges by upper bound.
 */
static int
interval_cmp_upper(const void *a, const void *b, void *arg)
{
	NonEmptyRange *i1 = (NonEmptyRange *) a;
	NonEmptyRange *i2 = (NonEmptyRange *) b;
	TypeCacheEntry *typcache = (TypeCacheEntry *) arg;

	return range_cmp_bounds(typcache, &i1->upper, &i2->upper);
}

/*
 * Compare CommonEntrys by their deltas.
 */
static int
common_entry_cmp(const void *i1, const void *i2)
{
	double		delta1 = ((CommonEntry *) i1)->delta;
	double		delta2 = ((CommonEntry *) i2)->delta;

	if (delta1 < delta2)
		return -1;
	else if (delta1 > delta2)
		return 1;
	else
		return 0;
}

/*
 * Convenience function to invoke type-specific subtype_diff function.
 * Caller must have already checked that there is one for the range type.
 */
static float8
call_subtype_diff(TypeCacheEntry *typcache, Datum val1, Datum val2)
{
	float8		value;

	value = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
											 typcache->rng_collation,
											 val1, val2));
	/* Cope with buggy subtype_diff function by returning zero */
	if (value >= 0.0)
		return value;
	return 0.0;
}