author		David Rowley <drowley@postgresql.org>	2022-11-24 17:21:44 +1300
committer	David Rowley <drowley@postgresql.org>	2022-11-24 17:21:44 +1300
commit		d09dbeb9bde6b9faabd30e887eff4493331d6424 (patch)
tree		f4e543205d960ff8261862aea33d4d21cda7b150 /src/backend/access/hash
parent		d46ad72f464beafa6d92d60d214412374559e280 (diff)
download	postgresql-d09dbeb9bde6b9faabd30e887eff4493331d6424.tar.gz
Speedup hash index builds by skipping needless binary searches
When building hash indexes using the spool method, tuples are added to the
index page in hashkey order.  Because of this, we can safely skip performing
the binary search on the existing tuples on the page to find the location to
insert the tuple based on its hashkey value.  For this case, we can just
always put the tuple at the end of the item array as the tuples will always
arrive in hashkey order.  Testing has shown that this can improve hash index
build speeds by 5-15% with a unique set of integer values.

Author: Simon Riggs
Reviewed-by: David Rowley
Tested-by: David Zhang, Tomas Vondra
Discussion: https://postgr.es/m/CANbhV-GBc5JoG0AneUGPZZW3o4OK5LjBGeKe_icpC3R1McrZWQ@mail.gmail.com
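To see the shortcut in isolation, here is a minimal standalone C sketch.  All
names in it (ToyPage, toy_addtup, toy_binsearch) are hypothetical, not
PostgreSQL APIs: it contrasts a binary search that keeps an array of hash keys
sorted against a straight append used when the caller guarantees keys arrive
in ascending order, which is the guarantee the spool-based build provides.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAX_ITEMS 16

/* Toy stand-in for a hash index page: an array of hash keys kept sorted. */
typedef struct
{
	uint32_t	keys[MAX_ITEMS];
	int			nitems;
} ToyPage;

/* Binary search for the first slot whose key is >= 'key'. */
static int
toy_binsearch(const ToyPage *page, uint32_t key)
{
	int			lo = 0;
	int			hi = page->nitems;

	while (lo < hi)
	{
		int			mid = lo + (hi - lo) / 2;

		if (page->keys[mid] < key)
			lo = mid + 1;
		else
			hi = mid;
	}
	return lo;
}

/*
 * Insert 'key' while keeping the array sorted.  When 'append' is true the
 * caller promises 'key' is >= every existing key, so the binary search can
 * be skipped and the key placed directly at the end, which is the same
 * shortcut this commit applies in _hash_pgaddtup().
 */
static void
toy_addtup(ToyPage *page, uint32_t key, bool append)
{
	int			off;

	assert(page->nitems < MAX_ITEMS);

	if (append)
	{
		/* cheap sanity check mirroring the Assert in the patch */
		assert(page->nitems == 0 || page->keys[page->nitems - 1] <= key);
		off = page->nitems;
	}
	else
		off = toy_binsearch(page, key);

	/* shift the tail up one slot and store the new key */
	memmove(&page->keys[off + 1], &page->keys[off],
			(page->nitems - off) * sizeof(uint32_t));
	page->keys[off] = key;
	page->nitems++;
}

int
main(void)
{
	ToyPage		page = {0};
	uint32_t	sorted_input[] = {10, 20, 20, 40};

	/* spool-style build: input is already sorted, so append blindly */
	for (int i = 0; i < 4; i++)
		toy_addtup(&page, sorted_input[i], true);

	/* ordinary insert: the position must be found by binary search */
	toy_addtup(&page, 30, false);

	for (int i = 0; i < page.nitems; i++)
		printf("%u ", (unsigned) page.keys[i]);		/* 10 20 20 30 40 */
	printf("\n");
	return 0;
}

In the real patch the saving is the per-tuple _hash_binsearch() call during a
spool-based CREATE INDEX, which is where the quoted 5-15% comes from.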
Diffstat (limited to 'src/backend/access/hash')
-rw-r--r--	src/backend/access/hash/hash.c		4
-rw-r--r--	src/backend/access/hash/hashinsert.c	41
-rw-r--r--	src/backend/access/hash/hashsort.c	3
3 files changed, 37 insertions(+), 11 deletions(-)
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index c361509d68..77fd147f68 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -231,7 +231,7 @@ hashbuildCallback(Relation index,
 		itup = index_form_tuple(RelationGetDescr(index),
 								index_values, index_isnull);
 		itup->t_tid = *tid;
-		_hash_doinsert(index, itup, buildstate->heapRel);
+		_hash_doinsert(index, itup, buildstate->heapRel, false);
 		pfree(itup);
 	}
@@ -265,7 +265,7 @@ hashinsert(Relation rel, Datum *values, bool *isnull,
 	itup = index_form_tuple(RelationGetDescr(rel), index_values, index_isnull);
 	itup->t_tid = *ht_ctid;
-	_hash_doinsert(rel, itup, heapRel);
+	_hash_doinsert(rel, itup, heapRel, false);
 	pfree(itup);
diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c
index 23907d2e5b..9db522051e 100644
--- a/src/backend/access/hash/hashinsert.c
+++ b/src/backend/access/hash/hashinsert.c
@@ -32,9 +32,12 @@ static void _hash_vacuum_one_page(Relation rel, Relation hrel,
  *
  * This routine is called by the public interface routines, hashbuild
  * and hashinsert.  By here, itup is completely filled in.
+ *
+ * 'sorted' must only be passed as 'true' when inserts are done in hashkey
+ * order.
  */
 void
-_hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel)
+_hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel, bool sorted)
 {
 	Buffer		buf = InvalidBuffer;
 	Buffer		bucket_buf;
@@ -198,7 +201,7 @@ restart_insert:
 	START_CRIT_SECTION();
 
 	/* found page with enough space, so add the item here */
-	itup_off = _hash_pgaddtup(rel, buf, itemsz, itup);
+	itup_off = _hash_pgaddtup(rel, buf, itemsz, itup, sorted);
 	MarkBufferDirty(buf);
 
 	/* metapage operations */
@@ -263,21 +266,43 @@ restart_insert:
  *
  * Returns the offset number at which the tuple was inserted.  This function
  * is responsible for preserving the condition that tuples in a hash index
- * page are sorted by hashkey value.
+ * page are sorted by hashkey value; however, if the caller is certain that
+ * the hashkey for the tuple being added is >= the hashkeys of all existing
+ * tuples on the page, then the 'appendtup' flag may be passed as true.  This
+ * saves having to perform a binary search for the correct location to insert
+ * the tuple.
  */
 OffsetNumber
-_hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup)
+_hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup,
+			   bool appendtup)
 {
 	OffsetNumber itup_off;
 	Page		page;
-	uint32		hashkey;
 
 	_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 	page = BufferGetPage(buf);
 
-	/* Find where to insert the tuple (preserving page's hashkey ordering) */
-	hashkey = _hash_get_indextuple_hashkey(itup);
-	itup_off = _hash_binsearch(page, hashkey);
+	/*
+	 * Find where to insert the tuple (preserving page's hashkey ordering). If
+	 * 'appendtup' is true then we just insert it at the end.
+	 */
+	if (appendtup)
+	{
+		itup_off = PageGetMaxOffsetNumber(page) + 1;
+
+		/* ensure this tuple's hashkey is >= that of the final existing tuple */
+		Assert(PageGetMaxOffsetNumber(page) == 0 ||
+			   _hash_get_indextuple_hashkey((IndexTuple)
+											PageGetItem(page, PageGetItemId(page,
+																			PageGetMaxOffsetNumber(page)))) <=
+			   _hash_get_indextuple_hashkey(itup));
+	}
+	else
+	{
+		uint32		hashkey = _hash_get_indextuple_hashkey(itup);
+
+		itup_off = _hash_binsearch(page, hashkey);
+	}
 
 	if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
 		== InvalidOffsetNumber)
diff --git a/src/backend/access/hash/hashsort.c b/src/backend/access/hash/hashsort.c
index 19563148d0..a4ab8384e1 100644
--- a/src/backend/access/hash/hashsort.c
+++ b/src/backend/access/hash/hashsort.c
@@ -145,7 +145,8 @@ _h_indexbuild(HSpool *hspool, Relation heapRel)
 		Assert(hashkey >= lasthashkey);
 #endif
 
-		_hash_doinsert(hspool->index, itup, heapRel);
+		/* the tuples are sorted by hashkey, so pass 'sorted' as true */
+		_hash_doinsert(hspool->index, itup, heapRel, true);
 
 		pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
 									 ++tups_done);