From 65fdd6a193eb9ca83134d95c37ab2f0f4519baa0 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Thu, 7 Jul 2016 14:47:19 -0700 Subject: [PATCH] Use more efficient hashtable for tidbitmap.c to speed up bitmap scans. Use the new simplehash.h to speed up tidbitmap.c uses. For bitmap scan heavy queries speedups of over 100% have been measured. Both lossy and exact scans benefit, but the wins are bigger for mostly exact scans. The conversion is mostly trivial, except that tbm_lossify() now restarts lossifying at the point it previously stopped. Otherwise the hash table becomes unbalanced because the scan in done in hash-order, leaving the end of the hashtable more densely filled then the beginning. That caused performance issues with dynahash as well, but due to the open chaining they were less pronounced than with the linear adressing from simplehash.h. --- src/backend/nodes/tidbitmap.c | 131 ++++++++++++++++++---------------- 1 file changed, 70 insertions(+), 61 deletions(-) diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c index dfeb7d5c63..7423e8df9f 100644 --- a/src/backend/nodes/tidbitmap.c +++ b/src/backend/nodes/tidbitmap.c @@ -43,7 +43,6 @@ #include "access/htup_details.h" #include "nodes/bitmapset.h" #include "nodes/tidbitmap.h" -#include "utils/hsearch.h" /* * The maximum number of tuples per page is not large (typically 256 with @@ -97,6 +96,7 @@ typedef struct PagetableEntry { BlockNumber blockno; /* page number (hashtable key) */ + char status; bool ischunk; /* T = lossy storage, F = exact */ bool recheck; /* should the tuples be rechecked? */ bitmapword words[Max(WORDS_PER_PAGE, WORDS_PER_CHUNK)]; @@ -128,12 +128,13 @@ struct TIDBitmap NodeTag type; /* to make it a valid Node */ MemoryContext mcxt; /* memory context containing me */ TBMStatus status; /* see codes above */ - HTAB *pagetable; /* hash table of PagetableEntry's */ + struct pagetable_hash *pagetable; /* hash table of PagetableEntry's */ int nentries; /* number of entries in pagetable */ int maxentries; /* limit on same to meet maxbytes */ int npages; /* number of exact entries in pagetable */ int nchunks; /* number of lossy entries in pagetable */ bool iterating; /* tbm_begin_iterate called? */ + uint32 lossify_start; /* offset to start lossifying hashtable */ PagetableEntry entry1; /* used when status == TBM_ONE_PAGE */ /* these are valid when iterating is true: */ PagetableEntry **spages; /* sorted exact-page list, or NULL */ @@ -168,6 +169,29 @@ static void tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno); static void tbm_lossify(TIDBitmap *tbm); static int tbm_comparator(const void *left, const void *right); +static inline uint32 +hash_blockno(BlockNumber b) +{ + uint32 h = b; + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; +} + +#define SH_PREFIX pagetable +#define SH_KEYTYPE BlockNumber +#define SH_KEY blockno +#define SH_CONTAINS PagetableEntry +#define SH_HASH_KEY(tb, key) hash_blockno(key) +#define SH_EQUAL(tb, a, b) a == b +#define SH_SCOPE extern +#define SH_DEFINE +#define SH_DECLARE +#include "lib/simplehash.h" + /* * tbm_create - create an initially-empty bitmap @@ -190,17 +214,15 @@ tbm_create(long maxbytes) /* * Estimate number of hashtable entries we can have within maxbytes. This - * estimates the hash overhead at MAXALIGN(sizeof(HASHELEMENT)) plus a - * pointer per hash entry, which is crude but good enough for our purpose. - * Also count an extra Pointer per entry for the arrays created during - * iteration readout. + * estimates the hash cost as sizeof(PagetableEntry), which is good enough + * for our purpose. Also count an extra Pointer per entry for the arrays + * created during iteration readout. */ - nbuckets = maxbytes / - (MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(sizeof(PagetableEntry)) - + sizeof(Pointer) + sizeof(Pointer)); + nbuckets = maxbytes / (sizeof(PagetableEntry) + sizeof(Pointer)); nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */ nbuckets = Max(nbuckets, 16); /* sanity limit */ tbm->maxentries = (int) nbuckets; + tbm->lossify_start = 0; return tbm; } @@ -212,32 +234,24 @@ tbm_create(long maxbytes) static void tbm_create_pagetable(TIDBitmap *tbm) { - HASHCTL hash_ctl; - Assert(tbm->status != TBM_HASH); Assert(tbm->pagetable == NULL); - /* Create the hashtable proper */ - MemSet(&hash_ctl, 0, sizeof(hash_ctl)); - hash_ctl.keysize = sizeof(BlockNumber); - hash_ctl.entrysize = sizeof(PagetableEntry); - hash_ctl.hcxt = tbm->mcxt; - tbm->pagetable = hash_create("TIDBitmap", - 128, /* start small and extend */ - &hash_ctl, - HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); - - /* If entry1 is valid, push it into the hashtable */ + tbm->pagetable = pagetable_create(tbm->mcxt, 128); + if (tbm->status == TBM_ONE_PAGE) { PagetableEntry *page; bool found; + char oldstatus; - page = (PagetableEntry *) hash_search(tbm->pagetable, - (void *) &tbm->entry1.blockno, - HASH_ENTER, &found); + page = pagetable_insert(tbm->pagetable, + tbm->entry1.blockno, + &found); Assert(!found); + oldstatus = page->status; memcpy(page, &tbm->entry1, sizeof(PagetableEntry)); + page->status = oldstatus; } tbm->status = TBM_HASH; @@ -250,7 +264,7 @@ void tbm_free(TIDBitmap *tbm) { if (tbm->pagetable) - hash_destroy(tbm->pagetable); + pagetable_destroy(tbm->pagetable); if (tbm->spages) pfree(tbm->spages); if (tbm->schunks) @@ -357,12 +371,12 @@ tbm_union(TIDBitmap *a, const TIDBitmap *b) tbm_union_page(a, &b->entry1); else { - HASH_SEQ_STATUS status; + pagetable_iterator i; PagetableEntry *bpage; Assert(b->status == TBM_HASH); - hash_seq_init(&status, b->pagetable); - while ((bpage = (PagetableEntry *) hash_seq_search(&status)) != NULL) + pagetable_start_iterate(b->pagetable, &i); + while ((bpage = pagetable_iterate(b->pagetable, &i)) != NULL) tbm_union_page(a, bpage); } } @@ -449,12 +463,12 @@ tbm_intersect(TIDBitmap *a, const TIDBitmap *b) } else { - HASH_SEQ_STATUS status; + pagetable_iterator i; PagetableEntry *apage; Assert(a->status == TBM_HASH); - hash_seq_init(&status, a->pagetable); - while ((apage = (PagetableEntry *) hash_seq_search(&status)) != NULL) + pagetable_start_iterate(a->pagetable, &i); + while ((apage = pagetable_iterate(a->pagetable, &i)) != NULL) { if (tbm_intersect_page(a, apage, b)) { @@ -464,9 +478,7 @@ tbm_intersect(TIDBitmap *a, const TIDBitmap *b) else a->npages--; a->nentries--; - if (hash_search(a->pagetable, - (void *) &apage->blockno, - HASH_REMOVE, NULL) == NULL) + if (!pagetable_delete(a->pagetable, apage->blockno)) elog(ERROR, "hash table corrupted"); } } @@ -606,7 +618,7 @@ tbm_begin_iterate(TIDBitmap *tbm) */ if (tbm->status == TBM_HASH && !tbm->iterating) { - HASH_SEQ_STATUS status; + pagetable_iterator i; PagetableEntry *page; int npages; int nchunks; @@ -620,9 +632,9 @@ tbm_begin_iterate(TIDBitmap *tbm) MemoryContextAlloc(tbm->mcxt, tbm->nchunks * sizeof(PagetableEntry *)); - hash_seq_init(&status, tbm->pagetable); npages = nchunks = 0; - while ((page = (PagetableEntry *) hash_seq_search(&status)) != NULL) + pagetable_start_iterate(tbm->pagetable, &i); + while ((page = pagetable_iterate(tbm->pagetable, &i)) != NULL) { if (page->ischunk) tbm->schunks[nchunks++] = page; @@ -791,9 +803,7 @@ tbm_find_pageentry(const TIDBitmap *tbm, BlockNumber pageno) return page; } - page = (PagetableEntry *) hash_search(tbm->pagetable, - (void *) &pageno, - HASH_FIND, NULL); + page = pagetable_lookup(tbm->pagetable, pageno); if (page == NULL) return NULL; if (page->ischunk) @@ -833,16 +843,15 @@ tbm_get_pageentry(TIDBitmap *tbm, BlockNumber pageno) tbm_create_pagetable(tbm); } - /* Look up or create an entry */ - page = (PagetableEntry *) hash_search(tbm->pagetable, - (void *) &pageno, - HASH_ENTER, &found); + page = pagetable_insert(tbm->pagetable, pageno, &found); } /* Initialize it if not present before */ if (!found) { + char oldstatus = page->status; MemSet(page, 0, sizeof(PagetableEntry)); + page->status = oldstatus; page->blockno = pageno; /* must count it too */ tbm->nentries++; @@ -869,9 +878,9 @@ tbm_page_is_lossy(const TIDBitmap *tbm, BlockNumber pageno) bitno = pageno % PAGES_PER_CHUNK; chunk_pageno = pageno - bitno; - page = (PagetableEntry *) hash_search(tbm->pagetable, - (void *) &chunk_pageno, - HASH_FIND, NULL); + + page = pagetable_lookup(tbm->pagetable, chunk_pageno); + if (page != NULL && page->ischunk) { int wordnum = WORDNUM(bitno); @@ -912,9 +921,7 @@ tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno) */ if (bitno != 0) { - if (hash_search(tbm->pagetable, - (void *) &pageno, - HASH_REMOVE, NULL) != NULL) + if (pagetable_delete(tbm->pagetable, pageno)) { /* It was present, so adjust counts */ tbm->nentries--; @@ -922,15 +929,14 @@ tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno) } } - /* Look up or create entry for chunk-header page */ - page = (PagetableEntry *) hash_search(tbm->pagetable, - (void *) &chunk_pageno, - HASH_ENTER, &found); + page = pagetable_insert(tbm->pagetable, chunk_pageno, &found); /* Initialize it if not present before */ if (!found) { + char oldstatus = page->status; MemSet(page, 0, sizeof(PagetableEntry)); + page->status = oldstatus; page->blockno = chunk_pageno; page->ischunk = true; /* must count it too */ @@ -939,8 +945,10 @@ tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno) } else if (!page->ischunk) { + char oldstatus = page->status; /* chunk header page was formerly non-lossy, make it lossy */ MemSet(page, 0, sizeof(PagetableEntry)); + page->status = oldstatus; page->blockno = chunk_pageno; page->ischunk = true; /* we assume it had some tuple bit(s) set, so mark it lossy */ @@ -962,7 +970,7 @@ tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno) static void tbm_lossify(TIDBitmap *tbm) { - HASH_SEQ_STATUS status; + pagetable_iterator i; PagetableEntry *page; /* @@ -977,8 +985,8 @@ tbm_lossify(TIDBitmap *tbm) Assert(!tbm->iterating); Assert(tbm->status == TBM_HASH); - hash_seq_init(&status, tbm->pagetable); - while ((page = (PagetableEntry *) hash_seq_search(&status)) != NULL) + pagetable_start_iterate_at(tbm->pagetable, &i, tbm->lossify_start); + while ((page = pagetable_iterate(tbm->pagetable, &i)) != NULL) { if (page->ischunk) continue; /* already a chunk header */ @@ -996,14 +1004,15 @@ tbm_lossify(TIDBitmap *tbm) if (tbm->nentries <= tbm->maxentries / 2) { /* we have done enough */ - hash_seq_term(&status); + tbm->lossify_start = i.cur; break; } /* * Note: tbm_mark_page_lossy may have inserted a lossy chunk into the - * hashtable. We can continue the same seq_search scan since we do - * not care whether we visit lossy chunks or not. + * hashtable and may have deleted the non-lossy chunk. We can + * continue the same hash table scan, since failure to visit one + * element or visiting the newly inserted element, isn't fatal. */ } -- 2.39.5