From: Robert Haas
Date: Mon, 20 Sep 2021 19:21:23 +0000 (-0400)
Subject: code to add index segments
X-Git-Url: http://git.postgresql.org/gitweb/static/gitweb.js?a=commitdiff_plain;h=aeb4fea4072c193adf1b530f34c73474fde9e169;p=users%2Frhaas%2Fpostgres.git

code to add index segments
---

diff --git a/src/backend/access/conveyor/cbmetapage.c b/src/backend/access/conveyor/cbmetapage.c
index 8292f1ef4f..9b8039309c 100644
--- a/src/backend/access/conveyor/cbmetapage.c
+++ b/src/backend/access/conveyor/cbmetapage.c
@@ -45,10 +45,10 @@ cb_metapage_initialize(Page page, uint16 pages_per_segment)
 	meta->cbm_pages_per_segment = pages_per_segment;
 
 	/*
-	 * PageInit has already zeroed the page, so we only need to initialize
-	 * any fields that need to be non-zero. Everything of type CBPageNo
-	 * and all of the freespace map should start out as 0, but most of the
-	 * fields of CBSegNo fields need to be set to CB_INVALID_SEGMENT.
+	 * PageInit has already zeroed the page, so we only need to initialize any
+	 * fields that need to be non-zero. Everything of type CBPageNo and all of
+	 * the freespace map should start out as 0, but most of the CBSegNo
+	 * fields need to be set to CB_INVALID_SEGMENT.
 	 */
 	meta->cbm_oldest_index_segment = CB_INVALID_SEGMENT;
 	meta->cbm_newest_index_segment = CB_INVALID_SEGMENT;
@@ -133,7 +133,11 @@ cb_metapage_find_logical_page(CBMetapageData *meta,
  *
  * Regardless of the return value, *next_pageno and *next_segno will be
  * set to the lowest-numbered logical page that is not allocated and the
- * lowest segment number that is not allocated, respectively.
+ * lowest segment number that is not allocated, respectively. In addition,
+ * *index_metapage_start will be set to the first logical page number
+ * covered by the metapage portion of the index, and *newest_index_segment
+ * will be set to the segment number of the newest index segment, or
+ * CB_INVALID_SEGMENT if there is none.
  *
  * If the return value is CBM_INSERT_OK, there is an unfilled payload segment,
  * and *blkno will be set to the block number of the first unused page in that
@@ -143,7 +147,9 @@
 CBMInsertState
 cb_metapage_get_insert_state(CBMetapageData *meta, BlockNumber *blkno,
 							 CBPageNo *next_pageno,
-							 CBSegNo *next_segno)
+							 CBSegNo *next_segno,
+							 CBPageNo *index_metapage_start,
+							 CBSegNo *newest_index_segment)
 {
 	CBPageNo	relp;
 	CBSegNo		segno;
@@ -152,27 +158,29 @@ cb_metapage_get_insert_state(CBMetapageData *meta,
 	/* Set the values that we return unconditionally. */
 	*next_pageno = meta->cbm_next_logical_page;
 	*next_segno = meta->cbm_next_segment;
+	*index_metapage_start = meta->cbm_index_metapage_start;
+	*newest_index_segment = meta->cbm_newest_index_segment;
 
 	/* Compute next logical page number relative to start of metapage. */
 	relp = meta->cbm_next_logical_page - meta->cbm_index_metapage_start;
 
 	/*
-	 * If the next logical page number doesn't fit on the metapage, we need
-	 * to make space by relocating some index entries to an index segment.
+	 * If the next logical page number doesn't fit on the metapage, we need to
+	 * make space by relocating some index entries to an index segment.
 	 *
 	 * Potentially, we could instead clean out some index entries from the
-	 * metapage that now precede the logical truncation point, but that
-	 * would require a cleanup lock on the metapage, and it normally isn't
-	 * going to be possible, because typically the last truncate operation
-	 * will have afterward done any such work that is possible. We might miss
-	 * an opportunity in the case where the last truncate operation didn't
-	 * clean up fully, but hopefully that's rare enough that we don't need
-	 * to stress about it.
+	 * metapage that now precede the logical truncation point, but that would
+	 * require a cleanup lock on the metapage, and it normally isn't going to
+	 * be possible, because typically the last truncate operation will have
+	 * afterward done any such work that is possible. We might miss an
+	 * opportunity in the case where the last truncate operation didn't clean
+	 * up fully, but hopefully that's rare enough that we don't need to stress
+	 * about it.
 	 *
 	 * If the newest index segment is already full, then a new index segment
-	 * will need to be created. Otherwise, some entries can be copied into
-	 * the existing index segment. To make things easier for the caller, there
-	 * is a metapage flag to tell us which situation prevails.
+	 * will need to be created. Otherwise, some entries can be copied into the
+	 * existing index segment. To make things easier for the caller, there is
+	 * a metapage flag to tell us which situation prevails.
 	 */
 	if (relp >= CB_METAPAGE_INDEX_ENTRIES * meta->cbm_pages_per_segment)
 	{
@@ -214,7 +222,8 @@ cb_metapage_advance_next_logical_page(CBMetapageData *meta,
 
 	/* Perform sanity checks. */
 	if (cb_metapage_get_insert_state(meta, &expected_blkno, &dummy_pageno,
-									 &dummy_segno) != CBM_INSERT_OK)
+									 &dummy_segno, &dummy_pageno, &dummy_segno)
+		!= CBM_INSERT_OK)
 		elog(ERROR, "no active insertion segment");
 	if (blkno != expected_blkno)
 		elog(ERROR, "new page is at block %u but expected block %u",
@@ -453,7 +462,7 @@ cb_metapage_find_free_segment(CBMetapageData *meta)
 
 	for (i = 0; i < CB_METAPAGE_FREESPACE_BYTES; i += sizeof(uint64))
 	{
-		uint64		word = * (uint64 *) &meta->cbm_freespace_map[i];
+		uint64		word = *(uint64 *) &meta->cbm_freespace_map[i];
 
 		if (word != PG_UINT64_MAX)
 		{
diff --git a/src/backend/access/conveyor/conveyor.c b/src/backend/access/conveyor/conveyor.c
index aa56050bf6..9a81e46c30 100644
--- a/src/backend/access/conveyor/conveyor.c
+++ b/src/backend/access/conveyor/conveyor.c
@@ -154,8 +154,12 @@ ConveyorBeltOpen(Relation rel, ForkNumber fork, MemoryContext mcxt)
 Buffer
 ConveyorBeltGetNewPage(ConveyorBelt *cb, CBPageNo *pageno)
 {
-	BlockNumber fsmblock;
+	BlockNumber indexblock = InvalidBlockNumber;
+	BlockNumber prevblock = InvalidBlockNumber;
+	BlockNumber fsmblock = InvalidBlockNumber;
 	Buffer		metabuffer;
+	Buffer		indexbuffer = InvalidBuffer;
+	Buffer		prevbuffer = InvalidBuffer;
 	Buffer		fsmbuffer = InvalidBuffer;
 	Buffer		buffer;
 	CBPageNo	next_pageno;
@@ -213,6 +217,8 @@ ConveyorBeltGetNewPage(ConveyorBelt *cb, CBPageNo *pageno)
 		CBMetapageData *meta;
 		CBMInsertState insert_state;
 		BlockNumber next_blkno;
+		CBPageNo	index_metapage_start;
+		CBSegNo		newest_index_segment;
 		CBSegNo		next_segno;
 		bool		can_allocate_segment;
 
@@ -228,14 +234,16 @@ ConveyorBeltGetNewPage(ConveyorBelt *cb, CBPageNo *pageno)
 		 * lock on the metapage.
 		 *
 		 * NB: Our rule is that the lock on the metapage is acquired last,
-		 * after all other buffer locks. If any of fsmbuffer, idxbuffer, and
-		 * newidxbuffer are valid, they are also exclusively locked at this
+		 * after all other buffer locks. If any of indexbuffer, prevbuffer,
+		 * and fsmbuffer are valid, they are also exclusively locked at this
 		 * point.
 		 */
 		LockBuffer(metabuffer, mode);
 		meta = cb_metapage_get_special(BufferGetPage(metabuffer));
 		insert_state = cb_metapage_get_insert_state(meta, &next_blkno,
-													&next_pageno, &next_segno);
+													&next_pageno, &next_segno,
+													&index_metapage_start,
+													&newest_index_segment);
 
 		/*
 		 * If we need to allocate a payload or index segment, and we don't
@@ -248,53 +256,92 @@ ConveyorBeltGetNewPage(ConveyorBelt *cb, CBPageNo *pageno)
 			free_segno = cb_metapage_find_free_segment(meta);
 
 		/*
-		 * We cannot allocate a segment unless at least the first page of that
-		 * segment is guaranteed to be on disk. This is certain to be true for
-		 * any segment that's been allocated previously, but otherwise it's
-		 * only true if we've verified that the size of the relation on disk
-		 * is large enough.
+		 * If we need a new payload or index segment, see whether it's
+		 * possible to complete that operation on this trip through the loop.
+		 *
+		 * This will only be possible if we've got an exclusive lock on the
+		 * metapage.
+		 *
+		 * Furthermore, by rule, we cannot allocate a segment unless at least
+		 * the first page of that segment is guaranteed to be on disk. This is
+		 * certain to be true for any segment that's been allocated
+		 * previously, but otherwise it's only true if we've verified that the
+		 * size of the relation on disk is large enough.
 		 */
-		can_allocate_segment = (mode == BUFFER_LOCK_EXCLUSIVE)
-			&& (free_segno != CB_INVALID_SEGMENT)
-			&& (free_segno < next_segno ||
-				free_segno < possibly_not_on_disk_segno);
+		can_allocate_segment =
+			(insert_state == CBM_INSERT_NEEDS_PAYLOAD_SEGMENT
+			 || insert_state == CBM_INSERT_NEEDS_INDEX_SEGMENT) &&
+			mode == BUFFER_LOCK_EXCLUSIVE &&
+			(free_segno != CB_INVALID_SEGMENT) &&
+			(free_segno < next_segno ||
+			 free_segno < possibly_not_on_disk_segno);
 
 		/*
-		 * If the metapage says that we need a payload segment, and on a
-		 * previous trip through this loop we identified a candidate segment,
-		 * then see if we can allocate it.
+		 * If it still looks like we can allocate, check for the case where we
+		 * need a new index segment but don't have the other required buffer
+		 * locks.
 		 */
-		if (insert_state == CBM_INSERT_NEEDS_PAYLOAD_SEGMENT &&
-			can_allocate_segment)
-		{
-			bool		segment_still_free;
-
-			Assert(mode == BUFFER_LOCK_EXCLUSIVE);
+		if (can_allocate_segment &&
+			insert_state == CBM_INSERT_NEEDS_INDEX_SEGMENT &&
+			(!BufferIsValid(indexbuffer) ||
+			 !BufferIsValid(prevbuffer)))
+			can_allocate_segment = false;
 
+		/*
+		 * If it still looks like we can allocate, check for the case where
+		 * the segment we planned to allocate is no longer free.
+		 */
+		if (can_allocate_segment)
+		{
 			/* fsmbuffer, if valid, is already exclusively locked. */
 			if (BufferIsValid(fsmbuffer))
-				segment_still_free =
-					cb_fsmpage_get_fsm_bit(BufferGetPage(fsmbuffer),
-										   free_segno);
+				can_allocate_segment =
+					!cb_fsmpage_get_fsm_bit(BufferGetPage(fsmbuffer),
+											free_segno);
 			else
-				segment_still_free = cb_metapage_get_fsm_bit(meta, free_segno);
+				can_allocate_segment =
+					!cb_metapage_get_fsm_bit(meta, free_segno);
+		}
 
-			/*
-			 * If the target segment is still free, we can go ahead and
-			 * allocate it now. After that, we know there is a non-full
-			 * payload segment and can plan to try to grab the first page.
-			 */
-			if (segment_still_free)
+		/* If it STILL looks like we can allocate, do it! */
+		if (can_allocate_segment)
+		{
+			if (insert_state == CBM_INSERT_NEEDS_PAYLOAD_SEGMENT)
 			{
 				cb_allocate_payload_segment(cb->cb_insert_relfilenode,
 											cb->cb_fork, metabuffer,
 											fsmblock, fsmbuffer, free_segno,
 											free_segno >= next_segno,
 											needs_xlog);
+
+				/*
+				 * We know for sure that there's now a payload segment that
+				 * isn't full - and we know exactly where it's located.
+				 */
 				insert_state = CBM_INSERT_OK;
 				next_blkno = cb_segment_to_block(cb->cb_pages_per_segment,
 												 free_segno, 0);
 			}
+			else
+			{
+				Assert(insert_state == CBM_INSERT_NEEDS_INDEX_SEGMENT);
+
+				cb_allocate_index_segment(cb->cb_insert_relfilenode,
+										  cb->cb_fork, metabuffer,
+										  indexblock, indexbuffer,
+										  prevblock, prevbuffer,
+										  fsmblock, fsmbuffer, free_segno,
+										  index_metapage_start,
+										  free_segno >= next_segno,
+										  needs_xlog);
+
+				/*
+				 * We know for sure that there's now an index segment that
+				 * isn't full, and our next move must be to relocate some
+				 * index entries to that index segment.
+				 */
+				insert_state = CBM_INSERT_NEEDS_INDEX_ENTRIES_RELOCATED;
+			}
 
 			/*
 			 * Whether we allocated or not, the segment we intended to
@@ -305,6 +352,18 @@ ConveyorBeltGetNewPage(ConveyorBelt *cb, CBPageNo *pageno)
 
 		/* Release buffer locks and, except for the metapage, also pins. */
 		LockBuffer(metabuffer, BUFFER_LOCK_UNLOCK);
+		if (BufferIsValid(indexbuffer))
+		{
+			UnlockReleaseBuffer(indexbuffer);
+			indexblock = InvalidBlockNumber;
+			indexbuffer = InvalidBuffer;
+		}
+		if (BufferIsValid(prevbuffer))
+		{
+			UnlockReleaseBuffer(prevbuffer);
+			prevblock = InvalidBlockNumber;
+			prevbuffer = InvalidBuffer;
+		}
 		if (BufferIsValid(fsmbuffer))
 		{
 			UnlockReleaseBuffer(fsmbuffer);
@@ -356,7 +415,26 @@ ConveyorBeltGetNewPage(ConveyorBelt *cb, CBPageNo *pageno)
 			elog(ERROR, "XXX relocating index entries is not implemented yet");
 		}
 
-		/* Do we need a new segment? */
+		/*
+		 * If we need to add a new index segment, we'll have to update the
+		 * newest index page with a pointer to the index page we're going to
+		 * add, so we must read and pin that page.
+		 *
+		 * The names "prevblock" and "prevbuffer" are intended to signify that
+		 * what is currently the newest index segment will become the previous
+		 * segment relative to the one we're going to add.
+		 */
+		if (insert_state == CBM_INSERT_NEEDS_INDEX_SEGMENT)
+		{
+			prevblock = cb_segment_to_block(cb->cb_pages_per_segment,
+											newest_index_segment, 0);
+			prevbuffer = ConveyorBeltRead(cb, prevblock, BUFFER_LOCK_SHARE);
+		}
+
+		/*
+		 * If we need to add a new segment of either type, make provisions to
+		 * do so.
+		 */
 		if (insert_state == CBM_INSERT_NEEDS_PAYLOAD_SEGMENT ||
 			insert_state == CBM_INSERT_NEEDS_INDEX_SEGMENT)
 		{
@@ -421,7 +499,14 @@ ConveyorBeltGetNewPage(ConveyorBelt *cb, CBPageNo *pageno)
 
 				buffer = ReadBufferExtended(cb->cb_rel, cb->cb_fork, P_NEW,
 											RBM_NORMAL, NULL);
-				ReleaseBuffer(buffer);
+				if (nblocks < free_block ||
+					insert_state != CBM_INSERT_NEEDS_INDEX_SEGMENT)
+					ReleaseBuffer(buffer);
+				else
+				{
+					indexblock = nblocks;
+					indexbuffer = buffer;
+				}
 				++nblocks;
 			}
 		}
@@ -435,16 +520,15 @@ ConveyorBeltGetNewPage(ConveyorBelt *cb, CBPageNo *pageno)
 		}
 	}
 
-	if (insert_state == CBM_INSERT_NEEDS_INDEX_SEGMENT)
-	{
-		elog(ERROR, "XXX creating index segments is not implemented yet");
-	}
-
 	/*
 	 * Prepare for next attempt by reacquiring all relevant buffer locks,
 	 * except for the one on the metapage, which is acquired at the top of
 	 * the loop.
 	 */
+	if (BufferIsValid(indexbuffer))
+		LockBuffer(indexbuffer, BUFFER_LOCK_EXCLUSIVE);
+	if (BufferIsValid(prevbuffer))
+		LockBuffer(prevbuffer, BUFFER_LOCK_EXCLUSIVE);
 	if (BufferIsValid(fsmbuffer))
 		LockBuffer(fsmbuffer, BUFFER_LOCK_EXCLUSIVE);
 }
diff --git a/src/include/access/cbmetapage.h b/src/include/access/cbmetapage.h
index 55a1623c55..008ffd6996 100644
--- a/src/include/access/cbmetapage.h
+++ b/src/include/access/cbmetapage.h
@@ -99,7 +99,9 @@ extern CBMInsertState cb_metapage_find_next_logical_page(CBMetapageData *meta,
 extern CBMInsertState cb_metapage_get_insert_state(CBMetapageData *meta,
 												   BlockNumber *blkno,
 												   CBPageNo *next_pageno,
-												   CBSegNo *next_segno);
+												   CBSegNo *next_segno,
+												   CBPageNo *index_metapage_start,
+												   CBSegNo *newest_index_segment);
 extern void cb_metapage_advance_next_logical_page(CBMetapageData *meta,
 												  BlockNumber blkno);
 extern void cb_metapage_advance_oldest_logical_page(CBMetapageData *meta,
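
For context on the cb_metapage_find_free_segment() hunk above: the function
locates a free segment by testing the freespace map one 64-bit word at a
time, evidently descending to individual bits only when a word is not all
ones. Below is a minimal standalone sketch of that technique, not code from
the patch: FSM_BYTES, INVALID_SEGMENT, and find_free_segment are hypothetical
stand-ins for CB_METAPAGE_FREESPACE_BYTES, CB_INVALID_SEGMENT, and the real
function, and the bit-to-segment numbering shown assumes a little-endian
mapping, which the real code does not necessarily use.

/*
 * Standalone sketch of a word-at-a-time free-bit scan. Compiles with any
 * C99 compiler; prints "first free segment: 75" on a little-endian host.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define FSM_BYTES 64				/* stand-in for CB_METAPAGE_FREESPACE_BYTES */
#define INVALID_SEGMENT UINT32_MAX	/* stand-in for CB_INVALID_SEGMENT */

static uint32_t
find_free_segment(const uint8_t *fsm)
{
	size_t		i;

	for (i = 0; i < FSM_BYTES; i += sizeof(uint64_t))
	{
		uint64_t	word;

		/* memcpy rather than the patch's pointer cast, to avoid any
		 * possibility of an unaligned read in portable code */
		memcpy(&word, &fsm[i], sizeof(word));

		/* A word that is all ones contains no free segments; skip it. */
		if (word != UINT64_MAX)
		{
			unsigned	b;

			/* Some bit in this word is clear; return the first one. */
			for (b = 0; b < 64; ++b)
				if ((word & (UINT64_C(1) << b)) == 0)
					return (uint32_t) (i * 8 + b);
		}
	}
	return INVALID_SEGMENT;			/* every segment is allocated */
}

int
main(void)
{
	uint8_t		fsm[FSM_BYTES];

	memset(fsm, 0xFF, sizeof(fsm));	/* mark every segment allocated... */
	fsm[9] &= (uint8_t) ~(1 << 3);	/* ...then free segment 75 (9 * 8 + 3) */
	printf("first free segment: %u\n", find_free_segment(fsm));
	return 0;
}

The all-ones test keeps the common case to one comparison per 64 segments;
the bit-level loop runs only for the single word known to contain a free
segment, mirroring the shape of the loop the hunk above touches.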