From: Robert Haas Date: Tue, 7 Dec 2021 20:12:34 +0000 (-0500) Subject: some infrastructure for removing obsolete index pages X-Git-Url: http://git.postgresql.org/gitweb/static/gitweb.js?a=commitdiff_plain;h=8a16f04bc0fd2cf89c406452025692ac90dc0dbc;p=users%2Frhaas%2Fpostgres.git some infrastructure for removing obsolete index pages --- diff --git a/src/backend/access/conveyor/cbmetapage.c b/src/backend/access/conveyor/cbmetapage.c index e290c5238d..2abf7cc650 100644 --- a/src/backend/access/conveyor/cbmetapage.c +++ b/src/backend/access/conveyor/cbmetapage.c @@ -489,8 +489,11 @@ cb_metapage_remove_index_segment(CBMetapageData *meta, CBSegNo segno) * Examine the metapage state to determine how to go about recycling space. * * If the return value is CBM_OBSOLETE_SEGMENT_ENTRIES, then - * *oldest_index_segment will be set, and the caller should remove obsolete - * entries from that segment and/or the segment itself. + * *oldest_index_segment will be set to the segment number of the oldest index + * segment, and *index_vacuum_stop_point will be set to the oldest page number + * for which any index entry in the index pages should not be removed. The + * caller should remove index entries that precede that point from index + * segments, and if possible the segments themselves. 
* * If the return value is CBM_OBSOLETE_METAPAGE_ENTRIES, then *metapage_segno * will be set to a payload segment that can be deallocated, and @@ -509,6 +512,7 @@ cb_metapage_remove_index_segment(CBMetapageData *meta, CBSegNo segno) CBMObsoleteState cb_metapage_get_obsolete_state(CBMetapageData *meta, CBSegNo *oldest_index_segment, + CBPageNo *index_vacuum_stop_point, CBSegNo *metapage_segno, unsigned *metapage_offset) { @@ -559,6 +563,9 @@ cb_metapage_get_obsolete_state(CBMetapageData *meta, if (meta->cbm_oldest_index_segment != CB_INVALID_SEGMENT) { *oldest_index_segment = meta->cbm_oldest_index_segment; + *index_vacuum_stop_point = + Min(meta->cbm_index_metapage_start, + meta->cbm_oldest_logical_page); return CBM_OBSOLETE_SEGMENT_ENTRIES; } diff --git a/src/backend/access/conveyor/cbmodify.c b/src/backend/access/conveyor/cbmodify.c index 3f34b40dce..a609ff5958 100644 --- a/src/backend/access/conveyor/cbmodify.c +++ b/src/backend/access/conveyor/cbmodify.c @@ -617,15 +617,18 @@ cb_recycle_payload_segment(RelFileNode *rnode, /* * Deallocate an index segment. * - * indexblock and indexbuffer shuolud refer to the first block of the segment + * indexblock and indexbuffer should refer to the first block of the segment * to be deallocated. It's the oldest index segment, so we can't clear it * in advance, else we'd lose track of what other index segments exist. * * fsmblock and fsmbuffer should refer to the FSM page that contains the * FSM bit for the segment to be freed. If the segment is covered by the * metapage, pass InvalidBlockNumber and InvalidBuffer, respectively. + * + * The return value is the segment number of the oldest index segment that + * remains after the operation, or CB_INVALID_SEGMENT if none. 
*/ -void +CBSegNo cb_recycle_index_segment(RelFileNode *rnode, ForkNumber fork, Buffer metabuffer, diff --git a/src/backend/access/conveyor/conveyor.c b/src/backend/access/conveyor/conveyor.c index 586d518085..97a5399082 100644 --- a/src/backend/access/conveyor/conveyor.c +++ b/src/backend/access/conveyor/conveyor.c @@ -29,6 +29,10 @@ static CBSegNo ConveyorSearchFSMPages(ConveyorBelt *cb, Buffer *fsmbuffer); static void ConveyorBeltClearSegment(ConveyorBelt *cb, CBSegNo segno, bool include_first_page); +static CBSegNo ConveyorBeltFreeOldestIndexSegment(ConveyorBelt *cb, + Buffer metabuffer, + CBSegNo oldest_index_segment, + CBPageNo index_vacuum_stop_point); static Buffer ConveyorBeltExtend(ConveyorBelt *cb, BlockNumber blkno, BlockNumber *possibly_not_on_disk_blkno); static BlockNumber ConveyorBeltFSMBlockNumber(ConveyorBelt *cb, @@ -987,6 +991,13 @@ ConveyorBeltLogicalTruncate(ConveyorBelt *cb, CBPageNo oldest_keeper) UnlockReleaseBuffer(metabuffer); } +static unsigned +ConveyorBeltClearIndexEntries(ConveyorBelt *cb, CBSegNo oldest_index_segment, + CBPageNo index_vacuum_stop_point) +{ + elog(ERROR, "ConveyorBeltClearIndexEntries not implemented yet"); +} + /* * Recycle segments that are no longer needed. * @@ -1004,6 +1015,7 @@ ConveyorBeltVacuum(ConveyorBelt *cb) Buffer fsmbuffer = InvalidBuffer; CBSegNo cleared_segno = CB_INVALID_SEGMENT; bool needs_xlog; + bool cleaned_index_segments = false; /* Do any changes we make here need to be WAL-logged? 
*/ needs_xlog = RelationNeedsWAL(cb->cb_rel) || cb->cb_fork == INIT_FORKNUM; @@ -1026,6 +1038,7 @@ ConveyorBeltVacuum(ConveyorBelt *cb) CBMetapageData *meta; CBMObsoleteState obsolete_state; CBSegNo oldest_index_segment; + CBPageNo index_vacuum_stop_point; CBSegNo metapage_segno; unsigned metapage_offset; @@ -1033,6 +1046,7 @@ ConveyorBeltVacuum(ConveyorBelt *cb) meta = cb_metapage_get_special(BufferGetPage(metabuffer)); obsolete_state = cb_metapage_get_obsolete_state(meta, &oldest_index_segment, + &index_vacuum_stop_point, &metapage_segno, &metapage_offset); /* @@ -1127,24 +1141,170 @@ ConveyorBeltVacuum(ConveyorBelt *cb) } else if (obsolete_state == CBM_OBSOLETE_SEGMENT_ENTRIES) { + unsigned empty_index_segments; + /* - * XXX. - * - * 1. Walk the chain of index segments while keeping a pin on the - * metabuffer and the current index segment. - * - * 2. As we do, reinitialize payload segments and free them. + * Do this part just once. A single pass through the logic below + * should clean out the index segments as completely as possible, + * so if we end up here again, either the logical truncation point + * changed concurrently, or there's actually nothing to do. Even + * in the former case, it's OK to return without doing anything + * further, because this function only promises to clean up data + * that was no longer needed as of the time it was called. It makes + * no promises about cleaning up things that became obsolete once + * this function was already running. + */ + if (cleaned_index_segments) + { + UnlockReleaseBuffer(metabuffer); + break; + } + cleaned_index_segments = true; + + /* + * Release lock on metapage before locking other pages, but keep + * the pin for efficiency and so that no index segments can + * disappear concurrently. + */ + LockBuffer(metabuffer, BUFFER_LOCK_UNLOCK); + + /* + * Clear as many obsolete index entries out of index segments as + * we can. 
+ */ + empty_index_segments = + ConveyorBeltClearIndexEntries(cb, oldest_index_segment, + index_vacuum_stop_point); + + /* + * If even the oldest index segment is still partially in use, + * then all newer ones are needed also, and likewise everything in + * the metapage, which means no further cleanup is possible. + */ + if (empty_index_segments == 0) + { + ReleaseBuffer(metabuffer); + return; + } + + /* + * Free old index segments. * - * 3. Then, get a cleanup lock on the metapage and try to free as - * many old index segments as we can. We can remember which ones - * are eligible based on what we know we cleared (or found - * already cleared). + * We might stop before freeing the requested number of index + * segments, due to concurrent locking. If that happens, + * give up on performing any further cleanup. + */ + while (empty_index_segments > 0) + { + oldest_index_segment = + ConveyorBeltFreeOldestIndexSegment(cb, metabuffer, + oldest_index_segment, + index_vacuum_stop_point); + --empty_index_segments; + if (empty_index_segments > 0 && + oldest_index_segment == CB_INVALID_SEGMENT) + { + ReleaseBuffer(metabuffer); + return; + } + } + + /* + * If we freed some but not all index segments, all the entries in + * the metapage are still needed, so there is no point in trying to + * clean it up. */ - elog(ERROR, "CBM_OBSOLETE_SEGMENT_ENTRIES case is not implemented yet"); + if (oldest_index_segment != CB_INVALID_SEGMENT) + { + ReleaseBuffer(metabuffer); + return; + } + + /* + * Relock the metapage prior to looping around. We may still be + * able to clear index entries from the metapage, or adjust the + * start of the metapage index. + */ + LockBuffer(metabuffer, BUFFER_LOCK_EXCLUSIVE); } } } +/* + * Attempt to remove the oldest index segment. + * + * The return value is the segment number of the oldest index segment that + * remains after the operation has been completed. 
+ * If no index segments remain after the operation or if the operation cannot
+ * be completed, the return value is CB_INVALID_SEGMENT.
+ */
+static CBSegNo
+ConveyorBeltFreeOldestIndexSegment(ConveyorBelt *cb, Buffer metabuffer,
+								   CBSegNo oldest_index_segment,
+								   CBPageNo index_vacuum_stop_point)
+{
+	BlockNumber firstindexblock;
+	Buffer		firstindexbuffer;
+	BlockNumber fsmblock;
+	Buffer		fsmbuffer = InvalidBuffer;
+	bool		needs_xlog;
+	CBSegNo		oldest_remaining_index_segment = CB_INVALID_SEGMENT;
+
+	/*
+	 * Read and pin the first block of the index segment. The others will
+	 * already have been cleared, but the first one has to stick around until
+	 * we actually deallocate the segment, so that it remains possible to
+	 * walk the chain of index segments.
+	 */
+	needs_xlog = RelationNeedsWAL(cb->cb_rel) || cb->cb_fork == INIT_FORKNUM;
+	firstindexblock = cb_segment_to_block(cb->cb_pages_per_segment,
+										  oldest_index_segment, 0);
+	firstindexbuffer = ReadBufferExtended(cb->cb_rel, cb->cb_fork,
+										  firstindexblock, RBM_NORMAL, NULL);
+
+	/*
+	 * Also read and pin the appropriate FSM page. If the metapage tracks this
+	 * segment's busy/free status, fsmbuffer stays InvalidBuffer instead.
+	 */
+	fsmblock = cb_segment_to_fsm_block(cb->cb_pages_per_segment,
+									   oldest_index_segment);
+	if (fsmblock != InvalidBlockNumber)
+		fsmbuffer = ReadBufferExtended(cb->cb_rel, cb->cb_fork,
+									   fsmblock, RBM_NORMAL, NULL);
+
+	/*
+	 * The lock ordering described in the README requires the metapage lock
+	 * to be taken last, but it also requires that freeing an index segment
+	 * take a cleanup lock on the metapage. Since a concurrent reader will
+	 * hold a pin on the metapage when trying to lock the first index page,
+	 * we can't lock the first index page and then wait for a cleanup lock
+	 * on the metapage, because that might deadlock.
+	 *
+	 * To get around that problem, we take the cleanup lock on the metabuffer
+	 * conditionally. If we can't get it, we just skip freeing the oldest
+	 * index segment. That's not great, but it's not obvious how we can do
+	 * any better. The FSM page is locked only when one was pinned above.
+	 */
+	LockBuffer(firstindexbuffer, BUFFER_LOCK_EXCLUSIVE);
+	if (fsmbuffer != InvalidBuffer)
+		LockBuffer(fsmbuffer, BUFFER_LOCK_EXCLUSIVE);
+	if (ConditionalLockBufferForCleanup(metabuffer))
+	{
+		oldest_remaining_index_segment =
+			cb_recycle_index_segment(&RelationGetSmgr(cb->cb_rel)->smgr_rnode.node,
+									 cb->cb_fork, metabuffer,
+									 firstindexblock, firstindexbuffer,
+									 fsmblock, fsmbuffer,
+									 oldest_index_segment, needs_xlog);
+		LockBuffer(metabuffer, BUFFER_LOCK_UNLOCK);
+	}
+	if (fsmbuffer != InvalidBuffer)
+		UnlockReleaseBuffer(fsmbuffer);
+	UnlockReleaseBuffer(firstindexbuffer);
+
+	return oldest_remaining_index_segment;
+}
+
 /*
  * Clear all pages in a segment, or alternatively all pages in a segment
  * except for the first one. The segment can be a payload segment that isn't
diff --git a/src/include/access/cbfsmpage.h b/src/include/access/cbfsmpage.h
index e1181400eb..40e0133d65 100644
--- a/src/include/access/cbfsmpage.h
+++ b/src/include/access/cbfsmpage.h
@@ -104,4 +104,24 @@ cb_first_segment_for_fsm_page(BlockNumber blkno, uint16 pages_per_segment)
 		+ (fsm_index * CB_FSM_SEGMENTS_PER_FSMPAGE);
 }
 
+/*
+ * Figure out which FSM block covers a certain segment number.
+ *
+ * If the FSM entry for the indicated segment is in the metapage, the return
+ * value is InvalidBlockNumber.
+ */
+static inline BlockNumber
+cb_segment_to_fsm_block(uint16 pages_per_segment, CBSegNo segno)
+{
+	BlockNumber fsm_origin = cb_first_fsm_block(pages_per_segment);
+	unsigned	fsm_stride = cb_fsm_block_spacing(pages_per_segment);
+	unsigned	nth_fsm_page;
+
+	if (segno < CB_FSM_SEGMENTS_FOR_METAPAGE)
+		return InvalidBlockNumber;	/* tracked by the metapage instead */
+	nth_fsm_page = segno - CB_FSM_SEGMENTS_FOR_METAPAGE;
+	nth_fsm_page /= CB_FSM_SEGMENTS_PER_FSMPAGE;
+	return fsm_origin + nth_fsm_page * fsm_stride;
+}
+
 #endif /* CBFSMPAGE_H */
diff --git a/src/include/access/cbmetapage.h b/src/include/access/cbmetapage.h
index 9307bd3820..490130c339 100644
--- a/src/include/access/cbmetapage.h
+++ b/src/include/access/cbmetapage.h
@@ -159,6 +159,7 @@ extern void cb_metapage_remove_index_segment(CBMetapageData *meta,
 											 CBSegNo segno);
 extern CBMObsoleteState cb_metapage_get_obsolete_state(CBMetapageData *meta,
 													   CBSegNo *oldest_index_segment,
+													   CBPageNo *index_vacuum_stop_point,
 													   CBSegNo *metapage_segno,
 													   unsigned *metapage_offset);
 extern void cb_metapage_clear_obsolete_index_entry(CBMetapageData *meta,
diff --git a/src/include/access/cbmodify.h b/src/include/access/cbmodify.h
index 8850b0279f..4d31b4b175 100644
--- a/src/include/access/cbmodify.h
+++ b/src/include/access/cbmodify.h
@@ -110,15 +110,15 @@ extern void cb_recycle_payload_segment(RelFileNode *rnode,
 									   unsigned pageoffset,
 									   bool needs_xlog);
 
-extern void cb_recycle_index_segment(RelFileNode *rnode,
-									 ForkNumber fork,
-									 Buffer metabuffer,
-									 BlockNumber indexblock,
-									 Buffer indexbuffer,
-									 BlockNumber fsmblock,
-									 Buffer fsmbuffer,
-									 CBSegNo segno,
-									 bool needs_xlog);
+extern CBSegNo cb_recycle_index_segment(RelFileNode *rnode,
+									   ForkNumber fork,
+									   Buffer metabuffer,
+									   BlockNumber indexblock,
+									   Buffer indexbuffer,
+									   BlockNumber fsmblock,
+									   Buffer fsmbuffer,
+									   CBSegNo segno,
+									   bool needs_xlog);
 
 extern void cb_shift_metapage_index(RelFileNode *rnode,
 									ForkNumber fork,