some infrastructure for removing obsolete index pages
author: Robert Haas <[email protected]>
Tue, 7 Dec 2021 20:12:34 +0000 (15:12 -0500)
committer: Robert Haas <[email protected]>
Tue, 7 Dec 2021 20:12:34 +0000 (15:12 -0500)
src/backend/access/conveyor/cbmetapage.c
src/backend/access/conveyor/cbmodify.c
src/backend/access/conveyor/conveyor.c
src/include/access/cbfsmpage.h
src/include/access/cbmetapage.h
src/include/access/cbmodify.h

index e290c5238ddec3377520cb8ae9b4c0de410d06fd..2abf7cc650f08d4ea61c5ecc8a700f74c4a65e47 100644 (file)
@@ -489,8 +489,11 @@ cb_metapage_remove_index_segment(CBMetapageData *meta, CBSegNo segno)
  * Examine the metapage state to determine how to go about recycling space.
  *
  * If the return value is CBM_OBSOLETE_SEGMENT_ENTRIES, then
- * *oldest_index_segment will be set, and the caller should remove obsolete
- * entries from that segment and/or the segment itself.
+ * *oldest_index_segment will be set to the segment number of the oldest index
+ * segment, and *index_vacuum_stop_point will be set to the oldest page number
+ * for which any index entry in the index pages should not be removed. The
+ * caller should remove index entries that precede that point from index
+ * segments, and if possible the segments themselves.
  *
  * If the return value is CBM_OBSOLETE_METAPAGE_ENTRIES, then *metapage_segno
  * will be set to a payload segment that can be deallocated, and
@@ -509,6 +512,7 @@ cb_metapage_remove_index_segment(CBMetapageData *meta, CBSegNo segno)
 CBMObsoleteState
 cb_metapage_get_obsolete_state(CBMetapageData *meta,
                                                           CBSegNo *oldest_index_segment,
+                                                          CBPageNo *index_vacuum_stop_point,
                                                           CBSegNo *metapage_segno,
                                                           unsigned *metapage_offset)
 {
@@ -559,6 +563,9 @@ cb_metapage_get_obsolete_state(CBMetapageData *meta,
        if (meta->cbm_oldest_index_segment != CB_INVALID_SEGMENT)
        {
                *oldest_index_segment = meta->cbm_oldest_index_segment;
+               *index_vacuum_stop_point =
+                       Min(meta->cbm_index_metapage_start,
+                               meta->cbm_oldest_logical_page);
                return CBM_OBSOLETE_SEGMENT_ENTRIES;
        }
 
index 3f34b40dce028d2c980df3464c93cffe7b420689..a609ff5958c15c11c42149a4f3569a538c943e3d 100644 (file)
@@ -617,15 +617,18 @@ cb_recycle_payload_segment(RelFileNode *rnode,
 /*
  * Deallocate an index segment.
  *
- * indexblock and indexbuffer shuolud refer to the first block of the segment
+ * indexblock and indexbuffer should refer to the first block of the segment
  * to be deallocated. It's the oldest index segment, so we can't clear it
  * in advance, else we'd lose track of what other index segments exist.
  *
  * fsmblock and fsmbuffer should refer to the FSM page that contains the
  * FSM bit for the segment to be freed. If the segment is covered by the
  * metapage, pass InvalidBlockNumber and InvalidBuffer, respectively.
+ *
+ * The return value is the segment number of the oldest index segment that
+ * remains after the operation, or CB_INVALID_SEGMENT if none.
  */
-void
+CBSegNo
 cb_recycle_index_segment(RelFileNode *rnode,
                                                 ForkNumber fork,
                                                 Buffer metabuffer,
index 586d51808559d7e4cd4424ff94b338f15ed321a4..97a5399082dd6bd047adb3d113762bbc46bb6e69 100644 (file)
@@ -29,6 +29,10 @@ static CBSegNo ConveyorSearchFSMPages(ConveyorBelt *cb,
                                                                          Buffer *fsmbuffer);
 static void ConveyorBeltClearSegment(ConveyorBelt *cb, CBSegNo segno,
                                                                         bool include_first_page);
+static CBSegNo ConveyorBeltFreeOldestIndexSegment(ConveyorBelt *cb,
+                                                                                                 Buffer metabuffer,
+                                                                                                 CBSegNo oldest_index_segment,
+                                                                                                 CBPageNo index_vacuum_stop_point);
 static Buffer ConveyorBeltExtend(ConveyorBelt *cb, BlockNumber blkno,
                                                                 BlockNumber *possibly_not_on_disk_blkno);
 static BlockNumber ConveyorBeltFSMBlockNumber(ConveyorBelt *cb,
@@ -987,6 +991,13 @@ ConveyorBeltLogicalTruncate(ConveyorBelt *cb, CBPageNo oldest_keeper)
        UnlockReleaseBuffer(metabuffer);
 }
 
+/*
+ * Clear obsolete index entries out of index segments.
+ *
+ * This is a placeholder: it is not implemented yet and unconditionally
+ * raises an ERROR.
+ *
+ * The caller, ConveyorBeltVacuum, passes the oldest index segment and the
+ * index vacuum stop point obtained from the metapage, and treats the return
+ * value as the number of index segments that are now empty and can be freed;
+ * a return value of 0 means that no index segment can be freed.
+ */
+static unsigned
+ConveyorBeltClearIndexEntries(ConveyorBelt *cb, CBSegNo oldest_index_segment,
+                                                         CBPageNo index_vacuum_stop_point)
+{
+       elog(ERROR, "ConveyorBeltClearIndexEntries not implemented yet");
+       return 0;                                       /* keep compiler quiet */
+}
+
 /*
  * Recycle segments that are no longer needed.
  *
@@ -1004,6 +1015,7 @@ ConveyorBeltVacuum(ConveyorBelt *cb)
        Buffer          fsmbuffer = InvalidBuffer;
        CBSegNo         cleared_segno = CB_INVALID_SEGMENT;
        bool            needs_xlog;
+       bool            cleaned_index_segments = false;
 
        /* Do any changes we make here need to be WAL-logged? */
        needs_xlog = RelationNeedsWAL(cb->cb_rel) || cb->cb_fork == INIT_FORKNUM;
@@ -1026,6 +1038,7 @@ ConveyorBeltVacuum(ConveyorBelt *cb)
                CBMetapageData     *meta;
                CBMObsoleteState        obsolete_state;
                CBSegNo         oldest_index_segment;
+               CBPageNo        index_vacuum_stop_point;
                CBSegNo         metapage_segno;
                unsigned        metapage_offset;
 
@@ -1033,6 +1046,7 @@ ConveyorBeltVacuum(ConveyorBelt *cb)
                meta = cb_metapage_get_special(BufferGetPage(metabuffer));
                obsolete_state =
                        cb_metapage_get_obsolete_state(meta, &oldest_index_segment,
+                                                                                  &index_vacuum_stop_point,
                                                                                   &metapage_segno, &metapage_offset);
 
                /*
@@ -1127,24 +1141,170 @@ ConveyorBeltVacuum(ConveyorBelt *cb)
                }
                else if (obsolete_state == CBM_OBSOLETE_SEGMENT_ENTRIES)
                {
+                       unsigned        empty_index_segments;
+
                        /*
-                        * XXX.
-                        *
-                        * 1. Walk the chain of index segments while keeping a pin on the
-                        * metabuffer and the current index segment.
-                        *
-                        * 2. As we do, reinitialize payload segments and free them.
+                        * Do this part just once. A single pass through the logic below
+                        * should clean out the index segments as completely as possible,
+                        * so if we end up here again, either the logical truncation point
+                        * changed concurrently, or there's actually nothing to do. Even
+                        * in the former case, it's OK to return without doing anything
+                        * further, because this function only promises to clean up data
+                        * that was no longer needed as of the time it was called. It makes
+                        * no promises about cleaning up things that became obsolete once
+                        * this function was already running.
+                        */
+                       if (cleaned_index_segments)
+                       {
+                               UnlockReleaseBuffer(metabuffer);
+                               break;
+                       }
+                       cleaned_index_segments = true;
+
+                       /*
+                        * Release lock on metapage before locking other pages, but keep
+                        * the pin for efficiency and so that no index segments can
+                        * disappear concurrently.
+                        */
+                       LockBuffer(metabuffer, BUFFER_LOCK_UNLOCK);
+
+                       /*
+                        * Clear as many obsolete index entries out of index segments as
+                        * we can.
+                        */
+                       empty_index_segments =
+                               ConveyorBeltClearIndexEntries(cb, oldest_index_segment,
+                                                                                         index_vacuum_stop_point);
+
+                       /*
+                        * If even the oldest index segment is still partially in use,
+                        * then all newer ones are needed also, and likewise everything in
+                        * the metapage, which means no further cleanup is possible.
+                        */
+                       if (empty_index_segments == 0)
+                       {
+                               ReleaseBuffer(metabuffer);
+                               return;
+                       }
+
+                       /*
+                        * Free old index segments.
                         *
-                        * 3. Then, get a cleanup lock on the metapage and try to free as
-                        * many old index segments as we can. We can remember which ones
-                        * are eligible based on what we know we cleared (or found
-                        * already cleared).
+                        * We might stop before freeing the requested number of index
+                        * segments, due to concurrent locking. If that happens,
+                        * give up on performing any further cleanup.
+                        */
+                       while (empty_index_segments > 0)
+                       {
+                               oldest_index_segment =
+                                       ConveyorBeltFreeOldestIndexSegment(cb, metabuffer,
+                                                                                                          oldest_index_segment,
+                                                                                                          index_vacuum_stop_point);
+                               --empty_index_segments;
+                               if (empty_index_segments > 0 &&
+                                       oldest_index_segment == CB_INVALID_SEGMENT)
+                               {
+                                       ReleaseBuffer(metabuffer);
+                                       return;
+                               }
+                       }
+
+                       /*
+                        * If we freed some but not all index segments, all the entries in
+                        * the metapage are still needed, so there is no point in trying to
+                        * clean it up.
                         */
-                       elog(ERROR, "CBM_OBSOLETE_SEGMENT_ENTRIES case is not implemented yet");
+                       if (oldest_index_segment != CB_INVALID_SEGMENT)
+                       {
+                               ReleaseBuffer(metabuffer);
+                               return;
+                       }
+
+                       /*
+                        * Relock the metapage prior to looping around. We may still be
+                        * able to clear index entries from the metapage, or adjust the
+                        * start of the metapage index.
+                        */
+                       LockBuffer(metabuffer, BUFFER_LOCK_EXCLUSIVE);
                }
        }
 }
 
+/*
+ * Attempt to remove the oldest index segment.
+ *
+ * The caller must hold a pin, but no lock, on metabuffer. This function
+ * conditionally acquires a cleanup lock on it and, if that succeeds, drops
+ * the lock again before returning; the pin is left in place either way.
+ *
+ * oldest_index_segment is the segment to be freed. index_vacuum_stop_point
+ * is accepted for the benefit of future callers but is not currently used.
+ *
+ * The return value is the segment number of the oldest index segment that
+ * remains after the operation has been completed. If no index segments remain
+ * after the operation or if the operation cannot be completed, the return
+ * value is CB_INVALID_SEGMENT.
+ */
+static CBSegNo
+ConveyorBeltFreeOldestIndexSegment(ConveyorBelt *cb, Buffer metabuffer,
+                                                                  CBSegNo oldest_index_segment,
+                                                                  CBPageNo index_vacuum_stop_point)
+{
+       BlockNumber     firstindexblock;
+       Buffer          firstindexbuffer;
+       BlockNumber     fsmblock;
+       Buffer          fsmbuffer;
+       bool            needs_xlog;
+       CBSegNo         oldest_remaining_index_segment = CB_INVALID_SEGMENT;
+
+       /*
+        * Read and pin the first block of the index segment. The others will
+        * already have been cleared, but the first one has to stick around until
+        * we actually deallocate the segment, so that it remains possible to
+        * walk the chain of index segments.
+        */
+       needs_xlog = RelationNeedsWAL(cb->cb_rel) || cb->cb_fork == INIT_FORKNUM;
+       firstindexblock = cb_segment_to_block(cb->cb_pages_per_segment,
+                                                                                 oldest_index_segment, 0);
+       firstindexbuffer = ReadBufferExtended(cb->cb_rel, cb->cb_fork,
+                                                                                 firstindexblock, RBM_NORMAL, NULL);
+
+       /*
+        * Also read and pin the appropriate FSM page, unless the busy/free status
+        * of this segment is stored in the metapage. In the latter case there is
+        * no FSM buffer at all, and we must neither lock nor release one.
+        */
+       fsmblock = cb_segment_to_fsm_block(cb->cb_pages_per_segment,
+                                                                          oldest_index_segment);
+       if (fsmblock == InvalidBlockNumber)
+               fsmbuffer = InvalidBuffer;
+       else
+               fsmbuffer = ReadBufferExtended(cb->cb_rel, cb->cb_fork,
+                                                                          fsmblock, RBM_NORMAL, NULL);
+
+       /*
+        * The lock ordering described in the README requires the metapage lock
+        * to be taken last, but it also requires that freeing an index segment
+        * take a cleanup lock on the metapage. Since a concurrent reader will
+        * hold a pin on the metapage when trying to lock the first index page,
+        * we can't lock the first index page and then wait for a cleanup lock
+        * on the metapage, because that might deadlock.
+        *
+        * To get around that problem, we take the cleanup lock on the metabuffer
+        * conditionally. If we can't get it, we just skip freeing the oldest
+        * index segment. That's not great, but it's not obvious how we can do
+        * any better.
+        */
+       LockBuffer(firstindexbuffer, BUFFER_LOCK_EXCLUSIVE);
+       if (fsmbuffer != InvalidBuffer)
+               LockBuffer(fsmbuffer, BUFFER_LOCK_EXCLUSIVE);
+       if (ConditionalLockBufferForCleanup(metabuffer))
+       {
+               oldest_remaining_index_segment =
+                       cb_recycle_index_segment(&RelationGetSmgr(cb->cb_rel)->smgr_rnode.node,
+                                                                        cb->cb_fork, metabuffer,
+                                                                        firstindexblock, firstindexbuffer,
+                                                                        fsmblock, fsmbuffer,
+                                                                        oldest_index_segment, needs_xlog);
+               /* Drop the metapage lock but keep the caller's pin. */
+               LockBuffer(metabuffer, BUFFER_LOCK_UNLOCK);
+       }
+       if (fsmbuffer != InvalidBuffer)
+               UnlockReleaseBuffer(fsmbuffer);
+       UnlockReleaseBuffer(firstindexbuffer);
+
+       return oldest_remaining_index_segment;
+}
+
 /*
  * Clear all pages in a segment, or alternatively all pages in a segment
  * except for the first one. The segment can be a payload segment that isn't
index e1181400ebe3d6987ff7703eaafc10dcb48df4a7..40e0133d65f53c67d9fb81f804676f3279c2b756 100644 (file)
@@ -104,4 +104,24 @@ cb_first_segment_for_fsm_page(BlockNumber blkno, uint16 pages_per_segment)
                + (fsm_index * CB_FSM_SEGMENTS_PER_FSMPAGE);
 }
 
+/*
+ * Map a segment number to the block number of the FSM page that tracks it.
+ *
+ * Segments numbered below CB_FSM_SEGMENTS_FOR_METAPAGE have their FSM entry
+ * in the metapage rather than in an FSM page; for those, the result is
+ * InvalidBlockNumber.
+ */
+static inline BlockNumber
+cb_segment_to_fsm_block(uint16 pages_per_segment, CBSegNo segno)
+{
+       BlockNumber     base_block = cb_first_fsm_block(pages_per_segment);
+       unsigned        stride = cb_fsm_block_spacing(pages_per_segment);
+       unsigned        nth_fsm_page;
+
+       if (segno >= CB_FSM_SEGMENTS_FOR_METAPAGE)
+       {
+               /* Count whole FSM pages' worth of segments past the metapage. */
+               nth_fsm_page = (segno - CB_FSM_SEGMENTS_FOR_METAPAGE)
+                       / CB_FSM_SEGMENTS_PER_FSMPAGE;
+               return base_block + (nth_fsm_page * stride);
+       }
+
+       return InvalidBlockNumber;
+}
+
 #endif                                                 /* CBFSMPAGE_H */
index 9307bd382094030f818903380d94da2ca8b1a867..490130c339253161cf55dfbc201557a8bf868016 100644 (file)
@@ -159,6 +159,7 @@ extern void cb_metapage_remove_index_segment(CBMetapageData *meta,
                                                                                         CBSegNo segno);
 extern CBMObsoleteState cb_metapage_get_obsolete_state(CBMetapageData *meta,
                                                                                                           CBSegNo *oldest_index_segment,
+                                                                                                          CBPageNo *index_vacuum_stop_point,
                                                                                                           CBSegNo *metapage_segno,
                                                                                                           unsigned *metapage_offset);
 extern void cb_metapage_clear_obsolete_index_entry(CBMetapageData *meta,
index 8850b0279f0349745034743d3f6c9437d18300d0..4d31b4b175cabef6127808e928c86b0280a7b464 100644 (file)
@@ -110,15 +110,15 @@ extern void cb_recycle_payload_segment(RelFileNode *rnode,
                                                                           unsigned pageoffset,
                                                                           bool needs_xlog);
 
-extern void cb_recycle_index_segment(RelFileNode *rnode,
-                                                                        ForkNumber fork,
-                                                                        Buffer metabuffer,
-                                                                        BlockNumber indexblock,
-                                                                        Buffer indexbuffer,
-                                                                        BlockNumber fsmblock,
-                                                                        Buffer fsmbuffer,
-                                                                        CBSegNo segno,
-                                                                        bool needs_xlog);
+extern CBSegNo cb_recycle_index_segment(RelFileNode *rnode,
+                                                                               ForkNumber fork,
+                                                                               Buffer metabuffer,
+                                                                               BlockNumber indexblock,
+                                                                               Buffer indexbuffer,
+                                                                               BlockNumber fsmblock,
+                                                                               Buffer fsmbuffer,
+                                                                               CBSegNo segno,
+                                                                               bool needs_xlog);
 
 extern void cb_shift_metapage_index(RelFileNode *rnode,
                                                                        ForkNumber fork,