Buffer *fsmbuffer);
static void ConveyorBeltClearSegment(ConveyorBelt *cb, CBSegNo segno,
bool include_first_page);
+static CBSegNo ConveyorBeltFreeOldestIndexSegment(ConveyorBelt *cb,
+ Buffer metabuffer,
+ CBSegNo oldest_index_segment,
+ CBPageNo index_vacuum_stop_point);
static Buffer ConveyorBeltExtend(ConveyorBelt *cb, BlockNumber blkno,
BlockNumber *possibly_not_on_disk_blkno);
static BlockNumber ConveyorBeltFSMBlockNumber(ConveyorBelt *cb,
UnlockReleaseBuffer(metabuffer);
}
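+/*
+ * Clear obsolete index entries out of the chain of index segments.
+ *
+ * Entries that precede index_vacuum_stop_point are no longer needed. Walk
+ * the chain of index segments beginning at oldest_index_segment, clearing
+ * out as many obsolete entries as possible, and return the number of index
+ * segments that are left completely empty.
+ *
+ * XXX. Not implemented yet.
+ */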
+static unsigned
+ConveyorBeltClearIndexEntries(ConveyorBelt *cb, CBSegNo oldest_index_segment,
+ CBPageNo index_vacuum_stop_point)
+{
+ elog(ERROR, "ConveyorBeltClearIndexEntries not implemented yet");
+}
+
/*
* Recycle segments that are no longer needed.
*
Buffer fsmbuffer = InvalidBuffer;
CBSegNo cleared_segno = CB_INVALID_SEGMENT;
bool needs_xlog;
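+ /* Have we already cleaned out obsolete index segments? See below. */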
+ bool cleaned_index_segments = false;
/* Do any changes we make here need to be WAL-logged? */
needs_xlog = RelationNeedsWAL(cb->cb_rel) || cb->cb_fork == INIT_FORKNUM;
CBMetapageData *meta;
CBMObsoleteState obsolete_state;
CBSegNo oldest_index_segment;
+ CBPageNo index_vacuum_stop_point;
CBSegNo metapage_segno;
unsigned metapage_offset;
meta = cb_metapage_get_special(BufferGetPage(metabuffer));
obsolete_state =
cb_metapage_get_obsolete_state(meta, &oldest_index_segment,
+ &index_vacuum_stop_point,
&metapage_segno, &metapage_offset);
/*
}
else if (obsolete_state == CBM_OBSOLETE_SEGMENT_ENTRIES)
{
+ unsigned empty_index_segments;
+
/*
- * XXX.
- *
- * 1. Walk the chain of index segments while keeping a pin on the
- * metabuffer and the current index segment.
- *
- * 2. As we do, reinitialize payload segments and free them.
+ * Do this part just once. A single pass through the logic below
+ * should clean out the index segments as completely as possible,
+ * so if we end up here again, either the logical truncation point
+ * changed concurrently, or there's actually nothing to do. Even
+ * in the former case, it's OK to return without doing anything
+ * further, because this function only promises to clean up data
+ * that was no longer needed as of the time it was called; it makes
+ * no promises about data that becomes obsolete while it is running.
+ */
+ if (cleaned_index_segments)
+ {
+ UnlockReleaseBuffer(metabuffer);
+ break;
+ }
+ cleaned_index_segments = true;
+
+ /*
+ * Release lock on metapage before locking other pages, but keep
+ * the pin for efficiency and so that no index segments can
+ * disappear concurrently.
+ */
+ LockBuffer(metabuffer, BUFFER_LOCK_UNLOCK);
+
+ /*
+ * Clear as many obsolete index entries out of index segments as
+ * we can.
+ */
+ empty_index_segments =
+ ConveyorBeltClearIndexEntries(cb, oldest_index_segment,
+ index_vacuum_stop_point);
+
+ /*
+ * If even the oldest index segment is still partially in use,
+ * then all newer ones are needed also, and likewise everything in
+ * the metapage, which means no further cleanup is possible.
+ */
+ if (empty_index_segments == 0)
+ {
+ ReleaseBuffer(metabuffer);
+ return;
+ }
+
+ /*
+ * Free old index segments.
*
- * 3. Then, get a cleanup lock on the metapage and try to free as
- * many old index segments as we can. We can remember which ones
- * are eligible based on what we know we cleared (or found
- * already cleared).
+ * We might stop before freeing the requested number of index
+ * segments, due to concurrent locking. If that happens,
+ * give up on performing any further cleanup.
+ */
+ while (empty_index_segments > 0)
+ {
+ oldest_index_segment =
+ ConveyorBeltFreeOldestIndexSegment(cb, metabuffer,
+ oldest_index_segment,
+ index_vacuum_stop_point);
+ --empty_index_segments;
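+
+ /*
+ * CB_INVALID_SEGMENT can mean either that no index segments remain
+ * or that the oldest segment could not be freed due to a concurrent
+ * lock. If we still expected to free more segments, it must be the
+ * latter case, so give up.
+ */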
+ if (empty_index_segments > 0 &&
+ oldest_index_segment == CB_INVALID_SEGMENT)
+ {
+ ReleaseBuffer(metabuffer);
+ return;
+ }
+ }
+
+ /*
+ * If we freed some but not all index segments, all the entries in
+ * the metapage are still needed, so there is no point in trying to
+ * clean it up.
*/
- elog(ERROR, "CBM_OBSOLETE_SEGMENT_ENTRIES case is not implemented yet");
+ if (oldest_index_segment != CB_INVALID_SEGMENT)
+ {
+ ReleaseBuffer(metabuffer);
+ return;
+ }
+
+ /*
+ * Relock the metapage prior to looping around. We may still be
+ * able to clear index entries from the metapage, or adjust the
+ * start of the metapage index.
+ */
+ LockBuffer(metabuffer, BUFFER_LOCK_EXCLUSIVE);
}
}
}
+/*
+ * Attempt to remove the oldest index segment.
+ *
+ * The return value is the segment number of the oldest index segment that
+ * remains after the operation has been completed. If no index segments remain
+ * after the operation or if the operation cannot be completed, the return
+ * value is CB_INVALID_SEGMENT.
+ */
+static CBSegNo
+ConveyorBeltFreeOldestIndexSegment(ConveyorBelt *cb, Buffer metabuffer,
+ CBSegNo oldest_index_segment,
+ CBPageNo index_vacuum_stop_point)
+{
+ BlockNumber firstindexblock;
+ Buffer firstindexbuffer;
+ BlockNumber fsmblock;
+ Buffer fsmbuffer;
+ bool needs_xlog;
+ CBSegNo oldest_remaining_index_segment = CB_INVALID_SEGMENT;
+
+ /* Do any changes we make here need to be WAL-logged? */
+ needs_xlog = RelationNeedsWAL(cb->cb_rel) || cb->cb_fork == INIT_FORKNUM;
+
+ /*
+ * Read and pin the first block of the index segment. The others will
+ * already have been cleared, but the first one has to stick around until
+ * we actually deallocate the segment, so that it remains possible to
+ * walk the chain of index segments.
+ */
+ firstindexblock = cb_segment_to_block(cb->cb_pages_per_segment,
+ oldest_index_segment, 0);
+ firstindexbuffer = ReadBufferExtended(cb->cb_rel, cb->cb_fork,
+ firstindexblock, RBM_NORMAL, NULL);
+
+ /*
+ * Also read and pin the appropriate FSM page, unless the busy/free status
+ * of this segment is stored in the metapage.
+ */
+ fsmblock = cb_segment_to_fsm_block(cb->cb_pages_per_segment,
+ oldest_index_segment);
+ if (fsmblock == InvalidBlockNumber)
+ fsmbuffer = InvalidBuffer;
+ else
+ fsmbuffer = ReadBufferExtended(cb->cb_rel, cb->cb_fork,
+ fsmblock, RBM_NORMAL, NULL);
+
+ /*
+ * The lock ordering described in the README requires the metapage lock
+ * to be taken last, but it also requires that freeing an index segment
+ * take a cleanup lock on the metapage. Since a concurrent reader will
+ * hold a pin on the metapage when trying to lock the first index page,
+ * we can't lock the first index page and then wait for a cleanup lock
+ * on the metapage, because that might deadlock.
+ *
+ * To get around that problem, we take the cleanup lock on the metabuffer
+ * conditionally. If we can't get it, we just skip freeing the oldest
+ * index segment. That's not great, but it's not obvious how we can do
+ * any better.
+ */
+ LockBuffer(firstindexbuffer, BUFFER_LOCK_EXCLUSIVE);
+ if (BufferIsValid(fsmbuffer))
+ LockBuffer(fsmbuffer, BUFFER_LOCK_EXCLUSIVE);
+ if (ConditionalLockBufferForCleanup(metabuffer))
+ {
+ oldest_remaining_index_segment =
+ cb_recycle_index_segment(&RelationGetSmgr(cb->cb_rel)->smgr_rnode.node,
+ cb->cb_fork, metabuffer,
+ firstindexblock, firstindexbuffer,
+ fsmblock, fsmbuffer,
+ oldest_index_segment, needs_xlog);
+ LockBuffer(metabuffer, BUFFER_LOCK_UNLOCK);
+ }
+ if (BufferIsValid(fsmbuffer))
+ UnlockReleaseBuffer(fsmbuffer);
+ UnlockReleaseBuffer(firstindexbuffer);
+
+ return oldest_remaining_index_segment;
+}
+
/*
* Clear all pages in a segment, or alternatively all pages in a segment
* except for the first one. The segment can be a payload segment that isn't