Update FSM on WAL replay. This is a bit limited; the FSM is only updated
author Heikki Linnakangas <[email protected]>
Fri, 31 Oct 2008 19:40:27 +0000 (19:40 +0000)
committer Heikki Linnakangas <[email protected]>
Fri, 31 Oct 2008 19:40:27 +0000 (19:40 +0000)
on non-full-page-image WAL records, and quite arbitrarily, only if there's
less than 20% free space on the page after the insert/update (not on HOT
updates, though). The 20% cutoff should avoid most of the overhead, when
replaying a bulk insertion, for example, while ensuring that pages that
are full are marked as full in the FSM.

This is mostly to avoid the nasty worst case scenario, where you replay
from a PITR archive, and the FSM information in the base backup is really
out of date. If there were a lot of pages that the outdated FSM claims to
have free space, but don't actually have any, the first unlucky inserter
after the recovery would traverse through all those pages, just to find
out that they're full. We didn't have this problem with the old FSM
implementation, because we simply threw the FSM information away on a
non-clean shutdown.

src/backend/access/heap/heapam.c
src/backend/storage/freespace/freespace.c
src/include/storage/freespace.h

index f6584e9b533ee7d7c752c2c3826ad3cbb7c71abf..a9eabeb7bea5f42f5d7ae703be06cfbd1fd2924a 100644 (file)
@@ -54,6 +54,7 @@
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "storage/bufmgr.h"
+#include "storage/freespace.h"
 #include "storage/lmgr.h"
 #include "storage/procarray.h"
 #include "storage/smgr.h"
@@ -4022,6 +4023,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move)
        int                     nredirected;
        int                     ndead;
        int                     nunused;
+       Size            freespace;
 
        if (record->xl_info & XLR_BKP_BLOCK_1)
                return;
@@ -4053,6 +4055,8 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move)
                                                        nowunused, nunused,
                                                        clean_move);
 
+       freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
+
        /*
         * Note: we don't worry about updating the page's prunability hints.
         * At worst this will cause an extra prune cycle to occur soon.
@@ -4062,6 +4066,15 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move)
        PageSetTLI(page, ThisTimeLineID);
        MarkBufferDirty(buffer);
        UnlockReleaseBuffer(buffer);
+
+       /*
+        * Update the FSM as well.
+        *
+        * XXX: We don't get here if the page was restored from full page image.
+        * We don't bother to update the FSM in that case, it doesn't need to be
+        * totally accurate anyway.
+        */
+       XLogRecordPageWithFreeSpace(xlrec->node, xlrec->block, freespace);
 }
 
 static void
@@ -4205,15 +4218,17 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
        HeapTupleHeader htup;
        xl_heap_header xlhdr;
        uint32          newlen;
+       Size            freespace;
+       BlockNumber     blkno;
 
        if (record->xl_info & XLR_BKP_BLOCK_1)
                return;
 
+       blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid));
+
        if (record->xl_info & XLOG_HEAP_INIT_PAGE)
        {
-               buffer = XLogReadBuffer(xlrec->target.node,
-                                                        ItemPointerGetBlockNumber(&(xlrec->target.tid)),
-                                                               true);
+               buffer = XLogReadBuffer(xlrec->target.node, blkno, true);
                Assert(BufferIsValid(buffer));
                page = (Page) BufferGetPage(buffer);
 
@@ -4221,9 +4236,7 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
        }
        else
        {
-               buffer = XLogReadBuffer(xlrec->target.node,
-                                                        ItemPointerGetBlockNumber(&(xlrec->target.tid)),
-                                                               false);
+               buffer = XLogReadBuffer(xlrec->target.node, blkno, false);
                if (!BufferIsValid(buffer))
                        return;
                page = (Page) BufferGetPage(buffer);
@@ -4261,10 +4274,25 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
        offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
        if (offnum == InvalidOffsetNumber)
                elog(PANIC, "heap_insert_redo: failed to add tuple");
+
+       freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
+
        PageSetLSN(page, lsn);
        PageSetTLI(page, ThisTimeLineID);
        MarkBufferDirty(buffer);
        UnlockReleaseBuffer(buffer);
+
+       /*
+        * If the page is running low on free space, update the FSM as well.
+        * Arbitrarily, our definition of "low" is less than 20%. We can't do
+        * much better than that without knowing the fill-factor for the table.
+        *
+        * XXX: We don't get here if the page was restored from full page image.
+        * We don't bother to update the FSM in that case, it doesn't need to be
+        * totally accurate anyway.
+        */
+       if (freespace < BLCKSZ / 5)
+               XLogRecordPageWithFreeSpace(xlrec->target.node, blkno, freespace);
 }
 
 /*
@@ -4289,6 +4317,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool move, bool hot_update)
        xl_heap_header xlhdr;
        int                     hsize;
        uint32          newlen;
+       Size            freespace;
 
        if (record->xl_info & XLR_BKP_BLOCK_1)
        {
@@ -4446,10 +4475,32 @@ newsame:;
        offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
        if (offnum == InvalidOffsetNumber)
                elog(PANIC, "heap_update_redo: failed to add tuple");
+
+       freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
+
        PageSetLSN(page, lsn);
        PageSetTLI(page, ThisTimeLineID);
        MarkBufferDirty(buffer);
        UnlockReleaseBuffer(buffer);
+
+       /*
+        * If the page is running low on free space, update the FSM as well.
+        * Arbitrarily, our definition of "low" is less than 20%. We can't do
+        * much better than that without knowing the fill-factor for the table.
+        *
+        * However, don't update the FSM on HOT updates, because after crash
+        * recovery, either the old or the new tuple will certainly be dead and
+        * prunable. After pruning, the page will have roughly as much free space
+        * as it did before the update, assuming the new tuple is about the same
+        * size as the old one.
+        *
+        * XXX: We don't get here if the page was restored from full page image.
+        * We don't bother to update the FSM in that case, it doesn't need to be
+        * totally accurate anyway.
+        */
+       if (!hot_update && freespace < BLCKSZ / 5)
+               XLogRecordPageWithFreeSpace(xlrec->target.node,
+                                       ItemPointerGetBlockNumber(&(xlrec->newtid)), freespace);
 }
 
 static void
index 171fe63af5bd2261163cceadbd1c3a2196a692d4..4949cf6d8ece435a2620bb81f90597535be7d9d2 100644 (file)
@@ -202,6 +202,36 @@ RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
        fsm_set_and_search(rel, addr, slot, new_cat, 0);
 }
 
+/*
+ * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
+ *             WAL replay; the relation is named by RelFileNode, not Relation
+ */
+void
+XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
+                                                       Size spaceAvail)
+{
+       int                     new_cat = fsm_space_avail_to_cat(spaceAvail);
+       FSMAddress      addr;
+       uint16          slot;
+       BlockNumber blkno;
+       Buffer          buf;
+       Page            page;
+
+       /* Get the location of the FSM byte representing the heap block */
+       addr = fsm_get_location(heapBlk, &slot);
+       blkno = fsm_logical_to_physical(addr);
+
+       /* If the page doesn't exist already, extend; initialize it if it is new */
+       buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR);
+       page = BufferGetPage(buf);
+       if (PageIsNew(page))
+               PageInit(page, BLCKSZ, 0);
+
+       if (fsm_set_avail(page, slot, new_cat))
+               MarkBufferDirty(buf);   /* presumably true means the slot changed */
+       UnlockReleaseBuffer(buf);
+}
+
 /*
  * GetRecordedFreePage - return the amount of free space on a particular page,
  *             according to the FSM.
index 7a1664f0ed33fa492ae51eb049b88f820756e6ff..e17a8d5d2d8f0657e14b2c515a478e1633f49e0a 100644 (file)
@@ -27,6 +27,8 @@ extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel,
                                                          Size spaceNeeded);
 extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
                                                                        Size spaceAvail);
+extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
+                                                                               Size spaceAvail);
 
 extern void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks);
 extern void FreeSpaceMapVacuum(Relation rel);