* value is removed; the cutoff value is stored in pg_class. The minimum value
* across all tables in each database is stored in pg_database, and the global
* minimum across all databases is part of pg_control and is kept in shared
- * memory. At checkpoint time, after the value is known flushed in WAL, any
- * files that correspond to multixacts older than that value are removed.
- * (These files are also removed when a restartpoint is executed.)
+ * memory. Whenever that minimum is advanced, the SLRUs are truncated.
*
* When new multixactid values are to be created, care is taken that the
* counter does not fall within the wraparound horizon considering the global
#include "postmaster/autovacuum.h"
#include "storage/lmgr.h"
#include "storage/pmsignal.h"
+#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
/* page in which a member is to be found */
#define MXOffsetToMemberPage(xid) ((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
+#define MXOffsetToMemberSegment(xid) (MXOffsetToMemberPage(xid) / SLRU_PAGES_PER_SEGMENT)
/* Location (byte offset within page) of flag word for a given member */
#define MXOffsetToFlagsOffset(xid) \
bool oldestOffsetKnown;
/*
- * This is what the previous checkpoint stored as the truncate position.
- * This value is the oldestMultiXactId that was valid when a checkpoint
- * was last executed.
+ * True if a multixact truncation WAL record was replayed since the last
+ * checkpoint. This is used to trigger 'legacy truncations', i.e. truncate
+ * by looking at the data directory during WAL replay, when the primary is
+ * too old to generate truncation records.
*/
- MultiXactId lastCheckpointedOldest;
+ bool sawTruncationCkptCyle;
/* support for anti-wraparound measures */
MultiXactId multiVacLimit;
MultiXactId multiWrapLimit;
/* support for members anti-wraparound measures */
- MultiXactOffset offsetStopLimit;
- bool offsetStopLimitKnown;
+ MultiXactOffset offsetStopLimit; /* known if oldestOffsetKnown */
/*
* Per-backend data starts here. We have two arrays stored in the area
MultiXactOffset offset2);
static void ExtendMultiXactOffset(MultiXactId multi);
static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
-static void DetermineSafeOldestOffset(MultiXactId oldestMXact);
static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
MultiXactOffset start, uint32 distance);
-static bool SetOffsetVacuumLimit(bool finish_setup);
+static bool SetOffsetVacuumLimit(void);
static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
static void WriteMZeroPageXlogRec(int pageno, uint8 info);
+static void WriteMTruncateXlogRec(MultiXactOffset startOff, MultiXactOffset endOff,
+ MultiXactOffset startMemb, MultiXactOffset endMemb);
/*
*----------
*/
#define OFFSET_WARN_SEGMENTS 20
- if (MultiXactState->offsetStopLimitKnown &&
+ if (MultiXactState->oldestOffsetKnown &&
MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset,
nmembers))
{
SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
}
- if (MultiXactState->offsetStopLimitKnown &&
+ if (MultiXactState->oldestOffsetKnown &&
MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit,
nextOffset,
nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS))
void
TrimMultiXact(void)
{
- MultiXactId multi = MultiXactState->nextMXact;
- MultiXactOffset offset = MultiXactState->nextOffset;
+ MultiXactId nextMXact;
+ MultiXactOffset offset;
MultiXactId oldestMXact;
+ MultiXactId oldestMXactDB;
int pageno;
int entryno;
int flagsoff;
+ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
+ nextMXact = MultiXactState->nextMXact;
+ offset = MultiXactState->nextOffset;
+ oldestMXact = MultiXactState->oldestMultiXactId;
+ oldestMXactDB = MultiXactState->oldestMultiXactDB;
+ MultiXactState->finishedStartup = true;
+ LWLockRelease(MultiXactGenLock);
/* Clean up offsets state */
LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
/*
* (Re-)Initialize our idea of the latest page number for offsets.
*/
- pageno = MultiXactIdToOffsetPage(multi);
+ pageno = MultiXactIdToOffsetPage(nextMXact);
MultiXactOffsetCtl->shared->latest_page_number = pageno;
/*
* Zero out the remainder of the current offsets page. See notes in
* TrimCLOG() for motivation.
*/
- entryno = MultiXactIdToOffsetEntry(multi);
+ entryno = MultiXactIdToOffsetEntry(nextMXact);
if (entryno != 0)
{
int slotno;
MultiXactOffset *offptr;
- slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
+ slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, nextMXact);
offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
offptr += entryno;
LWLockRelease(MultiXactMemberControlLock);
- if (SetOffsetVacuumLimit(true) && IsUnderPostmaster)
- SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
- LWLockAcquire(MultiXactGenLock, LW_SHARED);
- oldestMXact = MultiXactState->lastCheckpointedOldest;
- LWLockRelease(MultiXactGenLock);
- DetermineSafeOldestOffset(oldestMXact);
+ /*
+ * Recompute limits now that startup has finished; we can now compute how
+ * far away a members wraparound is.
+ */
+ SetMultiXactIdLimit(oldestMXact, oldestMXactDB);
}
/*
(errmsg("MultiXactId wrap limit is %u, limited by database with OID %u",
multiWrapLimit, oldest_datoid)));
+ /*
+ * Computing the actual limits is only possible once the data directory is
+ * in a consistent state. There's no need to compute the limits while
+ * still replaying WAL as no new multis can be computed anyway. So we'll
+ * only do further checks once TrimMultiXact() has been called.
+ */
+ if (!MultiXactState->finishedStartup)
+ return;
+
+ Assert(!InRecovery);
+
/* Set limits for offset vacuum. */
- needs_offset_vacuum = SetOffsetVacuumLimit(false);
+ needs_offset_vacuum = SetOffsetVacuumLimit();
/*
* If past the autovacuum force point, immediately signal an autovac
* another iteration immediately if there are still any old databases.
*/
if ((MultiXactIdPrecedes(multiVacLimit, curMulti) ||
- needs_offset_vacuum) && IsUnderPostmaster && !InRecovery)
+ needs_offset_vacuum) && IsUnderPostmaster)
SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
/* Give an immediate warning if past the wrap warn point */
- if (MultiXactIdPrecedes(multiWarnLimit, curMulti) && !InRecovery)
+ if (MultiXactIdPrecedes(multiWarnLimit, curMulti))
{
char *oldest_datname;
}
/*
- * Update our oldestMultiXactId value, but only if it's more recent than
- * what we had. However, even if not, always update the oldest multixact
- * offset limit.
+ * During WAL replay update our oldestMultiXactId value, but only if it's more
+ * recent than what we had.
*/
void
MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
{
if (MultiXactIdPrecedes(MultiXactState->oldestMultiXactId, oldestMulti))
+ {
+ /*
+ * If there has been a truncation on the master, detected via a moving
+ * oldestMulti, without a corresponding truncation record we know that
+ * the primary is still running an older version of postgres that
+ * doesn't yet log multixact truncations. So perform truncation
+ * ourselves.
+ */
+ if (!MultiXactState->sawTruncationCkptCyle)
+ {
+ ereport(LOG, (errmsg("performing legacy multixact truncation, upgrade master")));
+ TruncateMultiXact(oldestMulti, oldestMultiDB, true);
+ }
+
SetMultiXactIdLimit(oldestMulti, oldestMultiDB);
-}
+ }
-/*
- * Update the "safe truncation point". This is the newest value of oldestMulti
- * that is known to be flushed as part of a checkpoint record.
- */
-void
-MultiXactSetSafeTruncate(MultiXactId safeTruncateMulti)
-{
- LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
- MultiXactState->lastCheckpointedOldest = safeTruncateMulti;
- LWLockRelease(MultiXactGenLock);
+ /* only looked at in the startup process, no lock necessary */
+ MultiXactState->sawTruncationCkptCyle = false;
}
/*
return oldestMXact;
}
-/*
- * Based on the given oldest MultiXactId, determine what's the oldest member
- * offset and install the limit info in MultiXactState, where it can be used to
- * prevent overrun of old data in the members SLRU area.
- */
-static void
-DetermineSafeOldestOffset(MultiXactId oldestMXact)
-{
- MultiXactOffset oldestOffset;
- MultiXactOffset nextOffset;
- MultiXactOffset offsetStopLimit;
- MultiXactOffset prevOffsetStopLimit;
- MultiXactId nextMXact;
- bool finishedStartup;
- bool prevOffsetStopLimitKnown;
-
- /* Fetch values from shared memory. */
- LWLockAcquire(MultiXactGenLock, LW_SHARED);
- finishedStartup = MultiXactState->finishedStartup;
- nextMXact = MultiXactState->nextMXact;
- nextOffset = MultiXactState->nextOffset;
- prevOffsetStopLimit = MultiXactState->offsetStopLimit;
- prevOffsetStopLimitKnown = MultiXactState->offsetStopLimitKnown;
- LWLockRelease(MultiXactGenLock);
-
- /* Don't worry about this until after we've started up. */
- if (!finishedStartup)
- return;
-
- /*
- * Determine the offset of the oldest multixact. Normally, we can read
- * the offset from the multixact itself, but there's an important special
- * case: if there are no multixacts in existence at all, oldestMXact
- * obviously can't point to one. It will instead point to the multixact
- * ID that will be assigned the next time one is needed.
- *
- * NB: oldestMXact should be the oldest multixact that still exists in the
- * SLRU, unlike in SetOffsetVacuumLimit, where we do this same computation
- * based on the oldest value that might be referenced in a table.
- */
- if (nextMXact == oldestMXact)
- oldestOffset = nextOffset;
- else
- {
- bool oldestOffsetKnown;
-
- oldestOffsetKnown = find_multixact_start(oldestMXact, &oldestOffset);
- if (!oldestOffsetKnown)
- {
- ereport(LOG,
- (errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %u does not exist on disk",
- oldestMXact)));
- return;
- }
- }
-
- /* move back to start of the corresponding segment */
- offsetStopLimit = oldestOffset - (oldestOffset %
- (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT));
- /* always leave one segment before the wraparound point */
- offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT);
-
- /* if nothing has changed, we're done */
- if (prevOffsetStopLimitKnown && offsetStopLimit == prevOffsetStopLimit)
- return;
-
- LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
- MultiXactState->offsetStopLimit = offsetStopLimit;
- MultiXactState->offsetStopLimitKnown = true;
- LWLockRelease(MultiXactGenLock);
-
- if (!prevOffsetStopLimitKnown && IsUnderPostmaster)
- ereport(LOG,
- (errmsg("MultiXact member wraparound protections are now enabled")));
- ereport(DEBUG1,
- (errmsg("MultiXact member stop limit is now %u based on MultiXact %u",
- offsetStopLimit, oldestMXact)));
-}
-
/*
* Determine how aggressively we need to vacuum in order to prevent member
* wraparound.
*
- * To determine the oldest multixact ID, we look at oldestMultiXactId, not
- * lastCheckpointedOldest. That's because vacuuming can't help with anything
- * older than oldestMultiXactId; anything older than that isn't referenced
- * by any table. Offsets older than oldestMultiXactId but not as old as
- * lastCheckpointedOldest will go away after the next checkpoint.
+ * To do so, determine the oldest member offset and install the limit
+ * info in MultiXactState, where it can be used to prevent overrun of old data
+ * in the members SLRU area.
*
* The return value is true if emergency autovacuum is required and false
* otherwise.
*/
static bool
-SetOffsetVacuumLimit(bool finish_setup)
+SetOffsetVacuumLimit(void)
{
MultiXactId oldestMultiXactId;
MultiXactId nextMXact;
- bool finishedStartup;
MultiXactOffset oldestOffset = 0; /* placate compiler */
MultiXactOffset nextOffset;
bool oldestOffsetKnown = false;
- MultiXactOffset prevOldestOffset;
- bool prevOldestOffsetKnown;
+ bool prevOldestOffsetKnown;
+ MultiXactOffset offsetStopLimit = 0;
/* Read relevant fields from shared memory. */
LWLockAcquire(MultiXactGenLock, LW_SHARED);
oldestMultiXactId = MultiXactState->oldestMultiXactId;
nextMXact = MultiXactState->nextMXact;
nextOffset = MultiXactState->nextOffset;
- finishedStartup = MultiXactState->finishedStartup;
- prevOldestOffset = MultiXactState->oldestOffset;
prevOldestOffsetKnown = MultiXactState->oldestOffsetKnown;
+ Assert(MultiXactState->finishedStartup);
LWLockRelease(MultiXactGenLock);
- /* Don't do this until after any recovery is complete. */
- if (!finishedStartup && !finish_setup)
- return false;
-
/*
- * If no multixacts exist, then oldestMultiXactId will be the next
- * multixact that will be created, rather than an existing multixact.
+ * Determine the offset of the oldest multixact. Normally, we can read
+ * the offset from the multixact itself, but there's an important special
+ * case: if there are no multixacts in existence at all, oldestMXact
+ * obviously can't point to one. It will instead point to the multixact
+ * ID that will be assigned the next time one is needed.
*/
if (oldestMultiXactId == nextMXact)
{
*/
oldestOffsetKnown =
find_multixact_start(oldestMultiXactId, &oldestOffset);
+
+ if (oldestOffsetKnown)
+ ereport(DEBUG1,
+ (errmsg("oldest MultiXactId member is at offset %u",
+ oldestOffset)));
+ else
+ ereport(LOG,
+ (errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %u does not exist on disk",
+ oldestMultiXactId)));
}
/*
- * Except when initializing the system for the first time, there's no
- * need to update anything if we don't know the oldest offset or if it
- * hasn't changed.
+ * If we can, compute limits (and install them in MultiXactState) to prevent
+ * overrun of old data in the members SLRU area. We can only do so if the
+ * oldest offset is known though.
*/
- if (finish_setup ||
- (oldestOffsetKnown && !prevOldestOffsetKnown) ||
- (oldestOffsetKnown && prevOldestOffset != oldestOffset))
+ if (oldestOffsetKnown)
{
- /* Install the new limits. */
- LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
- MultiXactState->oldestOffset = oldestOffset;
- MultiXactState->oldestOffsetKnown = oldestOffsetKnown;
- MultiXactState->finishedStartup = true;
- LWLockRelease(MultiXactGenLock);
+ /* move back to start of the corresponding segment */
+ offsetStopLimit = oldestOffset - (oldestOffset %
+ (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT));
- /* Log the info */
- if (oldestOffsetKnown)
- ereport(DEBUG1,
- (errmsg("oldest MultiXactId member is at offset %u",
- oldestOffset)));
- else
- ereport(DEBUG1,
- (errmsg("oldest MultiXactId member offset unknown")));
+ /* always leave one segment before the wraparound point */
+ offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT);
+
+ if (!prevOldestOffsetKnown && IsUnderPostmaster)
+ ereport(LOG,
+ (errmsg("MultiXact member wraparound protections are now enabled")));
+ ereport(DEBUG1,
+ (errmsg("MultiXact member stop limit is now %u based on MultiXact %u",
+ offsetStopLimit, oldestMultiXactId)));
}
+ /* Install the computed values */
+ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
+ MultiXactState->oldestOffset = oldestOffset;
+ MultiXactState->oldestOffsetKnown = oldestOffsetKnown;
+ MultiXactState->offsetStopLimit = offsetStopLimit;
+ LWLockRelease(MultiXactGenLock);
+
/*
* Do we need an emergency autovacuum? If we're not sure, assume yes.
*/
int slotno;
MultiXactOffset *offptr;
+ /* XXX: Remove || Startup after WAL page magic bump */
+ Assert(MultiXactState->finishedStartup || AmStartupProcess());
+
pageno = MultiXactIdToOffsetPage(multi);
entryno = MultiXactIdToOffsetEntry(multi);
+ /*
+ * FIXME: We need to flush out dirty data, so PhysicalPageExists can work
+ * correctly, but SimpleLruFlush() is a pretty big hammer for that.
+ */
+ SimpleLruFlush(MultiXactOffsetCtl, true);
+
if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
return false;
return multixacts - victim_multixacts;
}
-/*
- * SlruScanDirectory callback.
- * This callback deletes segments that are outside the range determined by
- * the given page numbers.
- *
- * Both range endpoints are exclusive (that is, segments containing any of
- * those pages are kept.)
- */
-typedef struct MembersLiveRange
-{
- int rangeStart;
- int rangeEnd;
-} MembersLiveRange;
-
-static bool
-SlruScanDirCbRemoveMembers(SlruCtl ctl, char *filename, int segpage,
- void *data)
-{
- MembersLiveRange *range = (MembersLiveRange *) data;
- MultiXactOffset nextOffset;
-
- if ((segpage == range->rangeStart) ||
- (segpage == range->rangeEnd))
- return false; /* easy case out */
-
- /*
- * To ensure that no segment is spuriously removed, we must keep track of
- * new segments added since the start of the directory scan; to do this,
- * we update our end-of-range point as we run.
- *
- * As an optimization, we can skip looking at shared memory if we know for
- * certain that the current segment must be kept. This is so because
- * nextOffset never decreases, and we never increase rangeStart during any
- * one run.
- */
- if (!((range->rangeStart > range->rangeEnd &&
- segpage > range->rangeEnd && segpage < range->rangeStart) ||
- (range->rangeStart < range->rangeEnd &&
- (segpage < range->rangeStart || segpage > range->rangeEnd))))
- return false;
-
- /*
- * Update our idea of the end of the live range.
- */
- LWLockAcquire(MultiXactGenLock, LW_SHARED);
- nextOffset = MultiXactState->nextOffset;
- LWLockRelease(MultiXactGenLock);
- range->rangeEnd = MXOffsetToMemberPage(nextOffset);
-
- /* Recheck the deletion condition. If it still holds, perform deletion */
- if ((range->rangeStart > range->rangeEnd &&
- segpage > range->rangeEnd && segpage < range->rangeStart) ||
- (range->rangeStart < range->rangeEnd &&
- (segpage < range->rangeStart || segpage > range->rangeEnd)))
- SlruDeleteSegment(ctl, filename);
-
- return false; /* keep going */
-}
-
typedef struct mxtruncinfo
{
int earliestExistingPage;
return false; /* keep going */
}
+
+/*
+ * Delete members segments lying strictly between the segment that contains
+ * oldestOffset and the segment that contains oldestAliveOffset.
+ *
+ * The walk starts at the segment of oldestOffset and advances one segment at
+ * a time, wrapping from the segment of MaxMultiXactOffset back to segment 0,
+ * until it reaches the segment of oldestAliveOffset. Neither the start
+ * segment nor the end segment is passed to SlruDeleteSegment(); if both
+ * offsets fall into the same segment the loop body never runs.
+ */
+static void
+PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset oldestAliveOffset)
+{
+ int startsegment = MXOffsetToMemberSegment(oldestOffset);
+ int endsegment = MXOffsetToMemberSegment(oldestAliveOffset);
+ int maxsegment = MXOffsetToMemberSegment(MaxMultiXactOffset);
+ int segment = startsegment;
+
+ while (segment != endsegment)
+ {
+ /*
+ * verify whether the current segment is to be deleted; in practice
+ * only the start segment is skipped here, because the loop condition
+ * already guarantees segment != endsegment
+ */
+ if (segment != startsegment && segment != endsegment)
+ SlruDeleteSegment(MultiXactMemberCtl, segment);
+
+ /* move to next segment, handle wraparound correctly */
+ if (segment == maxsegment)
+ segment = 0;
+ else
+ segment += 1;
+ }
+}
+
/*
* Remove all MultiXactOffset and MultiXactMember segments before the oldest
* ones still of interest.
* and kept up to date as new pages are zeroed.
*/
void
-TruncateMultiXact(void)
+TruncateMultiXact(MultiXactId frozenMulti, Oid minmulti_datoid, bool in_recovery)
{
MultiXactId oldestMXact;
MultiXactOffset oldestOffset;
MultiXactId nextMXact;
MultiXactOffset nextOffset;
+ MultiXactOffset oldestAliveOffset;
mxtruncinfo trunc;
MultiXactId earliest;
- MembersLiveRange range;
- Assert(AmCheckpointerProcess() || AmStartupProcess() ||
- !IsPostmasterEnvironment);
+ /*
+ * Need to allow being called in recovery for backward compatibility, when
+ * an updated standby replays WAL generated by a non-updated primary.
+ */
+ Assert(in_recovery || !RecoveryInProgress());
+ Assert(!in_recovery || AmStartupProcess());
+ Assert(in_recovery || MultiXactState->finishedStartup);
LWLockAcquire(MultiXactGenLock, LW_SHARED);
- oldestMXact = MultiXactState->lastCheckpointedOldest;
nextMXact = MultiXactState->nextMXact;
nextOffset = MultiXactState->nextOffset;
+ oldestMXact = MultiXactState->oldestMultiXactId;
LWLockRelease(MultiXactGenLock);
Assert(MultiXactIdIsValid(oldestMXact));
+ /*
+ * Make sure to only attempt truncation if there are values to truncate
+ * away. In normal processing values shouldn't go backwards, but there are
+ * some corner cases (due to bugs) where that's possible.
+ */
+ if (MultiXactIdPrecedesOrEquals(frozenMulti, oldestMXact))
+ return;
+
/*
* Note we can't just plow ahead with the truncation; it's possible that
* there are no segments to truncate, which is a problem because we are
* going to attempt to read the offsets page to determine where to
* truncate the members SLRU. So we first scan the directory to determine
* the earliest offsets page number that we can read without error.
+ *
+ * XXX: It's also possible that the page that oldestMXact is on has
+ * already been truncated away, and we crashed before updating
+ * oldestMXact.
*/
trunc.earliestExistingPage = -1;
SlruScanDirectory(MultiXactOffsetCtl, SlruScanDirCbFindEarliest, &trunc);
if (earliest < FirstMultiXactId)
earliest = FirstMultiXactId;
- /*
- * If there's nothing to remove, we can bail out early.
- *
- * Due to bugs in early releases of PostgreSQL 9.3.X and 9.4.X,
- * oldestMXact might point to a multixact that does not exist.
- * Autovacuum will eventually advance it to a value that does exist,
- * and we want to set a proper offsetStopLimit when that happens,
- * so call DetermineSafeOldestOffset here even if we're not actually
- * truncating.
- */
+ /* If there's nothing to remove, we can bail out early. */
if (MultiXactIdPrecedes(oldestMXact, earliest))
- {
- DetermineSafeOldestOffset(oldestMXact);
return;
- }
/*
* First, compute the safe truncation point for MultiXactMember. This is
}
/*
- * To truncate MultiXactMembers, we need to figure out the active page
- * range and delete all files outside that range. The start point is the
- * start of the segment containing the oldest offset; an end point of the
- * segment containing the next offset to use is enough. The end point is
- * updated as MultiXactMember gets extended concurrently, elsewhere.
+ * Second, compute up to where to truncate: look up the member offset
+ * corresponding to frozenMulti.
*/
- range.rangeStart = MXOffsetToMemberPage(oldestOffset);
- range.rangeStart -= range.rangeStart % SLRU_PAGES_PER_SEGMENT;
-
- range.rangeEnd = MXOffsetToMemberPage(nextOffset);
+ if (frozenMulti == nextMXact)
+ oldestAliveOffset = nextOffset; /* there are NO MultiXacts */
+ else if (!find_multixact_start(frozenMulti, &oldestAliveOffset))
+ {
+ ereport(LOG,
+ (errmsg("supposedly still alive MultiXact %u not found, skipping truncation",
+ frozenMulti)));
+ return;
+ }
- SlruScanDirectory(MultiXactMemberCtl, SlruScanDirCbRemoveMembers, &range);
+ elog(DEBUG1, "performing multixact truncation starting (%u, %u), segments (%x to %x)",
+ oldestOffset,
+ oldestAliveOffset,
+ MXOffsetToMemberSegment(oldestOffset),
+ MXOffsetToMemberSegment(oldestAliveOffset));
- /* Now we can truncate MultiXactOffset */
- SimpleLruTruncate(MultiXactOffsetCtl,
- MultiXactIdToOffsetPage(oldestMXact));
+ /*
+ * Do truncation, and the WAL logging of the truncation, in a critical
+ * section. That way offsets/members cannot get out of sync anymore,
+ * i.e. once consistent the oldestMulti will always exist in members, even
+ * if we crashed in the wrong moment.
+ */
+ START_CRIT_SECTION();
+ /*
+ * Prevent checkpoints from being scheduled concurrently. This is critical
+ * because otherwise a truncation record might not be replayed after a
+ * crash/basebackup, even though the state of the data directory would
+ * require it. It's not possible, and not needed, to do this during
+ * recovery, when performing an old-style truncation, though, as the
+ * startup process doesn't have a PGXACT entry.
+ */
+ if (!in_recovery)
+ {
+ Assert(!MyPgXact->delayChkpt);
+ MyPgXact->delayChkpt = true;
+ }
/*
- * Now, and only now, we can advance the stop point for multixact members.
- * If we did it any sooner, the segments we deleted above might already
- * have been overwritten with new members. That would be bad.
+ * WAL-log the truncation - this has to be flushed before the truncation is
+ * actually performed, for the reasons explained in TruncateCLOG().
*/
- DetermineSafeOldestOffset(oldestMXact);
+ if (!in_recovery)
+ WriteMTruncateXlogRec(oldestMXact, frozenMulti,
+ oldestOffset, oldestAliveOffset);
+
+ /* First truncate members */
+ PerformMembersTruncation(oldestOffset, oldestAliveOffset);
+
+ /* Then offsets */
+ SimpleLruTruncate(MultiXactOffsetCtl,
+ MultiXactIdToOffsetPage(frozenMulti));
+
+ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
+ MultiXactState->oldestMultiXactId = frozenMulti;
+ MultiXactState->oldestMultiXactDB = minmulti_datoid;
+ LWLockRelease(MultiXactGenLock);
+
+ if (!in_recovery)
+ MyPgXact->delayChkpt = false;
+
+ END_CRIT_SECTION();
}
/*
(void) XLogInsert(RM_MULTIXACT_ID, info);
}
+/*
+ * Write a TRUNCATE xlog record
+ *
+ * The four arguments are copied verbatim into an xl_multixact_truncate
+ * record, which is inserted for RM_MULTIXACT_ID with info
+ * XLOG_MULTIXACT_TRUNCATE_ID. Despite their MultiXactOffset type, startOff
+ * and endOff receive multixact IDs from TruncateMultiXact() (oldestMXact and
+ * frozenMulti), and replay maps endOff through MultiXactIdToOffsetPage();
+ * startMemb and endMemb are the members offsets that replay hands to
+ * PerformMembersTruncation().
+ *
+ * We must flush the xlog record to disk before returning --- see notes
+ * in TruncateMultiXact().
+ */
+static void
+WriteMTruncateXlogRec(MultiXactOffset startOff, MultiXactOffset endOff,
+ MultiXactOffset startMemb, MultiXactOffset endMemb)
+{
+ XLogRecPtr recptr;
+ xl_multixact_truncate xlrec;
+
+ xlrec.startOff = startOff;
+ xlrec.endOff = endOff;
+ xlrec.startMemb = startMemb;
+ xlrec.endMemb = endMemb;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), SizeOfMultiXactTruncate);
+ recptr = XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_TRUNCATE_ID);
+ XLogFlush(recptr);
+}
+
/*
* MULTIXACT resource manager's routines
*/
LWLockRelease(XidGenLock);
}
}
+ else if (info == XLOG_MULTIXACT_TRUNCATE_ID)
+ {
+ xl_multixact_truncate xlrec;
+ int pageno;
+
+ memcpy(&xlrec, XLogRecGetData(record),
+ SizeOfMultiXactTruncate);
+
+ pageno = MultiXactIdToOffsetPage(xlrec.endOff);
+
+ elog(LOG, "replaying multixact truncation start: %u, %u, %x to %x",
+ xlrec.startMemb,
+ xlrec.endMemb,
+ MXOffsetToMemberSegment(xlrec.startMemb),
+ MXOffsetToMemberSegment(xlrec.endMemb));
+
+ PerformMembersTruncation(xlrec.startMemb, xlrec.endMemb);
+
+ /*
+ * During XLOG replay, latest_page_number isn't necessarily set up
+ * yet; insert a suitable value to bypass the sanity test in
+ * SimpleLruTruncate.
+ *
+ * XXX: We probably don't need this.
+ */
+ MultiXactOffsetCtl->shared->latest_page_number = pageno;
+ SimpleLruTruncate(MultiXactOffsetCtl,
+ MultiXactIdToOffsetPage(xlrec.endOff));
+
+ /* only looked at in the startup process, no lock necessary */
+ MultiXactState->sawTruncationCkptCyle = true;
+ }
else
elog(PANIC, "multixact_redo: unknown op code %u", info);
}