#include "access/slru.h"
#include "access/subtrans.h"
#include "access/transam.h"
-#include "miscadmin.h"
#include "pg_trace.h"
#include "utils/snapmgr.h"
* 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE, and segment numbering at
* 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need take no
* explicit notice of that fact in this module, except when comparing segment
- * and page numbers in TruncateSUBTRANS (see SubTransPagePrecedes)
- * and in recovery when we do ExtendSUBTRANS.
+ * and page numbers in TruncateSUBTRANS (see SubTransPagePrecedes).
*/
/* We need four bytes per xact */
ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
ptr += entryno;
- /*
- * Current state should be 0, except in recovery where we may
- * need to reset the value multiple times
- */
- Assert(*ptr == InvalidTransactionId ||
- (InRecovery && *ptr == parent));
+ /* Current state should be 0 */
+ Assert(*ptr == InvalidTransactionId);
*ptr = parent;
/*
* This must be called ONCE during postmaster or standalone-backend startup,
* after StartupXLOG has initialized ShmemVariableCache->nextXid.
+ *
+ * oldestActiveXID is the oldest XID of any prepared transaction, or nextXid
+ * if there are none.
+ *
+ * Every pg_subtrans page from the one containing oldestActiveXID through
+ * the one containing nextXid, inclusive, is zeroed while holding
+ * SubtransControlLock exclusively. The page range may span XID
+ * wraparound, in which case the page after the last valid page is page 0.
*/
void
StartupSUBTRANS(TransactionId oldestActiveXID)
{
- TransactionId xid = ShmemVariableCache->nextXid;
- int pageno = TransactionIdToPage(xid);
-
- LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
+ int startPage;
+ int endPage;
/*
- * Initialize our idea of the latest page number.
+ * Since we don't expect pg_subtrans to be valid across crashes, we
+ * initialize the currently-active page(s) to zeroes during startup.
+ * Whenever we advance into a new page, ExtendSUBTRANS will likewise zero
+ * the new page without regard to whatever was previously on disk.
*/
- SubTransCtl->shared->latest_page_number = pageno;
+ LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
+
+ startPage = TransactionIdToPage(oldestActiveXID);
+ endPage = TransactionIdToPage(ShmemVariableCache->nextXid);
+
+ while (startPage != endPage)
+ {
+ (void) ZeroSUBTRANSPage(startPage);
+ startPage++;
+ /* must account for wraparound */
+ if (startPage > TransactionIdToPage(MaxTransactionId))
+ startPage = 0;
+ }
+ /* the loop stops before endPage, so zero that last page here */
+ (void) ZeroSUBTRANSPage(startPage);
LWLockRelease(SubtransControlLock);
}
ExtendSUBTRANS(TransactionId newestXact)
{
int pageno;
- static int last_pageno = 0;
- Assert(TransactionIdIsNormal(newestXact));
+ /*
+ * No work except at first XID of a page. But beware: just after
+ * wraparound, the first XID of page zero is FirstNormalTransactionId.
+ */
+ if (TransactionIdToEntry(newestXact) != 0 &&
+ !TransactionIdEquals(newestXact, FirstNormalTransactionId))
+ return;
- if (!InRecovery)
- {
- /*
- * No work except at first XID of a page. But beware: just after
- * wraparound, the first XID of page zero is FirstNormalTransactionId.
- */
- if (TransactionIdToEntry(newestXact) != 0 &&
- !TransactionIdEquals(newestXact, FirstNormalTransactionId))
- return;
-
- pageno = TransactionIdToPage(newestXact);
- }
- else
- {
- /*
- * InRecovery we keep track of the last page we extended, so
- * we can compare that against incoming XIDs. This will only
- * ever be run by startup process, so keep it as a static variable
- * rather than hiding behind the SubtransControlLock.
- */
- pageno = TransactionIdToPage(newestXact);
-
- if (pageno == last_pageno ||
- SubTransPagePrecedes(pageno, last_pageno))
- return;
-
- ereport(trace_recovery(DEBUG1),
- (errmsg("extend subtrans xid %u page %d last_page %d",
- newestXact, pageno, last_pageno)));
-
- last_pageno = pageno;
- }
+ pageno = TransactionIdToPage(newestXact);
LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
#include <signal.h>
+#include "access/clog.h"
+#include "access/multixact.h"
#include "access/subtrans.h"
#include "access/transam.h"
#include "access/xlog.h"
#define xc_slow_answer_inc() ((void) 0)
#endif /* XIDCACHE_DEBUG */
+static void RecoverySnapshotStateMachine(int newstate,
+ TransactionId oldestXid, TransactionId latestXid);
+
/* Primitives for KnownAssignedXids array handling for standby */
static Size KnownAssignedXidsShmemSize(int size);
static void KnownAssignedXidsInit(int size);
{
if (TransactionIdPrecedes(recoverySnapshotPendingXmin,
xlrec->oldestRunningXid))
- {
- recoverySnapshotState = RECOVERY_SNAPSHOT_READY;
- elog(trace_recovery(DEBUG2),
- "running xact data now proven complete");
- elog(trace_recovery(DEBUG2),
- "recovery snapshots are now enabled");
- }
- return;
- }
-
- /*
- * Can't initialise with an incomplete set of lock information.
- * XXX: Can't we go into pending state like with overflowed subxids?
- */
- if (xlrec->lock_overflow)
- {
- elog(trace_recovery(DEBUG2),
- "running xact data has incomplete lock data");
+ RecoverySnapshotStateMachine(RECOVERY_SNAPSHOT_READY,
+ xlrec->oldestRunningXid,
+ xlrec->latestRunningXid);
return;
}
/*
* If the snapshot overflowed, then we still initialise with what we
* know, but the recovery snapshot isn't fully valid yet because we
- * know there are some subxids missing (ergo we don't know which ones)
+ * know we have information missing. We either have missing subxids
+ * or missing locks; it doesn't really matter which, but we track each
+ * separately to help with debugging.
*/
- if (!xlrec->subxid_overflow)
- recoverySnapshotState = RECOVERY_SNAPSHOT_READY;
+ if (xlrec->subxid_overflow || xlrec->lock_overflow)
+ RecoverySnapshotStateMachine(RECOVERY_SNAPSHOT_PENDING,
+ xlrec->oldestRunningXid,
+ xlrec->latestRunningXid);
else
- {
- recoverySnapshotState = RECOVERY_SNAPSHOT_PENDING;
- ereport(LOG,
- (errmsg("consistent state delayed because recovery snapshot incomplete")));
- }
+ RecoverySnapshotStateMachine(RECOVERY_SNAPSHOT_READY,
+ xlrec->oldestRunningXid,
+ xlrec->latestRunningXid);
xids = palloc(sizeof(TransactionId) * (xlrec->xcnt + xlrec->subxcnt));
nxids = 0;
/*
* Scan through the incoming array of RunningXacts and collect xids.
- * We don't use SubtransSetParent because it doesn't matter yet. If
- * we aren't overflowed then all xids will fit in snapshot and so we
- * don't need subtrans. If we later overflow, an xid assignment record
- * will add xids to subtrans. If RunningXacts is overflowed then we
- * don't have enough information to correctly update subtrans anyway.
+ * We call SubTransSetParent for each subxid, just as we would in other
+ * cases. That is OK because we performed StartupSUBTRANS() when we
+ * changed state, above.
*/
for (xid_index = 0; xid_index < xlrec->xcnt; xid_index++)
{
xids[nxids++] = xid;
for(i = 0; i < rxact[xid_index].nsubxids; i++)
- xids[nxids++] = subxip[rxact[xid_index].subx_offset + i];
-
+ {
+ TransactionId subxid = subxip[rxact[xid_index].subx_offset + i];
+ xids[nxids++] = subxid;
+ SubTransSetParent(subxid, xid);
+ }
}
if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
loggableLocks = (xl_rel_lock *) &(xlrec->xrun[(xlrec->xcnt + xlrec->subxcnt)]);
relation_redo_locks(loggableLocks, xlrec->numLocks);
- elog(trace_recovery(DEBUG2),
- "running transaction data initialized");
- if (recoverySnapshotState == RECOVERY_SNAPSHOT_READY)
- elog(trace_recovery(DEBUG2),
- "recovery snapshots are now enabled");
+ /* nextXid must be beyond any observed xid */
+ if (TransactionIdFollowsOrEquals(latestObservedXid,
+ ShmemVariableCache->nextXid))
+ {
+ ShmemVariableCache->nextXid = latestObservedXid;
+ TransactionIdAdvance(ShmemVariableCache->nextXid);
+ }
+}
+
+/*
+ * RecoverySnapshotStateMachine
+ *		Move recoverySnapshotState forward to newstate.
+ *
+ * newstate must be later than the current state (asserted below).
+ *
+ * Leaving RECOVERY_SNAPSHOT_UNINITIALIZED starts up clog, subtrans and
+ * multixact, then calls ExtendCLOG/ExtendSUBTRANS for every XID after
+ * oldestXid up to and including latestXid. Leaving
+ * RECOVERY_SNAPSHOT_PENDING is only allowed towards
+ * RECOVERY_SNAPSHOT_READY. Any other transition raises an ERROR and
+ * leaves recoverySnapshotState unchanged.
+ *
+ * NOTE(review): callers that pass InvalidTransactionId as latestXid
+ * presumably rely on TransactionIdPrecedesOrEquals() being false for it,
+ * so that the extension loop is skipped -- confirm against transam.c.
+ */
+static void
+RecoverySnapshotStateMachine(int newstate,
+ TransactionId oldestXid, TransactionId latestXid)
+{
+ TransactionId xid = oldestXid;
+
+ /* the state may only ever advance */
+ Assert(newstate > recoverySnapshotState);
+
+ switch (recoverySnapshotState)
+ {
+ case RECOVERY_SNAPSHOT_UNINITIALIZED:
+
+ ereport(trace_recovery(DEBUG2),
+ (errmsg("running transaction data initialized")));
+
+ /* Start up the commit log, subtrans and multixact modules */
+ StartupCLOG();
+ StartupSUBTRANS(oldestXid);
+ StartupMultiXact();
+
+ /*
+ * Begin with the XID after oldestXid; presumably the page holding
+ * oldestXid itself was already set up by the startup calls above.
+ */
+ TransactionIdAdvance(xid);
+ while (TransactionIdPrecedesOrEquals(xid, latestXid))
+ {
+ /*
+ * Extend clog and subtrans like we do in
+ * GetNewTransactionId() during normal operation.
+ */
+ ExtendCLOG(xid);
+ ExtendSUBTRANS(xid);
+
+ TransactionIdAdvance(xid);
+ }
+
+ if (newstate == RECOVERY_SNAPSHOT_READY)
+ ereport(trace_recovery(DEBUG1),
+ (errmsg("recovery snapshots are now enabled")));
+ else if (newstate == RECOVERY_SNAPSHOT_PENDING)
+ ereport(LOG,
+ (errmsg("consistent state delayed because "
+ "recovery snapshot incomplete")));
+ break;
+
+ case RECOVERY_SNAPSHOT_PENDING:
+
+ if (newstate == RECOVERY_SNAPSHOT_READY)
+ {
+ ereport(trace_recovery(DEBUG2),
+ (errmsg("running xact data now proven complete")));
+ ereport(trace_recovery(DEBUG1),
+ (errmsg("recovery snapshots are now enabled")));
+ break;
+ }
+
+ /*
+ * Any other target state is invalid from PENDING: deliberately
+ * share the error report below.
+ */
+ /* FALL THRU */
+
+ case RECOVERY_SNAPSHOT_READY:
+ default:
+ elog(ERROR, "invalid value for recoverySnapshotState");
+ break;
+ }
+
+ /* reached only for a valid transition */
+ recoverySnapshotState = newstate;
}
/*
{
Assert(InHotStandby);
- recoverySnapshotState = RECOVERY_SNAPSHOT_READY;
+ RecoverySnapshotStateMachine(RECOVERY_SNAPSHOT_READY,
+ ShmemVariableCache->nextXid, InvalidTransactionId);
/* also initialize latestCompletedXid, to nextXid - 1 */
ShmemVariableCache->latestCompletedXid = ShmemVariableCache->nextXid;
TransactionIdRetreat(ShmemVariableCache->latestCompletedXid);
latestObservedXid = ShmemVariableCache->latestCompletedXid;
-
- elog(trace_recovery(DEBUG2),
- "running transaction data initialized");
- elog(trace_recovery(DEBUG2),
- "recovery snapshots are now enabled");
}
/*
(errmsg("recording unobserved xid %u (latestObservedXid %u)",
next_expected_xid, latestObservedXid)));
KnownAssignedXidsAdd(&next_expected_xid, 1);
+
+ /*
+ * Extend clog and subtrans like we do in GetNewTransactionId()
+ * during normal operation.
+ */
+ ExtendCLOG(next_expected_xid);
+ ExtendSUBTRANS(next_expected_xid);
+
TransactionIdAdvance(next_expected_xid);
}
latestObservedXid = xid;
}
+
+ /* nextXid must be beyond any observed xid */
+ if (TransactionIdFollowsOrEquals(latestObservedXid,
+ ShmemVariableCache->nextXid))
+ {
+ ShmemVariableCache->nextXid = latestObservedXid;
+ TransactionIdAdvance(ShmemVariableCache->nextXid);
+ }
}
void