Apply 0017-Revert-changes-to-subtrans.c-and-slru.c.-Instead-cal.patch but with heavy...
author Simon Riggs <[email protected]>
Tue, 6 Oct 2009 11:50:13 +0000 (12:50 +0100)
committer Simon Riggs <[email protected]>
Tue, 6 Oct 2009 11:50:13 +0000 (12:50 +0100)
src/backend/access/transam/clog.c
src/backend/access/transam/slru.c
src/backend/access/transam/subtrans.c
src/backend/access/transam/xlog.c
src/backend/storage/ipc/procarray.c

index c6c27a79bc41317d89859ebdd0249aa8d431388d..9d9752771611da83223be0b1361099c77d836fcc 100644 (file)
@@ -575,7 +575,7 @@ ExtendCLOG(TransactionId newestXact)
        LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
 
        /* Zero the page and make an XLOG entry about it */
-       ZeroCLOGPage(pageno, true);
+       ZeroCLOGPage(pageno, !InRecovery);
 
        LWLockRelease(CLogControlLock);
 }
index 3f890872a57f4c50cc97e629df0772291607f156..68e38696fb5bfe2f8437d127ff273c4cde77170b 100644 (file)
@@ -598,8 +598,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
         * commands to set the commit status of transactions whose bits are in
         * already-truncated segments of the commit log (see notes in
         * SlruPhysicalWritePage).      Hence, if we are InRecovery, allow the case
-        * where the file doesn't exist, and return zeroes instead. We also
-        * return a zeroed page when seek and read fails.
+        * where the file doesn't exist, and return zeroes instead.
         */
        fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
        if (fd < 0)
@@ -620,14 +619,6 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
 
        if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
        {
-               if (InRecovery)
-               {
-                       ereport(LOG,
-                                       (errmsg("file \"%s\" doesn't exist, reading as zeroes",
-                                                       path)));
-                       MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
-                       return true;
-               }
                slru_errcause = SLRU_SEEK_FAILED;
                slru_errno = errno;
                close(fd);
@@ -637,14 +628,6 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
        errno = 0;
        if (read(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
        {
-               if (InRecovery)
-               {
-                       ereport(LOG,
-                                       (errmsg("file \"%s\" doesn't exist, reading as zeroes",
-                                                       path)));
-                       MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
-                       return true;
-               }
                slru_errcause = SLRU_READ_FAILED;
                slru_errno = errno;
                close(fd);
index e9b3fbc816a71ac8ab427e2d91836f2a6d2f1693..0dbd2166be738393fa2eeb0bf46caa6555bc5100 100644 (file)
@@ -31,7 +31,6 @@
 #include "access/slru.h"
 #include "access/subtrans.h"
 #include "access/transam.h"
-#include "miscadmin.h"
 #include "pg_trace.h"
 #include "utils/snapmgr.h"
 
@@ -45,8 +44,7 @@
  * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE, and segment numbering at
  * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE/SLRU_SEGMENTS_PER_PAGE.  We need take no
  * explicit notice of that fact in this module, except when comparing segment
- * and page numbers in TruncateSUBTRANS (see SubTransPagePrecedes)
- * and in recovery when we do ExtendSUBTRANS.
+ * and page numbers in TruncateSUBTRANS (see SubTransPagePrecedes).
  */
 
 /* We need four bytes per xact */
@@ -85,12 +83,8 @@ SubTransSetParent(TransactionId xid, TransactionId parent)
        ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
        ptr += entryno;
 
-       /*
-        * Current state should be 0, except in recovery where we may
-        * need to reset the value multiple times
-        */
-       Assert(*ptr == InvalidTransactionId ||
-                       (InRecovery && *ptr == parent));
+       /* Current state should be 0 */
+       Assert(*ptr == InvalidTransactionId);
 
        *ptr = parent;
 
@@ -229,19 +223,33 @@ ZeroSUBTRANSPage(int pageno)
 /*
  * This must be called ONCE during postmaster or standalone-backend startup,
  * after StartupXLOG has initialized ShmemVariableCache->nextXid.
+ *
+ * oldestActiveXID is the oldest XID of any prepared transaction, or nextXid
+ * if there are none.
  */
 void
 StartupSUBTRANS(TransactionId oldestActiveXID)
 {
-       TransactionId xid = ShmemVariableCache->nextXid;
-       int                     pageno = TransactionIdToPage(xid);
-
-       LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
+       int                     startPage;
+       int                     endPage;
 
        /*
-        * Initialize our idea of the latest page number.
+        * Since we don't expect pg_subtrans to be valid across crashes, we
+        * initialize the currently-active page(s) to zeroes during startup.
+        * Whenever we advance into a new page, ExtendSUBTRANS will likewise zero
+        * the new page without regard to whatever was previously on disk.
         */
-       SubTransCtl->shared->latest_page_number = pageno;
+       LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
+
+       startPage = TransactionIdToPage(oldestActiveXID);
+       endPage = TransactionIdToPage(ShmemVariableCache->nextXid);
+
+       while (startPage != endPage)
+       {
+               (void) ZeroSUBTRANSPage(startPage);
+               startPage++;
+       }
+       (void) ZeroSUBTRANSPage(startPage);
 
        LWLockRelease(SubtransControlLock);
 }
@@ -294,42 +302,16 @@ void
 ExtendSUBTRANS(TransactionId newestXact)
 {
        int                     pageno;
-       static int last_pageno = 0;
 
-       Assert(TransactionIdIsNormal(newestXact));
+       /*
+        * No work except at first XID of a page.  But beware: just after
+        * wraparound, the first XID of page zero is FirstNormalTransactionId.
+        */
+       if (TransactionIdToEntry(newestXact) != 0 &&
+               !TransactionIdEquals(newestXact, FirstNormalTransactionId))
+               return;
 
-       if (!InRecovery)
-       {
-               /*
-                * No work except at first XID of a page.  But beware: just after
-                * wraparound, the first XID of page zero is FirstNormalTransactionId.
-                */
-               if (TransactionIdToEntry(newestXact) != 0 &&
-                       !TransactionIdEquals(newestXact, FirstNormalTransactionId))
-                       return;
-
-               pageno = TransactionIdToPage(newestXact);
-       }
-       else
-       {
-               /*
-                * InRecovery we keep track of the last page we extended, so
-                * we can compare that against incoming XIDs. This will only
-                * ever be run by startup process, so keep it as a static variable
-                * rather than hiding behind the SubtransControlLock.
-                */
-               pageno = TransactionIdToPage(newestXact);
-
-               if (pageno == last_pageno ||
-                       SubTransPagePrecedes(pageno, last_pageno))
-                       return;
-
-               ereport(trace_recovery(DEBUG1),
-                               (errmsg("extend subtrans  xid %u page %d last_page %d",
-                                               newestXact, pageno, last_pageno)));
-
-               last_pageno = pageno;
-       }
+       pageno = TransactionIdToPage(newestXact);
 
        LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
 
index 0613001136a627272097e00d1eb0e85c1004600b..932e57fe34b7f3f04ef418eb6b602712d2206ece 100644 (file)
@@ -5994,6 +5994,7 @@ StartupXLOG(void)
        uint32          freespace;
        TransactionId oldestActiveXID;
        bool            bgwriterLaunched = false;
+       bool            backendsAllowed = false;
 
        /*
         * Read control file and check XLOG status looks valid.
@@ -6319,7 +6320,6 @@ StartupXLOG(void)
                        bool            recoveryContinue = true;
                        bool            recoveryApply = true;
                        bool            reachedMinRecoveryPoint = false;
-                       bool            backendsAllowed = false;
                        ErrorContextCallback errcontext;
 
                        /* use volatile pointer to prevent code rearrangement */
@@ -6689,11 +6689,13 @@ StartupXLOG(void)
        ShmemVariableCache->latestCompletedXid = ShmemVariableCache->nextXid;
        TransactionIdRetreat(ShmemVariableCache->latestCompletedXid);
 
-       /* Start up the commit log and related stuff, too */
-       /* XXXHS: perhaps this should go after XactClearRecoveryTransactions */
-       StartupCLOG();
-       StartupSUBTRANS(oldestActiveXID);
-       StartupMultiXact();
+       /* Start up the commit log and related stuff, too, if not done already */
+       if (!backendsAllowed)
+       {
+               StartupCLOG();
+               StartupSUBTRANS(oldestActiveXID);
+               StartupMultiXact();
+       }
 
        /* Reload shared-memory state for prepared transactions */
        RecoverPreparedTransactions();
index 7d1f42ce29a1dea4f3b5374202e13bda6566fbb4..f6f50be01df13623c2fcfe7ead61f8cde4a928e4 100644 (file)
@@ -45,6 +45,8 @@
 
 #include <signal.h>
 
+#include "access/clog.h"
+#include "access/multixact.h"
 #include "access/subtrans.h"
 #include "access/transam.h"
 #include "access/xlog.h"
@@ -129,6 +131,9 @@ static void DisplayXidCache(void);
 #define xc_slow_answer_inc()           ((void) 0)
 #endif   /* XIDCACHE_DEBUG */
 
+static void RecoverySnapshotStateMachine(int newstate,
+                                                        TransactionId oldestXid, TransactionId latestXid);
+
 /* Primitives for KnownAssignedXids array handling for standby */
 static Size KnownAssignedXidsShmemSize(int size);
 static void KnownAssignedXidsInit(int size);
@@ -470,24 +475,9 @@ ProcArrayApplyRecoveryInfo(XLogRecPtr lsn, xl_xact_running_xacts *xlrec)
        {
                if (TransactionIdPrecedes(recoverySnapshotPendingXmin,
                                                                  xlrec->oldestRunningXid))
-               {
-                       recoverySnapshotState = RECOVERY_SNAPSHOT_READY;
-                       elog(trace_recovery(DEBUG2), 
-                                       "running xact data now proven complete");
-                       elog(trace_recovery(DEBUG2), 
-                                       "recovery snapshots are now enabled");
-               }
-               return;
-       }
-
-       /*
-        * Can't initialise with an incomplete set of lock information.
-        * XXX: Can't we go into pending state like with overflowed subxids?
-        */
-       if (xlrec->lock_overflow)
-       {
-               elog(trace_recovery(DEBUG2), 
-                               "running xact data has incomplete lock data");
+                       RecoverySnapshotStateMachine(RECOVERY_SNAPSHOT_READY,
+                                                                                xlrec->oldestRunningXid,
+                                                                                xlrec->latestRunningXid);
                return;
        }
 
@@ -499,16 +489,18 @@ ProcArrayApplyRecoveryInfo(XLogRecPtr lsn, xl_xact_running_xacts *xlrec)
        /*
         * If the snapshot overflowed, then we still initialise with what we
         * know, but the recovery snapshot isn't fully valid yet because we
-        * know there are some subxids missing (ergo we don't know which ones)
+        * know we have information missing. We either have missing subxids
+        * or missing locks; it doesn't really matter which, but we track each
+        * separately to help with debugging.
         */
-       if (!xlrec->subxid_overflow)
-               recoverySnapshotState = RECOVERY_SNAPSHOT_READY;
+       if (xlrec->subxid_overflow || xlrec->lock_overflow)
+               RecoverySnapshotStateMachine(RECOVERY_SNAPSHOT_PENDING,
+                                                                                xlrec->oldestRunningXid,
+                                                                                xlrec->latestRunningXid);
        else
-       {
-               recoverySnapshotState = RECOVERY_SNAPSHOT_PENDING;
-               ereport(LOG, 
-                               (errmsg("consistent state delayed because recovery snapshot incomplete")));
-       }
+               RecoverySnapshotStateMachine(RECOVERY_SNAPSHOT_READY,
+                                                                                xlrec->oldestRunningXid,
+                                                                                xlrec->latestRunningXid);
 
        xids = palloc(sizeof(TransactionId) * (xlrec->xcnt + xlrec->subxcnt));
        nxids = 0;
@@ -522,11 +514,9 @@ ProcArrayApplyRecoveryInfo(XLogRecPtr lsn, xl_xact_running_xacts *xlrec)
 
        /*
         * Scan through the incoming array of RunningXacts and collect xids.
-        * We don't use SubtransSetParent because it doesn't matter yet. If
-        * we aren't overflowed then all xids will fit in snapshot and so we
-        * don't need subtrans. If we later overflow, an xid assignment record
-        * will add xids to subtrans. If RunningXacts is overflowed then we
-        * don't have enough information to correctly update subtrans anyway.   
+        * We call SubTransSetParent() for each subxid, just as we would in
+        * other cases. That is OK because StartupSUBTRANS() was performed when
+        * we changed state, above.
         */
        for (xid_index = 0; xid_index < xlrec->xcnt; xid_index++)
        {
@@ -537,8 +527,11 @@ ProcArrayApplyRecoveryInfo(XLogRecPtr lsn, xl_xact_running_xacts *xlrec)
 
                xids[nxids++] = xid;
                for(i = 0; i < rxact[xid_index].nsubxids; i++)
-                       xids[nxids++] = subxip[rxact[xid_index].subx_offset + i];
-
+               {
+                       TransactionId subxid = subxip[rxact[xid_index].subx_offset + i];
+                       xids[nxids++] = subxid;
+                       SubTransSetParent(subxid, xid);
+               }
        }
 
        if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
@@ -565,11 +558,74 @@ ProcArrayApplyRecoveryInfo(XLogRecPtr lsn, xl_xact_running_xacts *xlrec)
        loggableLocks = (xl_rel_lock *) &(xlrec->xrun[(xlrec->xcnt + xlrec->subxcnt)]);
        relation_redo_locks(loggableLocks, xlrec->numLocks);
 
-       elog(trace_recovery(DEBUG2), 
-               "running transaction data initialized");
-       if (recoverySnapshotState == RECOVERY_SNAPSHOT_READY)
-               elog(trace_recovery(DEBUG2), 
-                       "recovery snapshots are now enabled");
+       /* nextXid must be beyond any observed xid */
+       if (TransactionIdFollowsOrEquals(latestObservedXid,
+                                                                        ShmemVariableCache->nextXid))
+       {
+               ShmemVariableCache->nextXid = latestObservedXid;
+               TransactionIdAdvance(ShmemVariableCache->nextXid);
+       }
+}
+
+static void
+RecoverySnapshotStateMachine(int newstate, 
+                                                        TransactionId oldestXid, TransactionId latestXid)
+{
+       TransactionId xid = oldestXid;
+       Assert(newstate > recoverySnapshotState);
+       /* Dispatch on the current state; newstate was asserted above to compare greater. */
+       switch (recoverySnapshotState)
+       {
+               case RECOVERY_SNAPSHOT_UNINITIALIZED:
+                               /* First transition out of the uninitialized state. */
+                               ereport(trace_recovery(DEBUG2), 
+                                               (errmsg("running transaction data initialized")));
+
+                               /* Start up the commit log, subtrans and multixact */
+                               StartupCLOG();
+                               StartupSUBTRANS(oldestXid);
+                               StartupMultiXact();
+                               /* Walk every xid after oldestXid, up to and including latestXid. */
+                               TransactionIdAdvance(xid);
+                               while (TransactionIdPrecedesOrEquals(xid, latestXid))
+                               {
+                                       /*
+                                        * Extend clog and subtrans like we do in 
+                                        * GetNewTransactionId() during normal operation.
+                                        */
+                                       ExtendCLOG(xid);
+                                       ExtendSUBTRANS(xid);
+
+                                       TransactionIdAdvance(xid);
+                               }
+
+                               if (newstate == RECOVERY_SNAPSHOT_READY)
+                                       ereport(trace_recovery(DEBUG1), 
+                                                       (errmsg("recovery snapshots are now enabled")));
+                               else if (newstate == RECOVERY_SNAPSHOT_PENDING)
+                                       ereport(LOG, 
+                                                       (errmsg("consistent state delayed because "
+                                                                       "recovery snapshot incomplete")));
+                               break;
+
+               case RECOVERY_SNAPSHOT_PENDING:
+
+                               if (newstate == RECOVERY_SNAPSHOT_READY)
+                               {
+                                       ereport(trace_recovery(DEBUG2), 
+                                                       (errmsg("running xact data now proven complete")));
+                                       ereport(trace_recovery(DEBUG1), 
+                                                       (errmsg("recovery snapshots are now enabled")));
+                                       break;
+                               }
+                               /* Any other newstate is invalid here: fall through to the error. */
+               case RECOVERY_SNAPSHOT_READY:
+               default:
+                               elog(ERROR, "invalid value for recoverySnapshotState");
+                               break;
+       }
+       
+       recoverySnapshotState = newstate;
+}
 
 /*
@@ -582,17 +638,13 @@ ProcArrayInitRecoveryInfo(void)
 {
        Assert(InHotStandby);
 
-       recoverySnapshotState = RECOVERY_SNAPSHOT_READY;
+       RecoverySnapshotStateMachine(RECOVERY_SNAPSHOT_READY, 
+                                                                ShmemVariableCache->nextXid, InvalidTransactionId);
 
        /* also initialize latestCompletedXid, to nextXid - 1 */
        ShmemVariableCache->latestCompletedXid = ShmemVariableCache->nextXid;
        TransactionIdRetreat(ShmemVariableCache->latestCompletedXid);
        latestObservedXid = ShmemVariableCache->latestCompletedXid;
-
-       elog(trace_recovery(DEBUG2), 
-               "running transaction data initialized");
-       elog(trace_recovery(DEBUG2), 
-               "recovery snapshots are now enabled");
 }
 
 /*
@@ -2311,6 +2363,14 @@ RecordKnownAssignedTransactionIds(TransactionId xid)
                                                (errmsg("recording unobserved xid %u (latestObservedXid %u)",
                                                                        next_expected_xid, latestObservedXid)));
                        KnownAssignedXidsAdd(&next_expected_xid, 1);
+
+                       /*
+                        * Extend clog and subtrans like we do in GetNewTransactionId()
+                        * during normal operation.
+                        */
+                       ExtendCLOG(next_expected_xid);
+                       ExtendSUBTRANS(next_expected_xid);
+
                        TransactionIdAdvance(next_expected_xid);
                }
 
@@ -2318,6 +2378,14 @@ RecordKnownAssignedTransactionIds(TransactionId xid)
 
                latestObservedXid = xid;
        }
+
+       /* nextXid must be beyond any observed xid */
+       if (TransactionIdFollowsOrEquals(latestObservedXid,
+                                                                        ShmemVariableCache->nextXid))
+       {
+               ShmemVariableCache->nextXid = latestObservedXid;
+               TransactionIdAdvance(ShmemVariableCache->nextXid);
+       }
 }
 
 void