committs: Store transaction commit time
authorAlvaro Herrera <[email protected]>
Mon, 11 Mar 2013 14:27:29 +0000 (11:27 -0300)
committerAndres Freund <[email protected]>
Fri, 17 May 2013 14:48:20 +0000 (16:48 +0200)
29 files changed:
contrib/pg_xlogdump/rmgrdesc.c
doc/src/sgml/config.sgml
src/backend/access/rmgrdesc/Makefile
src/backend/access/rmgrdesc/committsdesc.c [new file with mode: 0644]
src/backend/access/rmgrdesc/xlogdesc.c
src/backend/access/transam/Makefile
src/backend/access/transam/clog.c
src/backend/access/transam/committs.c [new file with mode: 0644]
src/backend/access/transam/rmgr.c
src/backend/access/transam/slru.c
src/backend/access/transam/varsup.c
src/backend/access/transam/xact.c
src/backend/access/transam/xlog.c
src/backend/commands/vacuum.c
src/backend/storage/ipc/ipci.c
src/backend/storage/ipc/procarray.c
src/backend/storage/lmgr/lwlock.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/bin/initdb/initdb.c
src/bin/pg_controldata/pg_controldata.c
src/include/access/committs.h [new file with mode: 0644]
src/include/access/rmgrlist.h
src/include/access/slru.h
src/include/access/transam.h
src/include/catalog/pg_control.h
src/include/catalog/pg_proc.h
src/include/storage/lwlock.h
src/include/utils/builtins.h

index 0508c8dae91cf0e8589ad8903e31f29952f9e97b..e8a719757328f08dd9ff1e594fad0b38d08989b2 100644 (file)
@@ -9,6 +9,7 @@
 #include "postgres.h"
 
 #include "access/clog.h"
+#include "access/committs.h"
 #include "access/gin.h"
 #include "access/gist_private.h"
 #include "access/hash.h"
index d750f0800b70083311112904c90658110d33b96c..67f755312bbbe0dcdf7c40c930ea373773673b52 100644 (file)
@@ -2272,6 +2272,21 @@ include 'filename'
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-track-commit-timestamp" xreflabel="track_commit_timestamp">
+      <term><varname>track_commit_timestamp</varname> (<type>bool</type>)</term>
+      <indexterm>
+       <primary><varname>track_commit_timestamp</> configuration parameter</primary>
+      </indexterm>
+      <listitem>
+       <para>
+        Record commit time of transactions.  This parameter
+        can only be set in
+        the <filename>postgresql.conf</> file or on the server command line.
+        The default value is off.
+       </para>
+      </listitem>
+     </varlistentry>
+
      </variablelist>
     </sect2>
 
index 7d092d205d6083404a50a09c4a4e3e7aae268469..20c88a829852f2c0f0917ba40d300774a9d6b283 100644 (file)
@@ -8,7 +8,8 @@ subdir = src/backend/access/rmgrdesc
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = clogdesc.o dbasedesc.o gindesc.o gistdesc.o hashdesc.o heapdesc.o \
+OBJS = clogdesc.o committsdesc.o dbasedesc.o gindesc.o gistdesc.o hashdesc.o \
+       heapdesc.o \
       mxactdesc.o nbtdesc.o relmapdesc.o seqdesc.o smgrdesc.o spgdesc.o \
       standbydesc.o tblspcdesc.o xactdesc.o xlogdesc.o
 
diff --git a/src/backend/access/rmgrdesc/committsdesc.c b/src/backend/access/rmgrdesc/committsdesc.c
new file mode 100644 (file)
index 0000000..320bec3
--- /dev/null
@@ -0,0 +1,53 @@
+/*-------------------------------------------------------------------------
+ *
+ * committsdesc.c
+ *    rmgr descriptor routines for access/transam/committs.c
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    src/backend/access/rmgrdesc/committsdesc.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/committs.h"
+#include "utils/timestamp.h"
+
+
+/*
+ * committs_desc
+ *    Append a human-readable description of a CommitTs WAL record to buf.
+ *
+ * xl_info is the record's info byte (the XLR_INFO_MASK bits are ignored),
+ * and rec points at the record payload.
+ */
+void
+committs_desc(StringInfo buf, uint8 xl_info, char *rec)
+{
+   uint8       opcode = xl_info & ~XLR_INFO_MASK;
+
+   if (opcode == COMMITTS_ZEROPAGE)
+   {
+       int         zeroed_page;
+
+       /* payload is a bare page number; copy it out rather than cast */
+       memcpy(&zeroed_page, rec, sizeof(int));
+       appendStringInfo(buf, "zeropage: %d", zeroed_page);
+       return;
+   }
+
+   if (opcode == COMMITTS_TRUNCATE)
+   {
+       int         cutoff_page;
+
+       memcpy(&cutoff_page, rec, sizeof(int));
+       appendStringInfo(buf, "truncate before: %d", cutoff_page);
+       return;
+   }
+
+   if (opcode == COMMITTS_SETTS)
+   {
+       xl_committs_set *setrec = (xl_committs_set *) rec;
+       int         n = 0;
+
+       appendStringInfo(buf, "set committs %s for: %u",
+                        timestamptz_to_str(setrec->timestamp),
+                        setrec->mainxid);
+
+       /* list every subtransaction covered by the record */
+       while (n < setrec->nsubxids)
+       {
+           appendStringInfo(buf, ", %u", setrec->subxids[n]);
+           n++;
+       }
+       return;
+   }
+
+   appendStringInfo(buf, "UNKNOWN");
+}
index 4c68b6ae0a39095939c0359316e7d44aa916a1dd..9e752a7d5683921226b6c8be15944e8c24b4cccf 100644 (file)
@@ -44,7 +44,7 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
        appendStringInfo(buf, "checkpoint: redo %X/%X; "
                         "tli %u; prev tli %u; fpw %s; xid %u/%u; oid %u; multi %u; offset %u; "
                         "oldest xid %u in DB %u; oldest multi %u in DB %u; "
-                        "oldest running xid %u; %s",
+                        "oldest CommitTs xid: %u; oldest running xid %u; %s",
                         (uint32) (checkpoint->redo >> 32), (uint32) checkpoint->redo,
                         checkpoint->ThisTimeLineID,
                         checkpoint->PrevTimeLineID,
@@ -57,6 +57,7 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
                         checkpoint->oldestXidDB,
                         checkpoint->oldestMulti,
                         checkpoint->oldestMultiDB,
+                        checkpoint->oldestCommitTs,
                         checkpoint->oldestActiveXid,
                 (info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online");
    }
index eb6cfc5c44e9051fb968fdb5cd580200eab08a59..ace913edd45358f8ed4a99e68bb446d830f4b79d 100644 (file)
@@ -14,7 +14,7 @@ include $(top_builddir)/src/Makefile.global
 
 OBJS = clog.o transam.o varsup.o xact.o rmgr.o slru.o subtrans.o multixact.o \
    timeline.o twophase.o twophase_rmgr.o xlog.o xlogarchive.o xlogfuncs.o \
-   xlogreader.o xlogutils.o
+   xlogreader.o xlogutils.o committs.o
 
 include $(top_srcdir)/src/backend/common.mk
 
index cb95aa34dda602e6a394635c4090cd293d2b72b1..3747da67dcef21220119bc9e9dad19843641c654 100644 (file)
@@ -152,8 +152,7 @@ TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
           status == TRANSACTION_STATUS_ABORTED);
 
    /*
-    * See how many subxids, if any, are on the same page as the parent, if
-    * any.
+    * See how many subxids, if any, are on the same page as the parent.
     */
    for (i = 0; i < nsubxids; i++)
    {
diff --git a/src/backend/access/transam/committs.c b/src/backend/access/transam/committs.c
new file mode 100644 (file)
index 0000000..88df85c
--- /dev/null
@@ -0,0 +1,698 @@
+/*-------------------------------------------------------------------------
+ *
+ * committs.c
+ *     PostgreSQL commit timestamp manager
+ *
+ * This module is a pg_clog-like system that stores the commit timestamp
+ * for each transaction.
+ *
+ * XLOG interactions: this module generates an XLOG record whenever a new
+ * CommitTs page is initialized to zeroes.  Also, one XLOG record is
+ * generated for setting of values when the caller requests it; this allows
+ * us to support values coming from places other than transaction commit.
+ * Other writes of CommitTS come from recording of transaction commit in
+ * xact.c, which generates its own XLOG records for these events and will
+ * re-perform the status update on redo; so we need make no additional XLOG
+ * entry here.
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/transam/committs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/committs.h"
+#include "access/htup_details.h"
+#include "access/slru.h"
+#include "access/transam.h"
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "pg_trace.h"
+#include "utils/builtins.h"
+#include "utils/snapmgr.h"
+#include "utils/timestamp.h"
+
+/*
+ * Defines for CommitTs page sizes.  A page is the same BLCKSZ as is used
+ * everywhere else in Postgres.
+ *
+ * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF,
+ * CommitTs page numbering also wraps around at
+ * 0xFFFFFFFF/COMMITTS_XACTS_PER_PAGE, and CommitTs segment numbering at
+ * 0xFFFFFFFF/COMMITTS_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT.  We need take no
+ * explicit notice of that fact in this module, except when comparing segment
+ * and page numbers in TruncateCommitTs (see CommitTsPagePrecedes).
+ */
+
+/* We need 8+4 bytes per xact */
+#define COMMITTS_XACTS_PER_PAGE \
+   (BLCKSZ / (sizeof(TimestampTz) + sizeof(CommitExtraData)))
+
+#define TransactionIdToCTsPage(xid)    \
+   ((xid) / (TransactionId) COMMITTS_XACTS_PER_PAGE)
+#define TransactionIdToCTsEntry(xid)   \
+   ((xid) % (TransactionId) COMMITTS_XACTS_PER_PAGE)
+
+/*
+ * Link to shared-memory data structures for CommitTs control
+ */
+static SlruCtlData CommitTsCtlData;
+
+#define CommitTsCtl (&CommitTsCtlData)
+
+/* GUC variables */
+bool   commit_ts_enabled;
+
+static void SetXidCommitTsInPage(TransactionId xid, int nsubxids,
+                    TransactionId *subxids, TimestampTz committs,
+                    CommitExtraData extra, int pageno);
+static void TransactionIdSetCommitTs(TransactionId xid, TimestampTz committs,
+                         CommitExtraData extra, int slotno);
+static int ZeroCommitTsPage(int pageno, bool writeXlog);
+static bool CommitTsPagePrecedes(int page1, int page2);
+static void WriteZeroPageXlogRec(int pageno);
+static void WriteTruncateXlogRec(int pageno);
+static void WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids,
+                        TransactionId *subxids, TimestampTz timestamp,
+                        CommitExtraData data);
+
+
+/*
+ * TransactionTreeSetCommitTimestamp
+ *
+ * Record the final commit timestamp of transaction entries in the commit
+ * timestamp log for a transaction and its subtransaction tree, as
+ * efficiently as possible.
+ *
+ * xid is the top level transaction id.
+ *
+ * subxids is an array of xids of length nsubxids, representing subtransactions
+ * in the tree of xid. In various cases nsubxids may be zero.
+ *
+ * timestamp and extra are the values stored for xid and for every subxid.
+ *
+ * The do_xlog parameter tells us whether to include a XLog record of this
+ * or not.  Normal path through RecordTransactionCommit() will be related
+ * to a transaction commit XLog record, and so should pass "false" here.
+ * Other callers probably want to pass true, so that the given values persist
+ * in case of crashes.
+ *
+ * Does nothing at all when commit_ts_enabled is off.
+ */
+void
+TransactionTreeSetCommitTimestamp(TransactionId xid, int nsubxids,
+                                 TransactionId *subxids, TimestampTz timestamp,
+                                 CommitExtraData extra, bool do_xlog)
+{
+   int         i;
+   TransactionId headxid;
+
+   if (!commit_ts_enabled)
+       return;
+
+   /*
+    * Comply with the WAL-before-data rule: if caller specified it wants
+    * this value to be recorded in WAL, do so before touching the data.
+    */
+   if (do_xlog)
+       WriteSetTimestampXlogRec(xid, nsubxids, subxids, timestamp, extra);
+
+   /*
+    * We split the xids to set the timestamp to in groups belonging to the
+    * same SLRU page; the first element in each such set is its head.  The
+    * first group has the main XID as the head; subsequent sets use the
+    * first subxid not on the previous page as head.  This way, we only have
+    * to lock/modify each SLRU page once.
+    */
+   for (i = 0, headxid = xid;;)
+   {
+       int         pageno = TransactionIdToCTsPage(headxid);
+       int         j;
+
+       for (j = i; j < nsubxids; j++)
+       {
+           if (TransactionIdToCTsPage(subxids[j]) != pageno)
+               break;
+       }
+       /* subxids[i..j-1] are on the same page as the head */
+
+       SetXidCommitTsInPage(headxid, j - i, subxids + i, timestamp, extra,
+                            pageno);
+
+       /*
+        * We're done only once the scan ran off the end of the array.  If it
+        * stopped at subxids[j] instead, that xid lives on another page and
+        * has not been written yet, even when it is the very last element;
+        * it must become the head of one more group.
+        */
+       if (j >= nsubxids)
+           break;
+
+       /*
+        * Set the new head and skip over it, as well as over the subxids
+        * we just wrote.
+        */
+       headxid = subxids[j];
+       i = j + 1;
+   }
+}
+
+/*
+ * Store one commit timestamp (and its extra data) for xid and for each of
+ * the nsubxids entries of subxids, all of which the caller has determined
+ * to live on CommitTs page pageno.
+ *
+ * The whole update happens under a single exclusive acquisition of
+ * CommitTsControlLock, so it is atomic with respect to this page only.
+ */
+static void
+SetXidCommitTsInPage(TransactionId xid, int nsubxids,
+                    TransactionId *subxids, TimestampTz committs,
+                    CommitExtraData extra, int pageno)
+{
+   int         slot;
+   int         k = 0;
+
+   LWLockAcquire(CommitTsControlLock, LW_EXCLUSIVE);
+
+   slot = SimpleLruReadPage(CommitTsCtl, pageno, true, xid);
+
+   /* head of the group first, then every subxid sharing its page */
+   TransactionIdSetCommitTs(xid, committs, extra, slot);
+   while (k < nsubxids)
+   {
+       TransactionIdSetCommitTs(subxids[k], committs, extra, slot);
+       k++;
+   }
+
+   /* the buffer now differs from what is on disk */
+   CommitTsCtl->shared->page_dirty[slot] = true;
+
+   LWLockRelease(CommitTsControlLock);
+}
+
+/*
+ * Sets the commit timestamp of a single transaction.
+ *
+ * Must be called with CommitTsControlLock held
+ */
+static void
+TransactionIdSetCommitTs(TransactionId xid, TimestampTz committs,
+                        CommitExtraData extra, int slotno)
+{
+   int         entryno = TransactionIdToCTsEntry(xid);
+   TimestampTz *timeptr;
+   CommitExtraData *dataptr;
+
+   timeptr = (TimestampTz *) CommitTsCtl->shared->page_buffer[slotno];
+   timeptr += entryno;
+   *timeptr = committs;
+
+   dataptr = (CommitExtraData *) ((char *) timeptr + sizeof(TimestampTz));
+   *dataptr = extra;
+}
+
+/*
+ * Interrogate the commit timestamp of a transaction.
+ */
+void
+TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
+                            CommitExtraData *data)
+{
+   int         pageno = TransactionIdToCTsPage(xid);
+   int         entryno = TransactionIdToCTsEntry(xid);
+   int         slotno;
+   TimestampTz *timeptr;
+   CommitExtraData    *dataptr;
+   TransactionId oldestCommitTs;
+
+   if (!commit_ts_enabled)
+   {
+       if (ts)
+           *ts = InvalidTransactionId;
+       if (data)
+           *data = (CommitExtraData) 0;
+       return;
+   }
+
+   LWLockAcquire(CommitTsControlLock, LW_SHARED);
+   oldestCommitTs = ShmemVariableCache->oldestCommitTs;
+   LWLockRelease(CommitTsControlLock);
+
+   if (!TransactionIdIsValid(oldestCommitTs) ||
+       TransactionIdPrecedes(xid, oldestCommitTs))
+   {
+       if (ts)
+           *ts = InvalidTransactionId;
+       if (data)
+           *data = (CommitExtraData) 0;
+       return;
+   }
+
+   /* lock is acquired by SimpleLruReadPage_ReadOnly */
+
+   slotno = SimpleLruReadPage_ReadOnly(CommitTsCtl, pageno, xid);
+   timeptr = (TimestampTz *) CommitTsCtl->shared->page_buffer[slotno];
+   timeptr += entryno;
+   if (ts)
+       *ts = *timeptr;
+
+   if (data)
+   {
+       dataptr = (CommitExtraData *) ((char *) timeptr + sizeof(TimestampTz));
+       *data = *dataptr;
+   }
+
+   LWLockRelease(CommitTsControlLock);
+}
+
+/*
+ * Convenience wrapper: fetch only the commit timestamp stored for xid,
+ * ignoring the extra data.
+ */
+TimestampTz
+TransactionIdGetCommitTimestamp(TransactionId xid)
+{
+   TimestampTz     result;
+
+   /* a NULL "data" pointer tells the lookup to skip the extra data */
+   TransactionIdGetCommitTsData(xid, &result, NULL);
+   return result;
+}
+
+/*
+ * Convenience wrapper: fetch only the extra data stored for xid, ignoring
+ * the commit timestamp.
+ */
+CommitExtraData
+TransactionIdGetCommitData(TransactionId xid)
+{
+   CommitExtraData     result;
+
+   /* a NULL "ts" pointer tells the lookup to skip the timestamp */
+   TransactionIdGetCommitTsData(xid, NULL, &result);
+   return result;
+}
+
+/*
+ * SQL-callable: return the commit timestamp recorded for the transaction id
+ * given as the first argument.
+ */
+PG_FUNCTION_INFO_V1(pg_get_transaction_committime);
+Datum
+pg_get_transaction_committime(PG_FUNCTION_ARGS)
+{
+   TransactionId   target = PG_GETARG_UINT32(0);
+   TimestampTz     when = TransactionIdGetCommitTimestamp(target);
+
+   PG_RETURN_TIMESTAMPTZ(when);
+}
+
+/*
+ * SQL-callable: return the extra data recorded alongside the commit
+ * timestamp of the transaction id given as the first argument.
+ */
+PG_FUNCTION_INFO_V1(pg_get_transaction_extradata);
+Datum
+pg_get_transaction_extradata(PG_FUNCTION_ARGS)
+{
+   TransactionId   target = PG_GETARG_UINT32(0);
+   CommitExtraData payload = TransactionIdGetCommitData(target);
+
+   PG_RETURN_INT32(payload);
+}
+
+/*
+ * SQL-callable: return a two-column row (timestamp, extra) holding both the
+ * commit timestamp and the extra data of the given transaction id.
+ */
+PG_FUNCTION_INFO_V1(pg_get_transaction_committime_data);
+Datum
+pg_get_transaction_committime_data(PG_FUNCTION_ARGS)
+{
+   TransactionId   target = PG_GETARG_UINT32(0);
+   TimestampTz     when;
+   CommitExtraData payload;
+   TupleDesc   rowdesc;
+   Datum       cols[2];
+   bool        isnull[2] = {false, false};    /* neither column is ever NULL */
+
+   /*
+    * Describe the result row.  The column list here has to stay in sync
+    * with this function's pg_proc entry.
+    */
+   rowdesc = CreateTemplateTupleDesc(2, false);
+   TupleDescInitEntry(rowdesc, (AttrNumber) 1, "timestamp",
+                      TIMESTAMPTZOID, -1, 0);
+   TupleDescInitEntry(rowdesc, (AttrNumber) 2, "extra",
+                      INT4OID, -1, 0);
+   rowdesc = BlessTupleDesc(rowdesc);
+
+   /* look up both values in one go, then pack them into the row */
+   TransactionIdGetCommitTsData(target, &when, &payload);
+
+   cols[0] = TimestampTzGetDatum(when);
+   cols[1] = Int32GetDatum(payload);
+
+   PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(rowdesc, cols, isnull)));
+}
+
+/*
+ * Number of shared CommitTS buffers.
+ *
+ * The count is NBuffers / 1024, clamped to the range 4..16.  The reasoning
+ * mirrors the one used for CLOG buffers; see comments in CLOGShmemBuffers.
+ */
+Size
+CommitTsShmemBuffers(void)
+{
+   int         nbuffers = NBuffers / 1024;
+
+   if (nbuffers < 4)
+       nbuffers = 4;           /* lower bound */
+   if (nbuffers > 16)
+       nbuffers = 16;          /* upper bound */
+
+   return nbuffers;
+}
+
+/*
+ * Initialization of shared memory for CommitTs
+ *
+ * CommitTsShmemSize reports the amount of shared memory the CommitTs SLRU
+ * needs, as computed by SimpleLruShmemSize for CommitTsShmemBuffers()
+ * buffers.  The second argument (0) is passed through to SimpleLruShmemSize;
+ * NOTE(review): presumably the number of LSN slots per page -- confirm
+ * against slru.c.
+ */
+Size
+CommitTsShmemSize(void)
+{
+   return SimpleLruShmemSize(CommitTsShmemBuffers(), 0);
+}
+
+/*
+ * Set up the CommitTs SLRU control structure: install the page-ordering
+ * callback, then initialize the SLRU with CommitTsShmemBuffers() buffers,
+ * CommitTsControlLock as its control lock and "pg_committs" as its
+ * directory.
+ */
+void
+CommitTsShmemInit(void)
+{
+   CommitTsCtl->PagePrecedes = CommitTsPagePrecedes;
+   SimpleLruInit(CommitTsCtl, "CommitTs Ctl", CommitTsShmemBuffers(), 0,
+                 CommitTsControlLock, "pg_committs");
+}
+
+/*
+ * This func must be called ONCE on system install.
+ *
+ * (The CommitTs directory is assumed to have been created by initdb, and
+ * CommitTsShmemInit must have been called already.)
+ *
+ * The function is intentionally empty; it exists so that bootstrap can call
+ * it alongside the other SLRU modules' bootstrap routines.
+ */
+void
+BootStrapCommitTs(void)
+{
+   /*
+    * Nothing to do here at present, unlike most other SLRU modules; segments
+    * are created when the server is started with this module enabled.
+    * See StartupCommitTs.
+    */
+}
+
+/*
+ * Set up CommitTs page pageno as an all-zeroes page in shared memory and
+ * return the buffer slot it occupies.  When writeXlog is true, a ZEROPAGE
+ * XLOG record is emitted as well.
+ *
+ * Nothing is written to disk here; that is left to the caller or to a
+ * later flush.
+ *
+ * Control lock must be held at entry, and will be held at exit.
+ */
+static int
+ZeroCommitTsPage(int pageno, bool writeXlog)
+{
+   int         slot = SimpleLruZeroPage(CommitTsCtl, pageno);
+
+   if (!writeXlog)
+       return slot;
+
+   WriteZeroPageXlogRec(pageno);
+   return slot;
+}
+
+/*
+ * This must be called ONCE during postmaster or standalone-backend startup,
+ * after StartupXLOG has initialized ShmemVariableCache->nextXid.
+ *
+ * Besides recording the latest page number, this makes sure the currently
+ * active segment exists on disk.  The server may have run with this module
+ * disabled for a while and so may have skipped the point where the page
+ * would normally have been created.
+ */
+void
+StartupCommitTs(void)
+{
+   TransactionId next_xid = ShmemVariableCache->nextXid;
+   int         cur_page = TransactionIdToCTsPage(next_xid);
+
+   LWLockAcquire(CommitTsControlLock, LW_EXCLUSIVE);
+
+   /* Remember which page is the most recent one. */
+   CommitTsCtl->shared->latest_page_number = cur_page;
+
+   if (!commit_ts_enabled)
+   {
+       /*
+        * Module is off: forget the oldest known xid so lookups cannot hand
+        * back possibly-invalid data, then discard old segments.
+        */
+       ShmemVariableCache->oldestCommitTs = InvalidTransactionId;
+       LWLockRelease(CommitTsControlLock);
+
+       TruncateCommitTs(ReadNewTransactionId());
+
+       return;
+   }
+
+   /*
+    * Module is on.  If no oldest xid is recorded (it was off during the
+    * previous run), start tracking from the next xid, so that nobody tries
+    * to read entries that were never written.
+    *
+    * XXX is there a hole when the server runs enabled, then disabled, then
+    * enabled again?  Apparently not: a checkpoint at end of recovery should
+    * set the value to InvalidTransactionId, so any transactions lacking
+    * CommitTs values can only appear after oldestCommitTs has temporarily
+    * been set to Invalid.
+    */
+   if (!TransactionIdIsValid(ShmemVariableCache->oldestCommitTs))
+       ShmemVariableCache->oldestCommitTs = ReadNewTransactionId();
+
+   /* Create the current page on disk if it is not there yet. */
+   if (!SimpleLruDoesPhysicalPageExist(CommitTsCtl, cur_page))
+   {
+       int     slot = ZeroCommitTsPage(cur_page, false);
+
+       SimpleLruWritePage(CommitTsCtl, slot);
+       Assert(!CommitTsCtl->shared->page_dirty[slot]);
+   }
+
+   LWLockRelease(CommitTsControlLock);
+}
+
+/*
+ * This must be called ONCE during postmaster or standalone-backend shutdown
+ *
+ * Differs from CheckPointCommitTs only in the flag passed to SimpleLruFlush
+ * (false here).  NOTE(review): presumably that flag tells slru.c whether the
+ * flush is part of a checkpoint -- confirm against SimpleLruFlush.
+ */
+void
+ShutdownCommitTs(void)
+{
+   /* Flush dirty CommitTs pages to disk */
+   SimpleLruFlush(CommitTsCtl, false);
+}
+
+/*
+ * Perform a checkpoint --- either during shutdown, or on-the-fly
+ *
+ * Differs from ShutdownCommitTs only in the flag passed to SimpleLruFlush
+ * (true here).  NOTE(review): presumably that flag tells slru.c the flush is
+ * part of a checkpoint -- confirm against SimpleLruFlush.
+ */
+void
+CheckPointCommitTs(void)
+{
+   /* Flush dirty CommitTs pages to disk */
+   SimpleLruFlush(CommitTsCtl, true);
+}
+
+
+/*
+ * Make sure that CommitTs has room for a newly-allocated XID.
+ *
+ * A new page is zeroed whenever newestXact is the first XID that falls on
+ * it; for every other XID, and whenever the module is disabled, this
+ * returns without doing anything.
+ *
+ * NB: this is called while holding XidGenLock.  We want it to be very fast
+ * most of the time; even when it's not so fast, no actual I/O need happen
+ * unless we're forced to write out a dirty CommitTs or xlog page to make room
+ * in shared memory.
+ */
+void
+ExtendCommitTs(TransactionId newestXact)
+{
+   bool        starts_page;
+
+   /* nothing to do if module not enabled */
+   if (!commit_ts_enabled)
+       return;
+
+   /*
+    * Normally the first XID of a page is the one whose entry number is
+    * zero.  Just after wraparound, though, the first XID handed out on page
+    * zero is FirstNormalTransactionId, so treat that one as a page start
+    * too.
+    */
+   starts_page = (TransactionIdToCTsEntry(newestXact) == 0 ||
+                  TransactionIdEquals(newestXact, FirstNormalTransactionId));
+   if (!starts_page)
+       return;
+
+   LWLockAcquire(CommitTsControlLock, LW_EXCLUSIVE);
+
+   /* Zero the page; also XLOG that fact unless we are in recovery */
+   ZeroCommitTsPage(TransactionIdToCTsPage(newestXact), !InRecovery);
+
+   LWLockRelease(CommitTsControlLock);
+}
+
+/*
+ * Remove all CommitTs segments before the one holding the passed
+ * transaction ID
+ *
+ * If the directory scan finds nothing removable, no XLOG record is written
+ * and nothing is truncated.
+ *
+ * Note that we don't need to flush XLOG here.
+ */
+void
+TruncateCommitTs(TransactionId oldestXact)
+{
+   /*
+    * We hand SimpleLruTruncate the *page* containing oldestXact; the
+    * effective cutoff is the start of the segment that page belongs to.
+    */
+   int         cutoff = TransactionIdToCTsPage(oldestXact);
+
+   /* Only proceed when at least one file could actually be removed */
+   if (SlruScanDirectory(CommitTsCtl, SlruScanDirCbReportPresence, &cutoff))
+   {
+       /* XLOG record first ... */
+       WriteTruncateXlogRec(cutoff);
+
+       /* ... then drop the old CommitTs segment(s) */
+       SimpleLruTruncate(CommitTsCtl, cutoff);
+   }
+}
+
+/*
+ * Set the earliest value for which commit TS can be consulted.
+ *
+ * oldestXact is stored in ShmemVariableCache->oldestCommitTs under an
+ * exclusive CommitTsControlLock.  Passing InvalidTransactionId makes
+ * TransactionIdGetCommitTsData return zeroes for every xid.
+ */
+void
+SetCommitTsLimit(TransactionId oldestXact)
+{
+   LWLockAcquire(CommitTsControlLock, LW_EXCLUSIVE);
+   ShmemVariableCache->oldestCommitTs = oldestXact;
+   LWLockRelease(CommitTsControlLock);
+}
+
+/*
+ * Decide which of two CommitTs page numbers is "older" for truncation
+ * purposes.
+ *
+ * Each page number is turned back into an XID and the two are compared with
+ * TransactionIdPrecedes, which gets wraparound XID arithmetic right.  Page
+ * zero would map to InvalidTransactionId, and TransactionIdPrecedes treats
+ * permanent xact IDs specially, so both XIDs are shifted up by
+ * FirstNormalTransactionId before comparing.
+ */
+static bool
+CommitTsPagePrecedes(int page1, int page2)
+{
+   TransactionId first = ((TransactionId) page1) * COMMITTS_XACTS_PER_PAGE +
+       FirstNormalTransactionId;
+   TransactionId second = ((TransactionId) page2) * COMMITTS_XACTS_PER_PAGE +
+       FirstNormalTransactionId;
+
+   return TransactionIdPrecedes(first, second);
+}
+
+
+/*
+ * Emit a COMMITTS_ZEROPAGE XLOG record whose whole payload is the page
+ * number being zeroed.
+ */
+static void
+WriteZeroPageXlogRec(int pageno)
+{
+   XLogRecData payload;
+
+   /* single-element chain, not attached to any buffer */
+   payload.next = NULL;
+   payload.buffer = InvalidBuffer;
+   payload.len = sizeof(int);
+   payload.data = (char *) (&pageno);
+
+   (void) XLogInsert(RM_COMMITTS_ID, COMMITTS_ZEROPAGE, &payload);
+}
+
+/*
+ * Write a TRUNCATE xlog record
+ *
+ * The payload is just the cutoff page number.  The record's location is not
+ * needed by any caller, so the XLogInsert result is discarded.
+ */
+static void
+WriteTruncateXlogRec(int pageno)
+{
+   XLogRecData rdata;
+
+   rdata.data = (char *) (&pageno);
+   rdata.len = sizeof(int);
+   rdata.buffer = InvalidBuffer;
+   rdata.next = NULL;
+   (void) XLogInsert(RM_COMMITTS_ID, COMMITTS_TRUNCATE, &rdata);
+}
+
+/*
+ * Write a SETTS xlog record
+ *
+ * The record consists of the fixed part of xl_committs_set (everything up
+ * to, but not including, the subxids member) followed by nsubxids
+ * TransactionIds.
+ *
+ * The subxids are not copied into the local struct: it lives on the stack
+ * and only has room for whatever array size the struct declares, so copying
+ * an arbitrary number of subxids into it would overrun it.  Instead the
+ * caller's array is attached as a second element of the XLogRecData chain.
+ * NOTE(review): this relies on XLogInsert concatenating the chain elements
+ * in order, which is how the redo routine expects to find subxids right
+ * after the fixed part -- confirm against xlog.c.
+ */
+static void
+WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids,
+                        TransactionId *subxids, TimestampTz timestamp,
+                        CommitExtraData data)
+{
+   XLogRecData rdata[2];
+   xl_committs_set record;
+
+   record.timestamp = timestamp;
+   record.data = data;
+   record.mainxid = mainxid;
+   record.nsubxids = nsubxids;
+
+   /* fixed-size part of the record */
+   rdata[0].data = (char *) &record;
+   rdata[0].len = offsetof(xl_committs_set, subxids);
+   rdata[0].buffer = InvalidBuffer;
+   rdata[0].next = NULL;
+
+   /* variable-size part, only when there is something to append */
+   if (nsubxids > 0)
+   {
+       rdata[0].next = &rdata[1];
+       rdata[1].data = (char *) subxids;
+       rdata[1].len = nsubxids * sizeof(TransactionId);
+       rdata[1].buffer = InvalidBuffer;
+       rdata[1].next = NULL;
+   }
+
+   (void) XLogInsert(RM_COMMITTS_ID, COMMITTS_SETTS, rdata);
+}
+
+
+/*
+ * CommitTS resource manager's routines
+ *
+ * committs_redo replays one CommitTs XLOG record: it re-zeroes a page,
+ * re-truncates the SLRU, or re-applies a set of commit timestamps,
+ * depending on the record's op code.  An unrecognized op code is a PANIC.
+ */
+void
+committs_redo(XLogRecPtr lsn, XLogRecord *record)
+{
+   uint8       opcode = record->xl_info & ~XLR_INFO_MASK;
+
+   /* Backup blocks are not used in committs records */
+   Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+
+   if (opcode == COMMITTS_ZEROPAGE)
+   {
+       int         page;
+       int         slot;
+
+       memcpy(&page, XLogRecGetData(record), sizeof(int));
+
+       LWLockAcquire(CommitTsControlLock, LW_EXCLUSIVE);
+
+       /* zero the page without logging again, and write it out at once */
+       slot = ZeroCommitTsPage(page, false);
+       SimpleLruWritePage(CommitTsCtl, slot);
+       Assert(!CommitTsCtl->shared->page_dirty[slot]);
+
+       LWLockRelease(CommitTsControlLock);
+       return;
+   }
+
+   if (opcode == COMMITTS_TRUNCATE)
+   {
+       int         page;
+
+       memcpy(&page, XLogRecGetData(record), sizeof(int));
+
+       /*
+        * latest_page_number has not been set up yet while replaying XLOG;
+        * plug in a value that gets us past the sanity test in
+        * SimpleLruTruncate.
+        */
+       CommitTsCtl->shared->latest_page_number = page;
+
+       SimpleLruTruncate(CommitTsCtl, page);
+       return;
+   }
+
+   if (opcode == COMMITTS_SETTS)
+   {
+       xl_committs_set *rec = (xl_committs_set *) XLogRecGetData(record);
+
+       /* do_xlog is false: we are replaying the record, not creating one */
+       TransactionTreeSetCommitTimestamp(rec->mainxid, rec->nsubxids,
+                                         rec->subxids, rec->timestamp,
+                                         rec->data, false);
+       return;
+   }
+
+   elog(PANIC, "committs_redo: unknown op code %u", opcode);
+}
index 41d437932cdfd08a884da595876d0655b8adebf9..a69cb0bb85a76a45ffab66d94cbbf4069960e573 100644 (file)
@@ -8,6 +8,7 @@
 #include "postgres.h"
 
 #include "access/clog.h"
+#include "access/committs.h"
 #include "access/gin.h"
 #include "access/gist_private.h"
 #include "access/hash.h"
index 5a8f654fb736dc94e2fb08ccd8b0a33be44cede2..5e53593a8f2735803fbf8b9ca613dc37c9cdfbcf 100644 (file)
@@ -563,6 +563,50 @@ SimpleLruWritePage(SlruCtl ctl, int slotno)
    SlruInternalWritePage(ctl, slotno, NULL);
 }
 
+/*
+ * Return whether the given page exists on disk.
+ *
+ * A false return means that either the file does not exist, or that it's not
+ * large enough to contain the given page.
+ *
+ * Any failure other than a missing file is reported through
+ * SlruReportIOError.  NOTE(review): the code relies on SlruReportIOError not
+ * returning -- confirm against its definition.
+ */
+bool
+SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
+{
+   int         segno = pageno / SLRU_PAGES_PER_SEGMENT;
+   int         rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
+   int         offset = rpageno * BLCKSZ;
+   char        path[MAXPGPATH];
+   int         fd;
+   bool        result;
+   off_t       endpos;
+
+   SlruFileName(ctl, path, segno);
+
+   /* We only measure the file, so read-only access is sufficient. */
+   fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
+   if (fd < 0)
+   {
+       /* expected: file doesn't exist */
+       if (errno == ENOENT)
+           return false;
+
+       /* report error normally */
+       slru_errcause = SLRU_OPEN_FAILED;
+       slru_errno = errno;
+       SlruReportIOError(ctl, pageno, 0);
+   }
+
+   if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
+   {
+       /* the open succeeded; it is the seek that failed */
+       slru_errcause = SLRU_SEEK_FAILED;
+       slru_errno = errno;
+       SlruReportIOError(ctl, pageno, 0);
+   }
+
+   /* the page exists only if the file extends to the end of that page */
+   result = endpos >= (off_t) (offset + BLCKSZ);
+
+   CloseTransientFile(fd);
+   return result;
+}
 
 /*
  * Physical read of a (previously existing) page into a buffer slot
index 0579c84bea20ff19eca87a88ccd6c943bae4f943..90d9295a99db89563ce4fd57107519c72c72ab37 100644 (file)
@@ -14,6 +14,7 @@
 #include "postgres.h"
 
 #include "access/clog.h"
+#include "access/committs.h"
 #include "access/subtrans.h"
 #include "access/transam.h"
 #include "access/xact.h"
@@ -157,9 +158,10 @@ GetNewTransactionId(bool isSubXact)
     * XID before we zero the page.  Fortunately, a page of the commit log
     * holds 32K or more transactions, so we don't have to do this very often.
     *
-    * Extend pg_subtrans too.
+    * Extend pg_subtrans and pg_committs too.
     */
    ExtendCLOG(xid);
+   ExtendCommitTs(xid);
    ExtendSUBTRANS(xid);
 
    /*
index e62286f9f98eccfd9a30e2e8f4908e8757b48672..9f58d975bc735a65062318807d4fbb3993bd29de 100644 (file)
@@ -20,6 +20,7 @@
 #include <time.h>
 #include <unistd.h>
 
+#include "access/committs.h"
 #include "access/multixact.h"
 #include "access/subtrans.h"
 #include "access/transam.h"
@@ -1117,6 +1118,9 @@ RecordTransactionCommit(void)
        }
    }
 
+   TransactionTreeSetCommitTimestamp(xid, nchildren, children,
+                                     xactStopTimestamp, 0, false);
+
    /*
     * Check if we want to commit asynchronously.  We can allow the XLOG flush
     * to happen asynchronously if synchronous_commit=off, or if the current
@@ -4563,6 +4567,7 @@ xactGetCommittedChildren(TransactionId **ptr)
  */
 static void
 xact_redo_commit_internal(TransactionId xid, XLogRecPtr lsn,
+                         TimestampTz commit_time,
                          TransactionId *sub_xids, int nsubxacts,
                          SharedInvalidationMessage *inval_msgs, int nmsgs,
                          RelFileNode *xnodes, int nrels,
@@ -4590,6 +4595,10 @@ xact_redo_commit_internal(TransactionId xid, XLogRecPtr lsn,
        LWLockRelease(XidGenLock);
    }
 
+   /* Set the transaction commit time */
+   TransactionTreeSetCommitTimestamp(xid, nsubxacts, sub_xids,
+                                     commit_time, 0, false);
+
    if (standbyState == STANDBY_DISABLED)
    {
        /*
@@ -4710,7 +4719,8 @@ xact_redo_commit(xl_xact_commit *xlrec,
    /* invalidation messages array follows subxids */
    inval_msgs = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]);
 
-   xact_redo_commit_internal(xid, lsn, subxacts, xlrec->nsubxacts,
+   xact_redo_commit_internal(xid, lsn, xlrec->xact_time,
+                             subxacts, xlrec->nsubxacts,
                              inval_msgs, xlrec->nmsgs,
                              xlrec->xnodes, xlrec->nrels,
                              xlrec->dbId,
@@ -4725,7 +4735,8 @@ static void
 xact_redo_commit_compact(xl_xact_commit_compact *xlrec,
                         TransactionId xid, XLogRecPtr lsn)
 {
-   xact_redo_commit_internal(xid, lsn, xlrec->subxacts, xlrec->nsubxacts,
+   xact_redo_commit_internal(xid, lsn, xlrec->xact_time,
+                             xlrec->subxacts, xlrec->nsubxacts,
                              NULL, 0,  /* inval msgs */
                              NULL, 0,  /* relfilenodes */
                              InvalidOid,       /* dbId */
index f7dd61c4c7538fe99cef58bf8e27be9a1ede0052..fc615dd2b9be675eaf7e73c5cf470a54dfbfa027 100644 (file)
@@ -22,6 +22,7 @@
 #include <unistd.h>
 
 #include "access/clog.h"
+#include "access/committs.h"
 #include "access/multixact.h"
 #include "access/subtrans.h"
 #include "access/timeline.h"
@@ -4046,6 +4047,7 @@ BootStrapXLOG(void)
    checkPoint.oldestXidDB = TemplateDbOid;
    checkPoint.oldestMulti = FirstMultiXactId;
    checkPoint.oldestMultiDB = TemplateDbOid;
+   checkPoint.oldestCommitTs = InvalidTransactionId;
    checkPoint.time = (pg_time_t) time(NULL);
    checkPoint.oldestActiveXid = InvalidTransactionId;
 
@@ -4055,6 +4057,7 @@ BootStrapXLOG(void)
    MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
    SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
    SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
+   SetCommitTsLimit(InvalidTransactionId);
 
    /* Set up the XLOG page header */
    page->xlp_magic = XLOG_PAGE_MAGIC;
@@ -4134,6 +4137,7 @@ BootStrapXLOG(void)
 
    /* Bootstrap the commit log, too */
    BootStrapCLOG();
+   BootStrapCommitTs();
    BootStrapSUBTRANS();
    BootStrapMultiXact();
 
@@ -5186,6 +5190,9 @@ StartupXLOG(void)
    ereport(DEBUG1,
            (errmsg("oldest MultiXactId: %u, in database %u",
                    checkPoint.oldestMulti, checkPoint.oldestMultiDB)));
+   ereport(DEBUG1,
+           (errmsg("oldest CommitTs Xid: %u",
+                   checkPoint.oldestCommitTs)));
    if (!TransactionIdIsNormal(checkPoint.nextXid))
        ereport(PANIC,
                (errmsg("invalid next transaction ID")));
@@ -5197,6 +5204,7 @@ StartupXLOG(void)
    MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
    SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
    SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
+   SetCommitTsLimit(checkPoint.oldestCommitTs);
    XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
    XLogCtl->ckptXid = checkPoint.nextXid;
 
@@ -5397,10 +5405,12 @@ StartupXLOG(void)
            Assert(TransactionIdIsValid(oldestActiveXID));
 
            /*
-            * Startup commit log and subtrans only. Other SLRUs are not
-            * maintained during recovery and need not be started yet.
+            * Startup commit log, commit timestamp, and subtrans only. Other
+            * SLRUs are not maintained during recovery and need not be started
+            * yet.
             */
            StartupCLOG();
+           StartupCommitTs();
            StartupSUBTRANS(oldestActiveXID);
 
            /*
@@ -6043,6 +6053,7 @@ StartupXLOG(void)
    if (standbyState == STANDBY_DISABLED)
    {
        StartupCLOG();
+       StartupCommitTs();
        StartupSUBTRANS(oldestActiveXID);
    }
 
@@ -6603,6 +6614,7 @@ ShutdownXLOG(int code, Datum arg)
        CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
    }
    ShutdownCLOG();
+   ShutdownCommitTs();
    ShutdownSUBTRANS();
    ShutdownMultiXact();
 
@@ -6997,6 +7009,10 @@ CreateCheckPoint(int flags)
    checkPoint.oldestXidDB = ShmemVariableCache->oldestXidDB;
    LWLockRelease(XidGenLock);
 
+   LWLockAcquire(CommitTsControlLock, LW_SHARED);
+   checkPoint.oldestCommitTs = ShmemVariableCache->oldestCommitTs;
+   LWLockRelease(CommitTsControlLock);
+
    /* Increase XID epoch if we've wrapped around since last checkpoint */
    checkPoint.nextXidEpoch = ControlFile->checkPointCopy.nextXidEpoch;
    if (checkPoint.nextXid < ControlFile->checkPointCopy.nextXid)
@@ -7237,6 +7253,7 @@ static void
 CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
 {
    CheckPointCLOG();
+   CheckPointCommitTs();
    CheckPointSUBTRANS();
    CheckPointMultiXact();
    CheckPointPredicate();
index c984488e034f651c2db833768186f23ff7132377..2b1688435e3ab0ff1c4a5c0dd0a9fe8314603255 100644 (file)
@@ -23,6 +23,7 @@
 #include <math.h>
 
 #include "access/clog.h"
+#include "access/committs.h"
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
@@ -894,8 +895,9 @@ vac_truncate_clog(TransactionId frozenXID, MultiXactId frozenMulti)
        return;
    }
 
-   /* Truncate CLOG and Multi to the oldest computed value */
+   /* Truncate CLOG, CommitTs and Multi to the oldest computed value */
    TruncateCLOG(frozenXID);
+   TruncateCommitTs(frozenXID);
    TruncateMultiXact(frozenMulti);
 
    /*
@@ -906,6 +908,7 @@ vac_truncate_clog(TransactionId frozenXID, MultiXactId frozenMulti)
     */
    SetTransactionIdLimit(frozenXID, oldestxid_datoid);
    MultiXactAdvanceOldest(frozenMulti, oldestmulti_datoid);
+   SetCommitTsLimit(frozenXID);
 }
 
 
index b34ba44712755fe8c51101e13b67b9f0920abce5..8c9060794bfff9a227450515eb04b63f99dc42fa 100644 (file)
@@ -15,6 +15,7 @@
 #include "postgres.h"
 
 #include "access/clog.h"
+#include "access/committs.h"
 #include "access/heapam.h"
 #include "access/multixact.h"
 #include "access/nbtree.h"
@@ -111,6 +112,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
        size = add_size(size, ProcGlobalShmemSize());
        size = add_size(size, XLOGShmemSize());
        size = add_size(size, CLOGShmemSize());
+       size = add_size(size, CommitTsShmemSize());
        size = add_size(size, SUBTRANSShmemSize());
        size = add_size(size, TwoPhaseShmemSize());
        size = add_size(size, MultiXactShmemSize());
@@ -192,6 +194,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
     */
    XLOGShmemInit();
    CLOGShmemInit();
+   CommitTsShmemInit();
    SUBTRANSShmemInit();
    MultiXactShmemInit();
    InitBufferPool();
index 4308128c7fdd5e276e40c8e649c544f89a98e999..09c0af13f3ef551151a8d0b476adf45b7e52bbe2 100644 (file)
@@ -46,6 +46,7 @@
 #include <signal.h>
 
 #include "access/clog.h"
+#include "access/committs.h"
 #include "access/subtrans.h"
 #include "access/transam.h"
 #include "access/xact.h"
@@ -2653,6 +2654,7 @@ RecordKnownAssignedTransactionIds(TransactionId xid)
        while (TransactionIdPrecedesOrEquals(next_expected_xid, xid))
        {
            ExtendCLOG(next_expected_xid);
+           ExtendCommitTs(next_expected_xid);
            ExtendSUBTRANS(next_expected_xid);
 
            TransactionIdAdvance(next_expected_xid);
index 4f88d3f12251bd7d57e10a2ddd872f12f281ed26..14626bead5e30cbf02430d153098525ac18aa064 100644 (file)
@@ -22,6 +22,7 @@
 #include "postgres.h"
 
 #include "access/clog.h"
+#include "access/committs.h"
 #include "access/multixact.h"
 #include "access/subtrans.h"
 #include "commands/async.h"
@@ -191,6 +192,9 @@ NumLWLocks(void)
    /* clog.c needs one per CLOG buffer */
    numLocks += CLOGShmemBuffers();
 
+   /* committs.c needs one per CommitTs buffer */
+   numLocks += CommitTsShmemBuffers();
+
    /* subtrans.c needs one per SubTrans buffer */
    numLocks += NUM_SUBTRANS_BUFFERS;
 
index 22ba35fef93fda2d13f0058cee1085ea6f3e416b..7aeca667130980b6874c0f03bab5864a100e8251 100644 (file)
@@ -26,6 +26,7 @@
 #include <syslog.h>
 #endif
 
+#include "access/committs.h"
 #include "access/gin.h"
 #include "access/transam.h"
 #include "access/twophase.h"
@@ -787,6 +788,15 @@ static struct config_bool ConfigureNamesBool[] =
        false,
        check_bonjour, NULL, NULL
    },
+   {
+       {"track_commit_timestamp", PGC_POSTMASTER, REPLICATION,
+           gettext_noop("Collects transaction commit time."),
+           NULL
+       },
+       &commit_ts_enabled,
+       false,
+       NULL, NULL, NULL
+   },
    {
        {"ssl", PGC_POSTMASTER, CONN_AUTH_SECURITY,
            gettext_noop("Enables SSL connections."),
index 307b456f0350b0dde25754dd0a2bfe9cdc8d79bc..402a89cd536f8a5f26bcdaacc862bec1f7139209 100644 (file)
 #wal_keep_segments = 0     # in logfile segments, 16MB each; 0 disables
 #wal_sender_timeout = 60s  # in milliseconds; 0 disables
 
+#track_commit_timestamp = off  # collect timestamp of transaction commit
+               # (change requires restart)
+
 # - Master Server -
 
 # These settings are ignored on a standby server.
index f9b3492ef3194c7a12214f145e8f678046618d9e..65dbdb4be374fd5942cc26b00f4dcd027ca4f406 100644 (file)
@@ -182,6 +182,7 @@ const char *subdirs[] = {
    "pg_xlog",
    "pg_xlog/archive_status",
    "pg_clog",
+   "pg_committs",
    "pg_notify",
    "pg_serial",
    "pg_snapshots",
index a790f99cb51d3649d8bbd2f32ab0065470bccda8..77ef2a2c4f29e7d3eb9f573c202ac6b667facba4 100644 (file)
@@ -238,6 +238,8 @@ main(int argc, char *argv[])
           ControlFile.checkPointCopy.oldestMulti);
    printf(_("Latest checkpoint's oldestMulti's DB: %u\n"),
           ControlFile.checkPointCopy.oldestMultiDB);
+   printf(_("Latest checkpoint's oldestCommitTs:   %u\n"),
+          ControlFile.checkPointCopy.oldestCommitTs);
    printf(_("Time of latest checkpoint:            %s\n"),
           ckpttime_str);
    printf(_("Fake LSN counter for unlogged rels:   %X/%X\n"),
diff --git a/src/include/access/committs.h b/src/include/access/committs.h
new file mode 100644 (file)
index 0000000..089cffc
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * committs.h
+ *
+ * PostgreSQL commit timestamp manager
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/committs.h
+ */
+#ifndef COMMITTS_H
+#define COMMITTS_H
+
+#include "access/xlog.h"
+#include "datatype/timestamp.h"
+
+
+extern PGDLLIMPORT bool    commit_ts_enabled;
+
+typedef uint32 CommitExtraData;
+
+extern void TransactionTreeSetCommitTimestamp(TransactionId xid, int nsubxids,
+                                 TransactionId *subxids,
+                                 TimestampTz timestamp,
+                                 CommitExtraData data,
+                                 bool do_xlog);
+extern TimestampTz TransactionIdGetCommitTimestamp(TransactionId xid);
+extern CommitExtraData TransactionIdGetCommitData(TransactionId xid);
+extern void TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
+                            CommitExtraData *data);
+
+extern Size CommitTsShmemBuffers(void);
+extern Size CommitTsShmemSize(void);
+extern void CommitTsShmemInit(void);
+extern void BootStrapCommitTs(void);
+extern void StartupCommitTs(void);
+extern void ShutdownCommitTs(void);
+extern void CheckPointCommitTs(void);
+extern void ExtendCommitTs(TransactionId newestXact);
+extern void TruncateCommitTs(TransactionId oldestXact);
+extern void SetCommitTsLimit(TransactionId oldestXact);
+
+/* XLOG stuff */
+#define COMMITTS_ZEROPAGE      0x00
+#define COMMITTS_TRUNCATE      0x10
+#define COMMITTS_SETTS         0x20
+
+typedef struct xl_committs_set	/* presumably the body of a COMMITTS_SETTS record -- confirm in committs.c */
+{
+   TimestampTz     timestamp;	/* same name/type as the "timestamp" arg of TransactionTreeSetCommitTimestamp */
+   CommitExtraData data;	/* same name/type as its "data" arg; CommitExtraData is a uint32 */
+   TransactionId   mainxid;	/* presumably the top-level xid (the "xid" arg) -- confirm */
+   int             nsubxids;	/* presumably the number of entries in subxids[] -- confirm */
+   TransactionId   subxids[FLEXIBLE_ARRAY_MEMBER];	/* variable-length tail; must remain the last member */
+} xl_committs_set;
+
+
+extern void committs_redo(XLogRecPtr lsn, XLogRecord *record);
+extern void committs_desc(StringInfo buf, uint8 xl_info, char *rec);
+
+#endif   /* COMMITTS_H */
index 7ad71b32e2c7fdd2391eea8a768ce943304474b2..3c43798629e7246ef09c43f32cb7ca10062316e8 100644 (file)
@@ -42,3 +42,4 @@ PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup
 PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup, NULL)
 PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, NULL, NULL, NULL)
 PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_xlog_startup, spg_xlog_cleanup, NULL)
+PG_RMGR(RM_COMMITTS_ID, "CommitTs", committs_redo, committs_desc, NULL, NULL, NULL)
index 29ae9e0e5c1955d5103a48668b29dbc0682d699c..7e81e0f1135fbe6fc6817f00dcc8bed9bb9e8753 100644 (file)
@@ -145,6 +145,7 @@ extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno,
 extern void SimpleLruWritePage(SlruCtl ctl, int slotno);
 extern void SimpleLruFlush(SlruCtl ctl, bool checkpoint);
 extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage);
+extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno);
 
 typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int segpage,
                                              void *data);
index 23a41fd585bed0947ea6ec965a153a7e096b14d7..bdc5d82bcffb5c812ceacd89ad2977236a356f35 100644 (file)
@@ -118,6 +118,11 @@ typedef struct VariableCacheData
    TransactionId xidWrapLimit; /* where the world ends */
    Oid         oldestXidDB;    /* database with minimum datfrozenxid */
 
+   /*
+    * This field is protected by CommitTsControlLock.
+    */
+   TransactionId oldestCommitTs;
+
    /*
     * These fields are protected by ProcArrayLock.
     */
index bf3c1bcc459b7d2064ce44b7398466a6a0917b50..057bd4867e3bd6e7a624fade7ae8bdc5a8ccaab0 100644 (file)
@@ -46,6 +46,7 @@ typedef struct CheckPoint
    MultiXactId oldestMulti;    /* cluster-wide minimum datminmxid */
    Oid         oldestMultiDB;  /* database with minimum datminmxid */
    pg_time_t   time;           /* time stamp of checkpoint */
+   TransactionId oldestCommitTs; /* oldest Xid with valid commit timestamp */
 
    /*
     * Oldest XID still running. This is only needed to initialize hot standby
index feecbf96959e568f0a3d31274f0a8741dc14e0bb..cc45be1ffd5f89ebf7d377c32495dc5abf74c26d 100644 (file)
@@ -2912,6 +2912,13 @@ DESCR("view two-phase transactions");
 DATA(insert OID = 3819 (  pg_get_multixact_members PGNSP PGUID 12 1 1000 0 0 f f f f t t v 1 0 2249 "28" "{28,28,25}" "{i,o,o}" "{multixid,xid,mode}" _null_ pg_get_multixact_members _null_ _null_ _null_ ));
 DESCR("view members of a multixactid");
 
+DATA(insert OID = 3461 ( pg_get_transaction_committime PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 1184 "28" _null_ _null_ _null_ _null_ pg_get_transaction_committime _null_ _null_ _null_ ));
+DESCR("get commit time of transaction");
+
+DATA(insert OID = 3462 ( pg_get_transaction_extradata PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 23 "28" _null_ _null_ _null_ _null_ pg_get_transaction_extradata _null_ _null_ _null_ ));
+
+DATA(insert OID = 3463 ( pg_get_transaction_committime_data PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 2249 "28" "{28,1184,23}" "{i,o,o}" "{xid,committime,extradata}" _null_ pg_get_transaction_committime_data _null_ _null_ _null_ ));
+
 DATA(insert OID = 3537 (  pg_describe_object       PGNSP PGUID 12 1 0 0 0 f f f f t f s 3 0 25 "26 26 23" _null_ _null_ _null_ _null_ pg_describe_object _null_ _null_ _null_ ));
 DESCR("get identification of SQL object");
 
index d8f7e9d64a080da690826edcc581659bf27aff47..c14f9328a5832eafcad4a275a1db843cee746005 100644 (file)
@@ -59,6 +59,7 @@ typedef enum LWLockId
    CheckpointLock,
    CLogControlLock,
    SubtransControlLock,
+   CommitTsControlLock,
    MultiXactGenLock,
    MultiXactOffsetControlLock,
    MultiXactMemberControlLock,
index 15b60abfcd9360730ff86fe85a6918599e5546e3..de0ed33cf05c3574b9f02c3b971ec71645a573e1 100644 (file)
@@ -1143,6 +1143,11 @@ extern Datum pg_prepared_xact(PG_FUNCTION_ARGS);
 /* access/transam/multixact.c */
 extern Datum pg_get_multixact_members(PG_FUNCTION_ARGS);
 
+/* access/transam/committs.c */
+extern Datum pg_get_transaction_committime(PG_FUNCTION_ARGS);
+extern Datum pg_get_transaction_extradata(PG_FUNCTION_ARGS);
+extern Datum pg_get_transaction_committime_data(PG_FUNCTION_ARGS);
+
 /* catalogs/dependency.c */
 extern Datum pg_describe_object(PG_FUNCTION_ARGS);
 extern Datum pg_identify_object(PG_FUNCTION_ARGS);