For inplace update, send nontransactional invalidations.

author Noah Misch <[email protected]>

Wed, 17 Dec 2025 00:13:54 +0000 (16:13 -0800)

committer Noah Misch <[email protected]>

Wed, 17 Dec 2025 00:13:54 +0000 (16:13 -0800)
author Noah Misch <[email protected]>
Wed, 17 Dec 2025 00:13:54 +0000 (16:13 -0800)
committer Noah Misch <[email protected]>
Wed, 17 Dec 2025 00:13:54 +0000 (16:13 -0800)
diff --git a/src/backend/access/heap/README.tuplock b/src/backend/access/heap/README.tuplock

index 750684d33989c090dbcc8e6bae6d44c23be80d12..ad835ff4820afef2c453c712c4b2381655ee6d04 100644 (file)
--- a/src/backend/access/heap/README.tuplock
+++ b/src/backend/access/heap/README.tuplock
@@ -201,3 +201,35 @@ wider than four bytes, and current readers don't need consistency across
  fields.  Hence, they get by with just fetching each field once.  XXX such a
  caller may also read a value that has not reached WAL; see
  systable_inplace_update_finish().
+
+During logical decoding, caches reflect an inplace update no later than the
+next XLOG_XACT_INVALIDATIONS.  That record witnesses the end of a command.
+Tuples of its cmin are then visible to decoding, as are inplace updates of any
+lower LSN.  Inplace updates of a higher LSN may also be visible, even if those
+updates would have been invisible to a non-historic snapshot matching
+decoding's historic snapshot.  (In other words, decoding may see inplace
+updates that were not visible to a similar snapshot taken during original
+transaction processing.)  That's a consequence of inplace update violating
+MVCC: there are no snapshot-specific versions of inplace-updated values.  This
+all makes it hard to reason about inplace-updated column reads during logical
+decoding, but the behavior does suffice for relhasindex.  A relhasindex=t in
+CREATE INDEX becomes visible no later than the new pg_index row.  While it may
+be visible earlier, that's harmless.  Finding zero indexes despite
+relhasindex=t is normal in more cases than this, e.g. after DROP INDEX.
+Example of a case that meaningfully reacts to the inplace inval:
+
+CREATE TABLE cat (c int) WITH (user_catalog_table = true);
+CREATE TABLE normal (d int);
+...
+CREATE INDEX ON cat (c)\; INSERT INTO normal VALUES (1);
+
+If the output plugin reads "cat" during decoding of the INSERT, it's fair to
+want that read to see relhasindex=t and use the new index.
+
+An alternative would be to have decoding of XLOG_HEAP_INPLACE immediately
+execute its invals.  That would behave more like invals during original
+transaction processing.  It would remove the decoding-specific delay in e.g. a
+decoding plugin witnessing a relfrozenxid change.  However, a good use case
+for that is unlikely, since the plugin would still witness relfrozenxid
+changes prematurely.  Hence, inplace update takes the trivial approach of
+delegating to XLOG_XACT_INVALIDATIONS.
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c

index cce38f482bd693df541bbbaebba210840ab652fa..731d504489bc4302c2d1b324e28a4f7dce22ec57 100644 (file)
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -6320,6 +6320,19 @@ heap_inplace_lock(Relation relation,
  
     Assert(BufferIsValid(buffer));
  
+   /*
+    * Register shared cache invals if necessary.  Other sessions may finish
+    * inplace updates of this tuple between this step and LockTuple().  Since
+    * inplace updates don't change cache keys, that's harmless.
+    *
+    * While it's tempting to register invals only after confirming we can
+    * return true, the following obstacle precludes reordering steps that
+    * way.  Registering invals might reach a CatalogCacheInitializeCache()
+    * that locks "buffer".  That would hang indefinitely if running after our
+    * own LockBuffer().  Hence, we must register invals before LockBuffer().
+    */
+   CacheInvalidateHeapTupleInplace(relation, oldtup_ptr);
+
     LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
     LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
  
@@ -6415,6 +6428,7 @@ heap_inplace_lock(Relation relation,
     if (!ret)
     {
         UnlockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
+       ForgetInplace_Inval();
         InvalidateCatalogSnapshot();
     }
     return ret;
@@ -6443,6 +6457,16 @@ heap_inplace_update_and_unlock(Relation relation,
     if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
         elog(ERROR, "wrong tuple length");
  
+   /*
+    * Unlink relcache init files as needed.  If unlinking, acquire
+    * RelCacheInitLock until after associated invalidations.  By doing this
+    * in advance, if we checkpoint and then crash between inplace
+    * XLogInsert() and inval, we don't rely on StartupXLOG() ->
+    * RelationCacheInitFileRemove().  That uses elevel==LOG, so replay would
+    * neglect to PANIC on EIO.
+    */
+   PreInplace_Inval();
+
     /* NO EREPORT(ERROR) from here till changes are logged */
     START_CRIT_SECTION();
  
@@ -6486,17 +6510,24 @@ heap_inplace_update_and_unlock(Relation relation,
         PageSetLSN(BufferGetPage(buffer), recptr);
     }
  
+   LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+
+   /*
+    * Send invalidations to shared queue.  SearchSysCacheLocked1() assumes we
+    * do this before UnlockTuple().
+    */
+   AtInplace_Inval();
+
     END_CRIT_SECTION();
+   UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
  
-   heap_inplace_unlock(relation, oldtup, buffer);
+   AcceptInvalidationMessages();   /* local processing of just-sent inval */
  
     /*
-    * Send out shared cache inval if necessary.  Note that because we only
-    * pass the new version of the tuple, this mustn't be used for any
-    * operations that could change catcache lookup keys.  But we aren't
-    * bothering with index updates either, so that's true a fortiori.
-    *
-    * XXX ROLLBACK discards the invalidation.  See test inplace-inval.spec.
+    * Queue a transactional inval, for logical decoding and for third-party
+    * code that might have been relying on it since long before inplace
+    * update adopted immediate invalidation.  See README.tuplock section
+    * "Reading inplace-updated columns" for logical decoding details.
      */
     if (!IsBootstrapProcessingMode())
         CacheInvalidateHeapTuple(relation, tuple, NULL);
@@ -6511,6 +6542,7 @@ heap_inplace_unlock(Relation relation,
  {
     LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
     UnlockTuple(relation, &oldtup->t_self, InplaceUpdateTupleLock);
+   ForgetInplace_Inval();
  }
  
  /*
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c

index 4cecf63006043bbc9ad35633f24173e2961bfceb..053a200d9cb1fb2e18600583ed88df510c3582d5 100644 (file)
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -1358,14 +1358,24 @@ RecordTransactionCommit(void)
  
         /*
          * Transactions without an assigned xid can contain invalidation
-        * messages (e.g. explicit relcache invalidations or catcache
-        * invalidations for inplace updates); standbys need to process those.
-        * We can't emit a commit record without an xid, and we don't want to
-        * force assigning an xid, because that'd be problematic for e.g.
-        * vacuum.  Hence we emit a bespoke record for the invalidations. We
-        * don't want to use that in case a commit record is emitted, so they
-        * happen synchronously with commits (besides not wanting to emit more
-        * WAL records).
+        * messages.  While inplace updates do this, this is not known to be
+        * necessary; see comment at inplace CacheInvalidateHeapTuple().
+        * Extensions might still rely on this capability, and standbys may
+        * need to process those invals.  We can't emit a commit record
+        * without an xid, and we don't want to force assigning an xid,
+        * because that'd be problematic for e.g. vacuum.  Hence we emit a
+        * bespoke record for the invalidations. We don't want to use that in
+        * case a commit record is emitted, so they happen synchronously with
+        * commits (besides not wanting to emit more WAL records).
+        *
+        * XXX Every known use of this capability is a defect.  Since an XID
+        * isn't controlling visibility of the change that prompted invals,
+        * other sessions need the inval even if this transaction aborts.
+        *
+        * ON COMMIT DELETE ROWS does a nontransactional index_build(), which
+        * queues a relcache inval, including in transactions without an xid
+        * that had read the (empty) table.  Standbys don't need any ON COMMIT
+        * DELETE ROWS invals, but we've not done the work to withhold them.
          */
         if (nmsgs != 0)
         {
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c

index abd8eef08651a25b53615a31e2c4fab9ee8f0a08..b625e8a3907ee2ebda00366c8f778567aec09105 100644 (file)
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -2905,12 +2905,19 @@ index_update_stats(Relation rel,
     if (dirty)
     {
         systable_inplace_update_finish(state, tuple);
-       /* the above sends a cache inval message */
+       /* the above sends transactional and immediate cache inval messages */
     }
     else
     {
         systable_inplace_update_cancel(state);
-       /* no need to change tuple, but force relcache inval anyway */
+
+       /*
+        * While we didn't change relhasindex, CREATE INDEX needs a
+        * transactional inval for when the new index's catalog rows become
+        * visible.  Other CREATE INDEX and REINDEX code happens to also queue
+        * this inval, but keep this in case rare callers rely on this part of
+        * our API contract.
+        */
         CacheInvalidateRelcacheByTuple(tuple);
     }
  
diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c

index 72f975a736322f91c11eb95c68b77f6298c6da16..a87b77e0b1baee18b5e1af97768ce4809f5fb992 100644 (file)
--- a/src/backend/commands/event_trigger.c
+++ b/src/backend/commands/event_trigger.c
@@ -979,11 +979,6 @@ EventTriggerOnLogin(void)
                  * this instead of regular updates serves two purposes. First,
                  * that avoids possible waiting on the row-level lock. Second,
                  * that avoids dealing with TOAST.
-                *
-                * Changes made by inplace update may be lost due to
-                * concurrent normal updates; see inplace-inval.spec. However,
-                * we are OK with that.  The subsequent connections will still
-                * have a chance to set "dathasloginevt" to false.
                  */
                 systable_inplace_update_finish(state, tuple);
             }
diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c

index 4911e98ec2d2ec43a0ee55ec916bafc01ca6e4b0..02d4e4fd8eefa84abb23287096afac2b8e67c73c 100644 (file)
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -521,20 +521,13 @@ heap_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
             /*
              * Inplace updates are only ever performed on catalog tuples and
              * can, per definition, not change tuple visibility.  Since we
-            * don't decode catalog tuples, we're not interested in the
+            * also don't decode catalog tuples, we're not interested in the
              * record's contents.
-            *
-            * In-place updates can be used either by XID-bearing transactions
-            * (e.g.  in CREATE INDEX CONCURRENTLY) or by XID-less
-            * transactions (e.g.  VACUUM).  In the former case, the commit
-            * record will include cache invalidations, so we mark the
-            * transaction as catalog modifying here. Currently that's
-            * redundant because the commit will do that as well, but once we
-            * support decoding in-progress relations, this will be important.
              */
             if (!TransactionIdIsValid(xid))
                 break;
  
+           /* PostgreSQL 13 was the last to need these actions. */
             (void) SnapBuildProcessChange(builder, xid, buf->origptr);
             ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
             break;
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c

index 59d3825917b819c907406db4dd21572b0589f7fb..5169ca72bc087c0214f92598eec443b2a4430f32 100644 (file)
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -2356,7 +2356,8 @@ void
  PrepareToInvalidateCacheTuple(Relation relation,
                               HeapTuple tuple,
                               HeapTuple newtuple,
-                             void (*function) (int, uint32, Oid))
+                             void (*function) (int, uint32, Oid, void *),
+                             void *context)
  {
     slist_iter  iter;
     Oid         reloid;
@@ -2397,7 +2398,7 @@ PrepareToInvalidateCacheTuple(Relation relation,
         hashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, tuple);
         dbid = ccp->cc_relisshared ? (Oid) 0 : MyDatabaseId;
  
-       (*function) (ccp->id, hashvalue, dbid);
+       (*function) (ccp->id, hashvalue, dbid, context);
  
         if (newtuple)
         {
@@ -2406,7 +2407,7 @@ PrepareToInvalidateCacheTuple(Relation relation,
             newhashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, newtuple);
  
             if (newhashvalue != hashvalue)
-               (*function) (ccp->id, newhashvalue, dbid);
+               (*function) (ccp->id, newhashvalue, dbid, context);
         }
     }
  }
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c

index 66e04f973f67565138c835092a8ecdbd3a999436..4c8715bf6721925f21f23b656e1ba7adb17799ea 100644 (file)
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -94,6 +94,10 @@
   * worth trying to avoid sending such inval traffic in the future, if those
   * problems can be overcome cheaply.
   *
+ * When making a nontransactional change to a cacheable object, we must
+ * likewise send the invalidation immediately, before ending the change's
+ * critical section.  This includes inplace heap updates, relmap, and smgr.
+ *
   * When wal_level=logical, write invalidations into WAL at each command end to
   * support the decoding of the in-progress transactions.  See
   * CommandEndInvalidationMessages.
@@ -131,13 +135,15 @@
  
  /*
   * Pending requests are stored as ready-to-send SharedInvalidationMessages.
- * We keep the messages themselves in arrays in TopTransactionContext
- * (there are separate arrays for catcache and relcache messages).  Control
- * information is kept in a chain of TransInvalidationInfo structs, also
- * allocated in TopTransactionContext.  (We could keep a subtransaction's
- * TransInvalidationInfo in its CurTransactionContext; but that's more
- * wasteful not less so, since in very many scenarios it'd be the only
- * allocation in the subtransaction's CurTransactionContext.)
+ * We keep the messages themselves in arrays in TopTransactionContext (there
+ * are separate arrays for catcache and relcache messages).  For transactional
+ * messages, control information is kept in a chain of TransInvalidationInfo
+ * structs, also allocated in TopTransactionContext.  (We could keep a
+ * subtransaction's TransInvalidationInfo in its CurTransactionContext; but
+ * that's more wasteful not less so, since in very many scenarios it'd be the
+ * only allocation in the subtransaction's CurTransactionContext.)  For
+ * inplace update messages, control information appears in an
+ * InvalidationInfo, allocated in CurrentMemoryContext.
   *
   * We can store the message arrays densely, and yet avoid moving data around
   * within an array, because within any one subtransaction we need only
@@ -148,7 +154,9 @@
   * struct.  Similarly, we need distinguish messages of prior subtransactions
   * from those of the current subtransaction only until the subtransaction
   * completes, after which we adjust the array indexes in the parent's
- * TransInvalidationInfo to include the subtransaction's messages.
+ * TransInvalidationInfo to include the subtransaction's messages.  Inplace
+ * invalidations don't need a concept of command or subtransaction boundaries,
+ * since we send them during the WAL insertion critical section.
   *
   * The ordering of the individual messages within a command's or
   * subtransaction's output is not considered significant, although this
@@ -201,7 +209,7 @@ typedef struct InvalidationMsgsGroup
  
  
  /*----------------
- * Invalidation messages are divided into two groups:
+ * Transactional invalidation messages are divided into two groups:
   * 1) events so far in current command, not yet reflected to caches.
   * 2) events in previous commands of current transaction; these have
   *    been reflected to local caches, and must be either broadcast to
@@ -217,26 +225,36 @@ typedef struct InvalidationMsgsGroup
   *----------------
   */
  
-typedef struct TransInvalidationInfo
+/* fields common to both transactional and inplace invalidation */
+typedef struct InvalidationInfo
  {
-   /* Back link to parent transaction's info */
-   struct TransInvalidationInfo *parent;
-
-   /* Subtransaction nesting depth */
-   int         my_level;
-
     /* Events emitted by current command */
     InvalidationMsgsGroup CurrentCmdInvalidMsgs;
  
+   /* init file must be invalidated? */
+   bool        RelcacheInitFileInval;
+} InvalidationInfo;
+
+/* subclass adding fields specific to transactional invalidation */
+typedef struct TransInvalidationInfo
+{
+   /* Base class */
+   struct InvalidationInfo ii;
+
     /* Events emitted by previous commands of this (sub)transaction */
     InvalidationMsgsGroup PriorCmdInvalidMsgs;
  
-   /* init file must be invalidated? */
-   bool        RelcacheInitFileInval;
+   /* Back link to parent transaction's info */
+   struct TransInvalidationInfo *parent;
+
+   /* Subtransaction nesting depth */
+   int         my_level;
  } TransInvalidationInfo;
  
  static TransInvalidationInfo *transInvalInfo = NULL;
  
+static InvalidationInfo *inplaceInvalInfo = NULL;
+
  /* GUC storage */
  int            debug_discard_caches = 0;
  
@@ -544,9 +562,12 @@ ProcessInvalidationMessagesMulti(InvalidationMsgsGroup *group,
  static void
  RegisterCatcacheInvalidation(int cacheId,
                              uint32 hashValue,
-                            Oid dbId)
+                            Oid dbId,
+                            void *context)
  {
-   AddCatcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
+   InvalidationInfo *info = (InvalidationInfo *) context;
+
+   AddCatcacheInvalidationMessage(&info->CurrentCmdInvalidMsgs,
                                    cacheId, hashValue, dbId);
  }
  
@@ -556,10 +577,9 @@ RegisterCatcacheInvalidation(int cacheId,
   * Register an invalidation event for all catcache entries from a catalog.
   */
  static void
-RegisterCatalogInvalidation(Oid dbId, Oid catId)
+RegisterCatalogInvalidation(InvalidationInfo *info, Oid dbId, Oid catId)
  {
-   AddCatalogInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
-                                 dbId, catId);
+   AddCatalogInvalidationMessage(&info->CurrentCmdInvalidMsgs, dbId, catId);
  }
  
  /*
@@ -568,10 +588,9 @@ RegisterCatalogInvalidation(Oid dbId, Oid catId)
   * As above, but register a relcache invalidation event.
   */
  static void
-RegisterRelcacheInvalidation(Oid dbId, Oid relId)
+RegisterRelcacheInvalidation(InvalidationInfo *info, Oid dbId, Oid relId)
  {
-   AddRelcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
-                                  dbId, relId);
+   AddRelcacheInvalidationMessage(&info->CurrentCmdInvalidMsgs, dbId, relId);
  
     /*
      * Most of the time, relcache invalidation is associated with system
@@ -588,7 +607,7 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId)
      * as well.  Also zap when we are invalidating whole relcache.
      */
     if (relId == InvalidOid || RelationIdIsInInitFile(relId))
-       transInvalInfo->RelcacheInitFileInval = true;
+       info->RelcacheInitFileInval = true;
  }
  
  /*
@@ -598,24 +617,27 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId)
   * Only needed for catalogs that don't have catcaches.
   */
  static void
-RegisterSnapshotInvalidation(Oid dbId, Oid relId)
+RegisterSnapshotInvalidation(InvalidationInfo *info, Oid dbId, Oid relId)
  {
-   AddSnapshotInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
-                                  dbId, relId);
+   AddSnapshotInvalidationMessage(&info->CurrentCmdInvalidMsgs, dbId, relId);
  }
  
  /*
   * PrepareInvalidationState
   *     Initialize inval data for the current (sub)transaction.
   */
-static void
+static InvalidationInfo *
  PrepareInvalidationState(void)
  {
     TransInvalidationInfo *myInfo;
  
+   Assert(IsTransactionState());
+   /* Can't queue transactional message while collecting inplace messages. */
+   Assert(inplaceInvalInfo == NULL);
+
     if (transInvalInfo != NULL &&
         transInvalInfo->my_level == GetCurrentTransactionNestLevel())
-       return;
+       return (InvalidationInfo *) transInvalInfo;
  
     myInfo = (TransInvalidationInfo *)
         MemoryContextAllocZero(TopTransactionContext,
@@ -638,7 +660,7 @@ PrepareInvalidationState(void)
          * counter.  This is a convenient place to check for that, as well as
          * being important to keep management of the message arrays simple.
          */
-       if (NumMessagesInGroup(&transInvalInfo->CurrentCmdInvalidMsgs) != 0)
+       if (NumMessagesInGroup(&transInvalInfo->ii.CurrentCmdInvalidMsgs) != 0)
             elog(ERROR, "cannot start a subtransaction when there are unprocessed inval messages");
  
         /*
@@ -647,8 +669,8 @@ PrepareInvalidationState(void)
          * to update them to follow whatever is already in the arrays.
          */
         SetGroupToFollow(&myInfo->PriorCmdInvalidMsgs,
-                        &transInvalInfo->CurrentCmdInvalidMsgs);
-       SetGroupToFollow(&myInfo->CurrentCmdInvalidMsgs,
+                        &transInvalInfo->ii.CurrentCmdInvalidMsgs);
+       SetGroupToFollow(&myInfo->ii.CurrentCmdInvalidMsgs,
                          &myInfo->PriorCmdInvalidMsgs);
     }
     else
@@ -664,6 +686,41 @@ PrepareInvalidationState(void)
     }
  
     transInvalInfo = myInfo;
+   return (InvalidationInfo *) myInfo;
+}
+
+/*
+ * PrepareInplaceInvalidationState
+ *     Initialize inval data for an inplace update.
+ *
+ * See previous function for more background.
+ */
+static InvalidationInfo *
+PrepareInplaceInvalidationState(void)
+{
+   InvalidationInfo *myInfo;
+
+   Assert(IsTransactionState());
+   /* limit of one inplace update under assembly */
+   Assert(inplaceInvalInfo == NULL);
+
+   /* gone after WAL insertion CritSection ends, so use current context */
+   myInfo = (InvalidationInfo *) palloc0(sizeof(InvalidationInfo));
+
+   /* Stash our messages past end of the transactional messages, if any. */
+   if (transInvalInfo != NULL)
+       SetGroupToFollow(&myInfo->CurrentCmdInvalidMsgs,
+                        &transInvalInfo->ii.CurrentCmdInvalidMsgs);
+   else
+   {
+       InvalMessageArrays[CatCacheMsgs].msgs = NULL;
+       InvalMessageArrays[CatCacheMsgs].maxmsgs = 0;
+       InvalMessageArrays[RelCacheMsgs].msgs = NULL;
+       InvalMessageArrays[RelCacheMsgs].maxmsgs = 0;
+   }
+
+   inplaceInvalInfo = myInfo;
+   return myInfo;
  }
  
  /* ----------------------------------------------------------------
@@ -903,7 +960,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
      * after we send the SI messages.  However, we need not do anything unless
      * we committed.
      */
-   *RelcacheInitFileInval = transInvalInfo->RelcacheInitFileInval;
+   *RelcacheInitFileInval = transInvalInfo->ii.RelcacheInitFileInval;
  
     /*
      * Collect all the pending messages into a single contiguous array of
@@ -914,7 +971,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
      * not new ones.
      */
     nummsgs = NumMessagesInGroup(&transInvalInfo->PriorCmdInvalidMsgs) +
-       NumMessagesInGroup(&transInvalInfo->CurrentCmdInvalidMsgs);
+       NumMessagesInGroup(&transInvalInfo->ii.CurrentCmdInvalidMsgs);
  
     *msgs = msgarray = (SharedInvalidationMessage *)
         MemoryContextAlloc(CurTransactionContext,
@@ -927,7 +984,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
                                         msgs,
                                         n * sizeof(SharedInvalidationMessage)),
                                  nmsgs += n));
-   ProcessMessageSubGroupMulti(&transInvalInfo->CurrentCmdInvalidMsgs,
+   ProcessMessageSubGroupMulti(&transInvalInfo->ii.CurrentCmdInvalidMsgs,
                                 CatCacheMsgs,
                                 (memcpy(msgarray + nmsgs,
                                         msgs,
@@ -939,7 +996,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
                                         msgs,
                                         n * sizeof(SharedInvalidationMessage)),
                                  nmsgs += n));
-   ProcessMessageSubGroupMulti(&transInvalInfo->CurrentCmdInvalidMsgs,
+   ProcessMessageSubGroupMulti(&transInvalInfo->ii.CurrentCmdInvalidMsgs,
                                 RelCacheMsgs,
                                 (memcpy(msgarray + nmsgs,
                                         msgs,
@@ -1025,7 +1082,9 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
  void
  AtEOXact_Inval(bool isCommit)
  {
-   /* Quick exit if no messages */
+   inplaceInvalInfo = NULL;
+
+   /* Quick exit if no transactional messages */
     if (transInvalInfo == NULL)
         return;
  
@@ -1041,16 +1100,16 @@ AtEOXact_Inval(bool isCommit)
          * after we send the SI messages.  However, we need not do anything
          * unless we committed.
          */
-       if (transInvalInfo->RelcacheInitFileInval)
+       if (transInvalInfo->ii.RelcacheInitFileInval)
             RelationCacheInitFilePreInvalidate();
  
         AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
-                                  &transInvalInfo->CurrentCmdInvalidMsgs);
+                                  &transInvalInfo->ii.CurrentCmdInvalidMsgs);
  
         ProcessInvalidationMessagesMulti(&transInvalInfo->PriorCmdInvalidMsgs,
                                          SendSharedInvalidMessages);
  
-       if (transInvalInfo->RelcacheInitFileInval)
+       if (transInvalInfo->ii.RelcacheInitFileInval)
             RelationCacheInitFilePostInvalidate();
     }
     else
@@ -1063,6 +1122,56 @@ AtEOXact_Inval(bool isCommit)
     transInvalInfo = NULL;
  }
  
+/*
+ * PreInplace_Inval
+ *     Process queued-up invalidation before inplace update critical section.
+ *
+ * Tasks belong here if they are safe even if the inplace update does not
+ * complete.  Currently, this just unlinks a cache file, which can fail.  The
+ * sum of this and AtInplace_Inval() mirrors AtEOXact_Inval(isCommit=true).
+ */
+void
+PreInplace_Inval(void)
+{
+   Assert(CritSectionCount == 0);
+
+   if (inplaceInvalInfo && inplaceInvalInfo->RelcacheInitFileInval)
+       RelationCacheInitFilePreInvalidate();
+}
+
+/*
+ * AtInplace_Inval
+ *     Process queued-up invalidations after inplace update buffer mutation.
+ */
+void
+AtInplace_Inval(void)
+{
+   Assert(CritSectionCount > 0);
+
+   if (inplaceInvalInfo == NULL)
+       return;
+
+   ProcessInvalidationMessagesMulti(&inplaceInvalInfo->CurrentCmdInvalidMsgs,
+                                    SendSharedInvalidMessages);
+
+   if (inplaceInvalInfo->RelcacheInitFileInval)
+       RelationCacheInitFilePostInvalidate();
+
+   inplaceInvalInfo = NULL;
+}
+
+/*
+ * ForgetInplace_Inval
+ *     Alternative to PreInplace_Inval()+AtInplace_Inval(): discard queued-up
+ *     invalidations.  This lets inplace update enumerate invalidations
+ *     optimistically, before locking the buffer.
+ */
+void
+ForgetInplace_Inval(void)
+{
+   inplaceInvalInfo = NULL;
+}
+
  /*
   * AtEOSubXact_Inval
   *     Process queued-up invalidation messages at end of subtransaction.
@@ -1085,9 +1194,20 @@ void
  AtEOSubXact_Inval(bool isCommit)
  {
     int         my_level;
-   TransInvalidationInfo *myInfo = transInvalInfo;
+   TransInvalidationInfo *myInfo;
  
-   /* Quick exit if no messages. */
+   /*
+    * Successful inplace update must clear this, but we clear it on abort.
+    * Inplace updates allocate this in CurrentMemoryContext, which has
+    * lifespan <= subtransaction lifespan.  Hence, don't free it explicitly.
+    */
+   if (isCommit)
+       Assert(inplaceInvalInfo == NULL);
+   else
+       inplaceInvalInfo = NULL;
+
+   /* Quick exit if no transactional messages. */
+   myInfo = transInvalInfo;
     if (myInfo == NULL)
         return;
  
@@ -1128,12 +1248,12 @@ AtEOSubXact_Inval(bool isCommit)
                                    &myInfo->PriorCmdInvalidMsgs);
  
         /* Must readjust parent's CurrentCmdInvalidMsgs indexes now */
-       SetGroupToFollow(&myInfo->parent->CurrentCmdInvalidMsgs,
+       SetGroupToFollow(&myInfo->parent->ii.CurrentCmdInvalidMsgs,
                          &myInfo->parent->PriorCmdInvalidMsgs);
  
         /* Pending relcache inval becomes parent's problem too */
-       if (myInfo->RelcacheInitFileInval)
-           myInfo->parent->RelcacheInitFileInval = true;
+       if (myInfo->ii.RelcacheInitFileInval)
+           myInfo->parent->ii.RelcacheInitFileInval = true;
  
         /* Pop the transaction state stack */
         transInvalInfo = myInfo->parent;
@@ -1180,7 +1300,7 @@ CommandEndInvalidationMessages(void)
     if (transInvalInfo == NULL)
         return;
  
-   ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs,
+   ProcessInvalidationMessages(&transInvalInfo->ii.CurrentCmdInvalidMsgs,
                                 LocalExecuteInvalidationMessage);
  
     /* WAL Log per-command invalidation messages for wal_level=logical */
@@ -1188,26 +1308,21 @@ CommandEndInvalidationMessages(void)
         LogLogicalInvalidations();
  
     AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
-                              &transInvalInfo->CurrentCmdInvalidMsgs);
+                              &transInvalInfo->ii.CurrentCmdInvalidMsgs);
  }
  
  
  /*
- * CacheInvalidateHeapTuple
- *     Register the given tuple for invalidation at end of command
- *     (ie, current command is creating or outdating this tuple).
- *     Also, detect whether a relcache invalidation is implied.
- *
- * For an insert or delete, tuple is the target tuple and newtuple is NULL.
- * For an update, we are called just once, with tuple being the old tuple
- * version and newtuple the new version.  This allows avoidance of duplicate
- * effort during an update.
+ * CacheInvalidateHeapTupleCommon
+ *     Common logic for end-of-command and inplace variants.
   */
-void
-CacheInvalidateHeapTuple(Relation relation,
-                        HeapTuple tuple,
-                        HeapTuple newtuple)
+static void
+CacheInvalidateHeapTupleCommon(Relation relation,
+                              HeapTuple tuple,
+                              HeapTuple newtuple,
+                              InvalidationInfo *(*prepare_callback) (void))
  {
+   InvalidationInfo *info;
     Oid         tupleRelId;
     Oid         databaseId;
     Oid         relationId;
@@ -1231,11 +1346,8 @@ CacheInvalidateHeapTuple(Relation relation,
     if (IsToastRelation(relation))
         return;
  
-   /*
-    * If we're not prepared to queue invalidation messages for this
-    * subtransaction level, get ready now.
-    */
-   PrepareInvalidationState();
+   /* Allocate any required resources. */
+   info = prepare_callback();
  
     /*
      * First let the catcache do its thing
@@ -1244,11 +1356,12 @@ CacheInvalidateHeapTuple(Relation relation,
     if (RelationInvalidatesSnapshotsOnly(tupleRelId))
     {
         databaseId = IsSharedRelation(tupleRelId) ? InvalidOid : MyDatabaseId;
-       RegisterSnapshotInvalidation(databaseId, tupleRelId);
+       RegisterSnapshotInvalidation(info, databaseId, tupleRelId);
     }
     else
         PrepareToInvalidateCacheTuple(relation, tuple, newtuple,
-                                     RegisterCatcacheInvalidation);
+                                     RegisterCatcacheInvalidation,
+                                     (void *) info);
  
     /*
      * Now, is this tuple one of the primary definers of a relcache entry? See
@@ -1321,7 +1434,48 @@ CacheInvalidateHeapTuple(Relation relation,
     /*
      * Yes.  We need to register a relcache invalidation event.
      */
-   RegisterRelcacheInvalidation(databaseId, relationId);
+   RegisterRelcacheInvalidation(info, databaseId, relationId);
+}
+
+/*
+ * CacheInvalidateHeapTuple
+ *     Register the given tuple for invalidation at end of command
+ *     (ie, current command is creating or outdating this tuple) and end of
+ *     transaction.  Also, detect whether a relcache invalidation is implied.
+ *
+ * For an insert or delete, tuple is the target tuple and newtuple is NULL.
+ * For an update, we are called just once, with tuple being the old tuple
+ * version and newtuple the new version.  This allows avoidance of duplicate
+ * effort during an update.
+ */
+void
+CacheInvalidateHeapTuple(Relation relation,
+                        HeapTuple tuple,
+                        HeapTuple newtuple)
+{
+   CacheInvalidateHeapTupleCommon(relation, tuple, newtuple,
+                                  PrepareInvalidationState);
+}
+
+/*
+ * CacheInvalidateHeapTupleInplace
+ *     Register the given tuple for nontransactional invalidation pertaining
+ *     to an inplace update.  Also, detect whether a relcache invalidation is
+ *     implied.
+ *
+ * Like CacheInvalidateHeapTuple(), but for inplace updates.
+ *
+ * Just before and just after the inplace update, the tuple's cache keys must
+ * match those in key_equivalent_tuple.  Cache keys consist of catcache lookup
+ * key columns and columns referencing pg_class.oid values,
+ * e.g. pg_constraint.conrelid, which would trigger relcache inval.
+ */
+void
+CacheInvalidateHeapTupleInplace(Relation relation,
+                               HeapTuple key_equivalent_tuple)
+{
+   CacheInvalidateHeapTupleCommon(relation, key_equivalent_tuple, NULL,
+                                  PrepareInplaceInvalidationState);
  }
  
  /*
@@ -1340,14 +1494,13 @@ CacheInvalidateCatalog(Oid catalogId)
  {
     Oid         databaseId;
  
-   PrepareInvalidationState();
-
     if (IsSharedRelation(catalogId))
         databaseId = InvalidOid;
     else
         databaseId = MyDatabaseId;
  
-   RegisterCatalogInvalidation(databaseId, catalogId);
+   RegisterCatalogInvalidation(PrepareInvalidationState(),
+                               databaseId, catalogId);
  }
  
  /*
@@ -1365,15 +1518,14 @@ CacheInvalidateRelcache(Relation relation)
     Oid         databaseId;
     Oid         relationId;
  
-   PrepareInvalidationState();
-
     relationId = RelationGetRelid(relation);
     if (relation->rd_rel->relisshared)
         databaseId = InvalidOid;
     else
         databaseId = MyDatabaseId;
  
-   RegisterRelcacheInvalidation(databaseId, relationId);
+   RegisterRelcacheInvalidation(PrepareInvalidationState(),
+                                databaseId, relationId);
  }
  
  /*
@@ -1386,9 +1538,8 @@ CacheInvalidateRelcache(Relation relation)
  void
  CacheInvalidateRelcacheAll(void)
  {
-   PrepareInvalidationState();
-
-   RegisterRelcacheInvalidation(InvalidOid, InvalidOid);
+   RegisterRelcacheInvalidation(PrepareInvalidationState(),
+                                InvalidOid, InvalidOid);
  }
  
  /*
@@ -1402,14 +1553,13 @@ CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
     Oid         databaseId;
     Oid         relationId;
  
-   PrepareInvalidationState();
-
     relationId = classtup->oid;
     if (classtup->relisshared)
         databaseId = InvalidOid;
     else
         databaseId = MyDatabaseId;
-   RegisterRelcacheInvalidation(databaseId, relationId);
+   RegisterRelcacheInvalidation(PrepareInvalidationState(),
+                                databaseId, relationId);
  }
  
  /*
@@ -1423,8 +1573,6 @@ CacheInvalidateRelcacheByRelid(Oid relid)
  {
     HeapTuple   tup;
  
-   PrepareInvalidationState();
-
     tup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
     if (!HeapTupleIsValid(tup))
         elog(ERROR, "cache lookup failed for relation %u", relid);
@@ -1614,7 +1762,7 @@ LogLogicalInvalidations(void)
     if (transInvalInfo == NULL)
         return;
  
-   group = &transInvalInfo->CurrentCmdInvalidMsgs;
+   group = &transInvalInfo->ii.CurrentCmdInvalidMsgs;
     nmsgs = NumMessagesInGroup(group);
  
     if (nmsgs > 0)
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c

index 88c47a999650582f5f38de1443dbd2c9ccd758a2..f7f4f56a4d28ca73a542034b7bf6f3d6d2447f1e 100644 (file)
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -356,8 +356,7 @@ SearchSysCacheLocked1(int cacheId,
  
         /*
          * If an inplace update just finished, ensure we process the syscache
-        * inval.  XXX this is insufficient: the inplace updater may not yet
-        * have reached AtEOXact_Inval().  See test at inplace-inval.spec.
+        * inval.
          *
          * If a heap_update() call just released its LOCKTAG_TUPLE, we'll
          * probably find the old tuple and reach "tuple concurrently updated".
diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h

index 99169a93d91459e06d6f40a39def312adcfecabf..b71d0109dfd586994b727b237dbd56b381f405b5 100644 (file)
--- a/src/include/utils/catcache.h
+++ b/src/include/utils/catcache.h
@@ -226,6 +226,7 @@ extern void CatCacheInvalidate(CatCache *cache, uint32 hashValue);
  extern void PrepareToInvalidateCacheTuple(Relation relation,
                                           HeapTuple tuple,
                                           HeapTuple newtuple,
-                                         void (*function) (int, uint32, Oid));
+                                         void (*function) (int, uint32, Oid, void *),
+                                         void *context);
  
  #endif                         /* CATCACHE_H */
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h

index 24695facf22183a01b25c1f18808761cac07f2ad..121ffa9b26cd86288f1009aefa201944670e6b5e 100644 (file)
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -28,6 +28,10 @@ extern void AcceptInvalidationMessages(void);
  
  extern void AtEOXact_Inval(bool isCommit);
  
+extern void PreInplace_Inval(void);
+extern void AtInplace_Inval(void);
+extern void ForgetInplace_Inval(void);
+
  extern void AtEOSubXact_Inval(bool isCommit);
  
  extern void PostPrepare_Inval(void);
@@ -37,6 +41,8 @@ extern void CommandEndInvalidationMessages(void);
  extern void CacheInvalidateHeapTuple(Relation relation,
                                      HeapTuple tuple,
                                      HeapTuple newtuple);
+extern void CacheInvalidateHeapTupleInplace(Relation relation,
+                                           HeapTuple key_equivalent_tuple);
  
  extern void CacheInvalidateCatalog(Oid catalogId);
  
diff --git a/src/test/isolation/expected/inplace-inval.out b/src/test/isolation/expected/inplace-inval.out

index e68eca5de98ddd92d631fc551d14f0549542ac9a..c35895a8aa7b0ad102188daa86516547cf0d8659 100644 (file)
--- a/src/test/isolation/expected/inplace-inval.out
+++ b/src/test/isolation/expected/inplace-inval.out
@@ -1,6 +1,6 @@
  Parsed test spec with 3 sessions
  
-starting permutation: cachefill3 cir1 cic2 ddl3
+starting permutation: cachefill3 cir1 cic2 ddl3 read1
  step cachefill3: TABLE newly_indexed;
  c
  -
@@ -9,6 +9,14 @@ c
  step cir1: BEGIN; CREATE INDEX i1 ON newly_indexed (c); ROLLBACK;
  step cic2: CREATE INDEX i2 ON newly_indexed (c);
  step ddl3: ALTER TABLE newly_indexed ADD extra int;
+step read1: 
+   SELECT relhasindex FROM pg_class WHERE oid = 'newly_indexed'::regclass;
+
+relhasindex
+-----------
+t          
+(1 row)
+
  
  starting permutation: cir1 cic2 ddl3 read1
  step cir1: BEGIN; CREATE INDEX i1 ON newly_indexed (c); ROLLBACK;
diff --git a/src/test/isolation/specs/inplace-inval.spec b/src/test/isolation/specs/inplace-inval.spec

index 96954fd86c439fa086e07261479b2c9979e12afd..b99112ddb8818e077f28a27027a247a327322ad7 100644 (file)
--- a/src/test/isolation/specs/inplace-inval.spec
+++ b/src/test/isolation/specs/inplace-inval.spec
@@ -1,7 +1,7 @@
-# If a heap_update() caller retrieves its oldtup from a cache, it's possible
-# for that cache entry to predate an inplace update, causing loss of that
-# inplace update.  This arises because the transaction may abort before
-# sending the inplace invalidation message to the shared queue.
+# An inplace update had been able to abort before sending the inplace
+# invalidation message to the shared queue.  If a heap_update() caller then
+# retrieved its oldtup from a cache, the heap_update() could revert the
+# inplace update.
  
  setup
  {
@@ -27,14 +27,12 @@ step cachefill3 { TABLE newly_indexed; }
  step ddl3      { ALTER TABLE newly_indexed ADD extra int; }
  
  
-# XXX shows an extant bug.  Adding step read1 at the end would usually print
-# relhasindex=f (not wanted).  This does not reach the unwanted behavior under
-# -DCATCACHE_FORCE_RELEASE and friends.
  permutation
     cachefill3  # populates the pg_class row in the catcache
     cir1    # sets relhasindex=true; rollback discards cache inval
     cic2    # sees relhasindex=true, skips changing it (so no inval)
     ddl3    # cached row as the oldtup of an update, losing relhasindex
+   read1   # observe damage
  
  # without cachefill3, no bug
  permutation cir1 cic2 ddl3 read1
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list

index 72e6d3a9865ab5c00ab9c5c75a9209efe2892fe9..c0e48a7e20055c9f4ed30cbb7a373e7a51820099 100644 (file)
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1254,6 +1254,7 @@ Interval
  IntervalAggState
  IntoClause
  InvalMessageArray
+InvalidationInfo
  InvalidationMsgsGroup
  IpcMemoryId
  IpcMemoryKey
author	Noah Misch <[email protected]>
	Wed, 17 Dec 2025 00:13:54 +0000 (16:13 -0800)
committer	Noah Misch <[email protected]>
	Wed, 17 Dec 2025 00:13:54 +0000 (16:13 -0800)
src/backend/access/heap/README.tuplock		patch | blob | blame | history
src/backend/access/heap/heapam.c		patch | blob | blame | history
src/backend/access/transam/xact.c		patch | blob | blame | history
src/backend/catalog/index.c		patch | blob | blame | history
src/backend/commands/event_trigger.c		patch | blob | blame | history
src/backend/replication/logical/decode.c		patch | blob | blame | history
src/backend/utils/cache/catcache.c		patch | blob | blame | history
src/backend/utils/cache/inval.c		patch | blob | blame | history
src/backend/utils/cache/syscache.c		patch | blob | blame | history
src/include/utils/catcache.h		patch | blob | blame | history
src/include/utils/inval.h		patch | blob | blame | history
src/test/isolation/expected/inplace-inval.out		patch | blob | blame | history
src/test/isolation/specs/inplace-inval.spec		patch | blob | blame | history
src/tools/pgindent/typedefs.list		patch | blob | blame | history