From 09ae2c8bac8db409a8cd0b8ee438ea7526deb4c3 Mon Sep 17 00:00:00 2001
From: Andres Freund
Date: Fri, 19 Dec 2025 13:23:13 -0500
Subject: [PATCH] bufmgr: Optimize & harmonize LockBufHdr(),
 LWLockWaitListLock()

The main optimization is for LockBufHdr() to delay initializing
SpinDelayStatus, similar to what LWLockWaitListLock() already did. The
initialization is sufficiently expensive, and buffer header lock
acquisitions sufficiently frequent, that it is worthwhile to instead have
a fastpath (via a likely() branch) that does not initialize the
SpinDelayStatus.

While LWLockWaitListLock() already had the aforementioned optimization, it
did not use likely(), and inspection of the assembly shows that this
indeed leads to worse code generation (also observed in a microbenchmark).
Fix that by adding the likely().

While the LockBufHdr() improvement is a small gain on its own, it is
mainly aimed at preventing a regression after a future commit, which
requires additional locking to set hint bits.

While touching both functions, also make their comments more similar to
each other.

Reviewed-by: Heikki Linnakangas
Discussion: https://postgr.es/m/fvfmkr5kk4nyex56ejgxj3uzi63isfxovp2biecb4bspbjrze7@az2pljabhnff
---
 src/backend/storage/buffer/bufmgr.c | 36 +++++++++++++++++++++++---------
 src/backend/storage/lmgr/lwlock.c   |  8 +++++--
 2 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index a768fb129ae..eb55102b0d7 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -6358,23 +6358,41 @@ rlocator_comparator(const void *p1, const void *p2)
 uint32
 LockBufHdr(BufferDesc *desc)
 {
-	SpinDelayStatus delayStatus;
 	uint32		old_buf_state;
 
 	Assert(!BufferIsLocal(BufferDescriptorGetBuffer(desc)));
 
-	init_local_spin_delay(&delayStatus);
-
 	while (true)
 	{
-		/* set BM_LOCKED flag */
+		/*
+		 * Always try once to acquire the lock directly, without setting up
+		 * the spin-delay infrastructure. The work necessary for that shows up
+		 * in profiles and is rarely necessary.
+		 */
 		old_buf_state = pg_atomic_fetch_or_u32(&desc->state, BM_LOCKED);
-		/* if it wasn't set before we're OK */
-		if (!(old_buf_state & BM_LOCKED))
-			break;
-		perform_spin_delay(&delayStatus);
+		if (likely(!(old_buf_state & BM_LOCKED)))
+			break;				/* got lock */
+
+		/* and then spin without atomic operations until lock is released */
+		{
+			SpinDelayStatus delayStatus;
+
+			init_local_spin_delay(&delayStatus);
+
+			while (old_buf_state & BM_LOCKED)
+			{
+				perform_spin_delay(&delayStatus);
+				old_buf_state = pg_atomic_read_u32(&desc->state);
+			}
+			finish_spin_delay(&delayStatus);
+		}
+
+		/*
+		 * Retry. The lock might obviously already be re-acquired by the time
+		 * we're attempting to get it again.
+		 */
 	}
-	finish_spin_delay(&delayStatus);
+
 	return old_buf_state | BM_LOCKED;
 }
 
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 255cfa8fa95..b839ace57cb 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -870,9 +870,13 @@ LWLockWaitListLock(LWLock *lock)
 
 	while (true)
 	{
-		/* always try once to acquire lock directly */
+		/*
+		 * Always try once to acquire the lock directly, without setting up
+		 * the spin-delay infrastructure. The work necessary for that shows up
+		 * in profiles and is rarely necessary.
+		 */
 		old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
-		if (!(old_state & LW_FLAG_LOCKED))
+		if (likely(!(old_state & LW_FLAG_LOCKED)))
 			break;				/* got lock */
 
 		/* and then spin without atomic operations until lock is released */
-- 
2.39.5
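
For readers who want to see the pattern in isolation, below is a minimal,
self-contained C sketch of the "try once with a single atomic fetch-or,
then fall back to a spin-delay loop on plain reads" approach the patch
applies to LockBufHdr(). It is not part of the patch, and all names in it
(demo_lock, demo_unlock, demo_spin_delay, DEMO_LOCKED) are hypothetical
stand-ins for PostgreSQL's pg_atomic_fetch_or_u32(), SpinDelayStatus and
perform_spin_delay(); likely() in PostgreSQL maps to __builtin_expect(),
which is used directly here.

	/* Illustrative sketch only; not PostgreSQL code. */
	#include <stdatomic.h>
	#include <sched.h>

	#define DEMO_LOCKED (1U << 0)

	/* hypothetical stand-in for SpinDelayStatus / perform_spin_delay() */
	typedef struct
	{
		int			spins;
	} demo_spin_delay;

	static void
	demo_perform_spin_delay(demo_spin_delay *d)
	{
		/* crude back-off; the real code escalates to actual sleeps */
		if (++d->spins > 100)
			sched_yield();
	}

	static unsigned int
	demo_lock(_Atomic unsigned int *lock_word)
	{
		unsigned int old;

		while (1)
		{
			/*
			 * Fast path: one atomic read-modify-write, no spin-delay state
			 * is set up. The branch is marked as the expected outcome.
			 */
			old = atomic_fetch_or(lock_word, DEMO_LOCKED);
			if (__builtin_expect(!(old & DEMO_LOCKED), 1))
				break;			/* got lock */

			/* Slow path: spin on plain loads until the bit clears. */
			{
				demo_spin_delay delay = {0};

				while (old & DEMO_LOCKED)
				{
					demo_perform_spin_delay(&delay);
					old = atomic_load(lock_word);
				}
			}
			/* retry the atomic fetch-or */
		}
		return old | DEMO_LOCKED;
	}

	static void
	demo_unlock(_Atomic unsigned int *lock_word)
	{
		atomic_fetch_and(lock_word, ~DEMO_LOCKED);
	}

	int
	main(void)
	{
		_Atomic unsigned int word = 0;
		unsigned int locked_state = demo_lock(&word);

		(void) locked_state;	/* caller would examine/update state here */
		demo_unlock(&word);
		return 0;
	}

The point of the structure, as in the patch, is that the uncontended path
issues exactly one atomic read-modify-write and never touches the
spin-delay bookkeeping; only the contended path pays for it, and the inner
loop re-reads the word with plain atomic loads rather than repeated
fetch-or operations until the lock bit is observed clear.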