Make improvements to sequence handling
author Pavan Deolasee <[email protected]>
Thu, 16 Aug 2018 12:01:53 +0000 (17:31 +0530)
committer Pavan Deolasee <[email protected]>
Fri, 17 Aug 2018 09:26:41 +0000 (14:56 +0530)
We were prematurely checking for the maximum and minimum values, thus throwing
an error when the maximum/minimum was still one increment away. Fix that.
Improve the GTM error message by including the sequence name and the max/min
value reached. This does slightly change the end-user error message, though,
because the sequence name at the GTM also includes the database and schema.

Make some changes to the way a sequence is WAL-logged. This is still not
entirely correct, since we log every time a value is fetched from the GTM. But
then, WAL logging is not strictly required in XL because sequence values are
managed at the GTM. In the old code, we were not WAL-logging at all (though the
code existed, which was a bit confusing).

The sequence tuple is still not maintained correctly at the coordinator because
it may not know about the sequence values fetched and consumed by the
datanodes. But that's an existing problem and we should look at that
separately.

src/backend/commands/sequence.c
src/gtm/main/gtm_seq.c
src/test/regress/expected/sequence.out
src/test/regress/sql/sequence.sql

index d9aae36ce2b5887ac50f6d0ff2294f6f289a5e69..f429be6d22313ec46ab1a80a4e27b5ad0f234efe 100644 (file)
@@ -709,14 +709,11 @@ nextval_internal(Oid relid, bool check_permissions)
                                maxv,
                                minv,
                                cache,
-                               log,
-                               fetch,
-                               last;
-       int64           result,
-                               next,
-                               rescnt = 0;
+                               result;
        bool            cycle;
-       bool            logit = false;
+       int64           range;
+       int64           rangemax;
+       char       *seqname;
 
        /* open and lock sequence */
        init_sequence(relid, &elm, &seqrel);
@@ -765,185 +762,93 @@ nextval_internal(Oid relid, bool check_permissions)
        seq = read_seq_tuple(seqrel, &buf, &seqdatatuple);
        page = BufferGetPage(buf);
 
-       {
-               int64 range = cache; /* how many values to ask from GTM? */
-               int64 rangemax; /* the max value returned from the GTM for our request */
-               char *seqname = GetGlobalSeqName(seqrel, NULL, NULL);
-
-               /*
-                * Above, we still use the page as a locking mechanism to handle
-                * concurrency
-                *
-                * If the user has set a CACHE parameter, we use that. Else we pass in
-                * the SequenceRangeVal value
-                */
-               if (range == DEFAULT_CACHEVAL && SequenceRangeVal > range)
-               {
-                       TimestampTz curtime = GetCurrentTimestamp();
-
-                       if (!TimestampDifferenceExceeds(elm->last_call_time,
-                                                                                                       curtime, 1000))
-                       {
-                               /*
-                                * The previous GetNextValGTM call was made just a while back.
-                                * Request double the range of what was requested in the
-                                * earlier call. Honor the SequenceRangeVal boundary
-                                * value to limit very large range requests!
-                                */
-                               elm->range_multiplier *= 2;
-                               if (elm->range_multiplier < SequenceRangeVal)
-                                       range = elm->range_multiplier;
-                               else
-                                       elm->range_multiplier = range = SequenceRangeVal;
-
-                               elog(DEBUG1, "increase sequence range %ld", range);
-                       }
-                       else if (TimestampDifferenceExceeds(elm->last_call_time,
-                                                                                               curtime, 5000))
-                       {
-                               /* The previous GetNextValGTM call was pretty old */
-                               range = elm->range_multiplier = DEFAULT_CACHEVAL;
-                               elog(DEBUG1, "reset sequence range %ld", range);
-                       }
-                       else if (TimestampDifferenceExceeds(elm->last_call_time,
-                                                                                               curtime, 3000))
-                       {
-                               /*
-                                * The previous GetNextValGTM call was made quite some time
-                                * ago. Try to reduce the range request to reduce the gap
-                                */
-                               if (elm->range_multiplier != DEFAULT_CACHEVAL)
-                               {
-                                       range = elm->range_multiplier =
-                                                               rint(elm->range_multiplier/2);
-                                       elog(DEBUG1, "decrease sequence range %ld", range);
-                               }
-                       }
-                       else
-                       {
-                               /*
-                                * Current range_multiplier alllows to cache sequence values
-                                * for 1-3 seconds of work. Keep that rate.
-                                */
-                               range = elm->range_multiplier;
-                       }
-                       elm->last_call_time = curtime;
-               }
-
-               result = (int64) GetNextValGTM(seqname, range, &rangemax);
-               pfree(seqname);
-
-               /* Update the on-disk data */
-               seq->last_value = result; /* last fetched number */
-               seq->is_called = true;
-
-               /* save info in local cache */
-               elm->last = result;                     /* last returned number */
-               elm->cached = rangemax;         /* last fetched range max limit */
-               elm->last_valid = true;
-
-               last_used_seq = elm;
-       }
-
-       elm->increment = incby;
-       last = next = result = seq->last_value;
-       fetch = cache;
-       log = seq->log_cnt;
-
-       if (!seq->is_called)
-       {
-               rescnt++;                               /* return last_value if not is_called */
-               fetch--;
-       }
+       range = cache; /* how many values to ask from GTM? */
+       seqname = GetGlobalSeqName(seqrel, NULL, NULL);
 
        /*
-        * Decide whether we should emit a WAL log record.  If so, force up the
-        * fetch count to grab SEQ_LOG_VALS more values than we actually need to
-        * cache.  (These will then be usable without logging.)
+        * Above, we still use the page as a locking mechanism to handle
+        * concurrency
         *
-        * If this is the first nextval after a checkpoint, we must force a new
-        * WAL record to be written anyway, else replay starting from the
-        * checkpoint would fail to advance the sequence past the logged values.
-        * In this case we may as well fetch extra values.
+        * If the user has set a CACHE parameter, we use that. Else we pass in
+        * the SequenceRangeVal value
         */
-       if (log < fetch || !seq->is_called)
+       if (range == DEFAULT_CACHEVAL && SequenceRangeVal > range)
        {
-               /* forced log to satisfy local demand for values */
-               fetch = log = fetch + SEQ_LOG_VALS;
-       }
-       else
-       {
-               XLogRecPtr      redoptr = GetRedoRecPtr();
+               TimestampTz curtime = GetCurrentTimestamp();
 
-               if (PageGetLSN(page) <= redoptr)
+               if (!TimestampDifferenceExceeds(elm->last_call_time,
+                                       curtime, 1000))
                {
-                       /* last update of seq was before checkpoint */
-                       fetch = log = fetch + SEQ_LOG_VALS;
-               }
-       }
+                       /*
+                        * The previous GetNextValGTM call was made just a while back.
+                        * Request double the range of what was requested in the
+                        * earlier call. Honor the SequenceRangeVal boundary
+                        * value to limit very large range requests!
+                        */
+                       elm->range_multiplier *= 2;
+                       if (elm->range_multiplier < SequenceRangeVal)
+                               range = elm->range_multiplier;
+                       else
+                               elm->range_multiplier = range = SequenceRangeVal;
 
-       while (fetch)                           /* try to fetch cache [+ log ] numbers */
-       {
-               /*
-                * Check MAXVALUE for ascending sequences and MINVALUE for descending
-                * sequences
-                */
-               if (incby > 0)
+                       elog(DEBUG1, "increase sequence range %ld", range);
+               }
+               else if (TimestampDifferenceExceeds(elm->last_call_time,
+                                       curtime, 5000))
                {
-                       /* ascending sequence */
-                       if ((maxv >= 0 && next > maxv - incby) ||
-                               (maxv < 0 && next + incby > maxv))
-                       {
-                               if (rescnt > 0)
-                                       break;          /* stop fetching */
-                               if (!cycle)
-                               {
-                                       char            buf[100];
-
-                                       snprintf(buf, sizeof(buf), INT64_FORMAT, maxv);
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED),
-                                                        errmsg("nextval: reached maximum value of sequence \"%s\" (%s)",
-                                                                       RelationGetRelationName(seqrel), buf)));
-                               }
-                               next = minv;
-                       }
-                       else
-                               next += incby;
+                       /* The previous GetNextValGTM call was pretty old */
+                       range = elm->range_multiplier = DEFAULT_CACHEVAL;
+                       elog(DEBUG1, "reset sequence range %ld", range);
                }
-               else
+               else if (TimestampDifferenceExceeds(elm->last_call_time,
+                                       curtime, 3000))
                {
-                       /* descending sequence */
-                       if ((minv < 0 && next < minv - incby) ||
-                               (minv >= 0 && next + incby < minv))
+                       /*
+                        * The previous GetNextValGTM call was made quite some time
+                        * ago. Try to reduce the range request to reduce the gap
+                        */
+                       if (elm->range_multiplier != DEFAULT_CACHEVAL)
                        {
-                               if (rescnt > 0)
-                                       break;          /* stop fetching */
-                               if (!cycle)
-                               {
-                                       char            buf[100];
-
-                                       snprintf(buf, sizeof(buf), INT64_FORMAT, minv);
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED),
-                                                        errmsg("nextval: reached minimum value of sequence \"%s\" (%s)",
-                                                                       RelationGetRelationName(seqrel), buf)));
-                               }
-                               next = maxv;
+                               range = elm->range_multiplier =
+                                       rint(elm->range_multiplier/2);
+                               elog(DEBUG1, "decrease sequence range %ld", range);
                        }
-                       else
-                               next += incby;
                }
-               fetch--;
-               if (rescnt < cache)
+               else
                {
-                       log--;
-                       rescnt++;
+                       /*
+                        * Current range_multiplier alllows to cache sequence values
+                        * for 1-3 seconds of work. Keep that rate.
+                        */
+                       range = elm->range_multiplier;
                }
+               elm->last_call_time = curtime;
        }
 
-       log -= fetch;                           /* adjust for any unfetched numbers */
-       Assert(log >= 0);
+       result = (int64) GetNextValGTM(seqname, range, &rangemax);
+       pfree(seqname);
+
+       /* Update the on-disk data */
+       seq->last_value = result; /* last fetched number */
+       seq->is_called = true;
+
+       /* save info in local cache */
+       elm->last = result;                     /* last returned number */
+       elm->cached = rangemax;         /* last fetched range max limit */
+       elm->last_valid = true;
+       elm->increment = incby;
+
+       last_used_seq = elm;
+
+       /*
+        * In Postgres-XL, we always WAL log the sequence fetch if we had to go to
+        * the GTM to fetch new set of values. This may seem a lot of overhead, but
+        * since we cache a lot more values in XL, in practice, the amount of WAL
+        * logging should be considerably less. Also, since sequence values are
+        * managed at the GTM, WAL logging is not strictly required for
+        * correctness, but it helps us to restore sequence state to a somewhat
+        * credible value in case of a crash and recovery.
+        */
 
        /*
         * If something needs to be WAL logged, acquire an xid, so this
@@ -952,7 +857,7 @@ nextval_internal(Oid relid, bool check_permissions)
         * to assign xids subxacts, that'll already trigger an appropriate wait.
         * (Have to do that here, so we're outside the critical section)
         */
-       if (logit && RelationNeedsWAL(seqrel))
+       if (RelationNeedsWAL(seqrel))
                GetTopTransactionId();
 
        /* ready to change the on-disk (or really, in-buffer) tuple */
@@ -970,22 +875,21 @@ nextval_internal(Oid relid, bool check_permissions)
        MarkBufferDirty(buf);
 
        /* XLOG stuff */
-       if (logit && RelationNeedsWAL(seqrel))
+       if (RelationNeedsWAL(seqrel))
        {
                xl_seq_rec      xlrec;
                XLogRecPtr      recptr;
 
                /*
                 * We don't log the current state of the tuple, but rather the state
-                * as it would appear after "log" more fetches.  This lets us skip
-                * that many future WAL records, at the cost that we lose those
-                * sequence values if we crash.
+                * as it would appear after all cached values are used. This is
+                * different than what we do in vanilla PostgreSQL.
                 */
                XLogBeginInsert();
                XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
 
                /* set values that will be saved in xlog */
-               seq->last_value = next;
+               seq->last_value = rangemax;
                seq->is_called = true;
                seq->log_cnt = 0;
 
@@ -1000,9 +904,9 @@ nextval_internal(Oid relid, bool check_permissions)
        }
 
        /* Now update sequence tuple to the intended final state */
-       seq->last_value = last;         /* last fetched number */
+       seq->last_value = rangemax;             /* last fetched number */
        seq->is_called = true;
-       seq->log_cnt = log;                     /* how much is logged */
+       seq->log_cnt = (rangemax - result) / incby;
 
        END_CRIT_SECTION();
 
index 2265c037ace89bbaab91598f3d1355cb93399f0a..edabcb5a98abf0ed821650e880ab09e00e174669 100644 (file)
@@ -943,12 +943,9 @@ GTM_SeqGetNext(GTM_SequenceKey seqkey, char *coord_name,
        GTM_SeqInfo *seqinfo = seq_find_seqinfo(seqkey);
 
        if (seqinfo == NULL)
-       {
-               ereport(LOG,
+               ereport(ERROR,
                                (EINVAL,
                                 errmsg("The sequence with the given key does not exist")));
-               return EINVAL;
-       }
 
        GTM_RWLockAcquire(&seqinfo->gs_lock, GTM_LOCKMODE_WRITE);
 
@@ -980,12 +977,16 @@ GTM_SeqGetNext(GTM_SequenceKey seqkey, char *coord_name,
                                *result = seqinfo->gs_value = seqinfo->gs_min_value;
                        else
                        {
+                               char        buf[100];
+
                                GTM_RWLockRelease(&seqinfo->gs_lock);
                                seq_release_seqinfo(seqinfo);
-                               ereport(LOG,
+
+                               snprintf(buf, sizeof(buf), INT64_FORMAT, seqinfo->gs_max_value);
+                               ereport(ERROR,
                                                (ERANGE,
-                                                errmsg("Sequence reached maximum value")));
-                               return ERANGE;
+                                                errmsg("nextval: reached maximum value of sequence \"%s\" (%s)",
+                                                        seqinfo->gs_key->gsk_key, buf)));
                        }
                }
                else
@@ -1008,12 +1009,15 @@ GTM_SeqGetNext(GTM_SequenceKey seqkey, char *coord_name,
                                *result = seqinfo->gs_value = seqinfo->gs_max_value;
                        else
                        {
+                               char        buf[100];
                                GTM_RWLockRelease(&seqinfo->gs_lock);
                                seq_release_seqinfo(seqinfo);
-                               ereport(LOG,
+
+                               snprintf(buf, sizeof(buf), INT64_FORMAT, seqinfo->gs_min_value);
+                               ereport(ERROR,
                                                (ERANGE,
-                                                errmsg("Sequence reached maximum value")));
-                               return ERANGE;
+                                                errmsg("nextval: reached minimum value of sequence \"%s\" (%s)",
+                                                        seqinfo->gs_key->gsk_key, buf)));
                        }
                }
        }
index 911d1661a4c85324d8a363cb4154af1caa13d0a2..dd065a3f3561cb230ab2a8ea4c8d611609d6d3ae 100644 (file)
@@ -401,7 +401,7 @@ SELECT nextval('sequence_test2');
 (1 row)
 
 SELECT nextval('sequence_test2');  -- error
-ERROR:  nextval: reached maximum value of sequence "sequence_test2" (36)
+ERROR:  nextval: reached maximum value of sequence "regression.public.sequence_test2" (36)
 ALTER SEQUENCE sequence_test2 RESTART WITH -24 START WITH -24
   INCREMENT BY -4 MINVALUE -36 MAXVALUE -5 CYCLE;
 SELECT nextval('sequence_test2');
@@ -461,7 +461,7 @@ SELECT nextval('sequence_test2');
 (1 row)
 
 SELECT nextval('sequence_test2');  -- error
-ERROR:  nextval: reached minimum value of sequence "sequence_test2" (-36)
+ERROR:  nextval: reached minimum value of sequence "regression.public.sequence_test2" (-36)
 -- reset
 ALTER SEQUENCE IF EXISTS sequence_test2 RESTART WITH 32 START WITH 32
   INCREMENT BY 4 MAXVALUE 36 MINVALUE 5 CYCLE;
@@ -764,10 +764,10 @@ BEGIN;
 SET LOCAL SESSION AUTHORIZATION regress_seq_user;
 CREATE SEQUENCE seq3;
 REVOKE ALL ON seq3 FROM regress_seq_user;
-SAVEPOINT save;
-SELECT setval('seq3', 5);
-ERROR:  permission denied for sequence seq3
-ROLLBACK TO save;
+-- XL does not support SAVEPOINT
+-- SAVEPOINT save;
+-- SELECT setval('seq3', 5);
+-- ROLLBACK TO save;
 GRANT UPDATE ON seq3 TO regress_seq_user;
 SELECT setval('seq3', 5);
  setval 
index 04e9eaefe356259c1552f23289525f3b25e0aad0..98b5c26bd107075bd492f9bb19460cf842560693 100644 (file)
@@ -374,9 +374,10 @@ BEGIN;
 SET LOCAL SESSION AUTHORIZATION regress_seq_user;
 CREATE SEQUENCE seq3;
 REVOKE ALL ON seq3 FROM regress_seq_user;
-SAVEPOINT save;
-SELECT setval('seq3', 5);
-ROLLBACK TO save;
+-- XL does not support SAVEPOINT
+-- SAVEPOINT save;
+-- SELECT setval('seq3', 5);
+-- ROLLBACK TO save;
 GRANT UPDATE ON seq3 TO regress_seq_user;
 SELECT setval('seq3', 5);
 SELECT nextval('seq3');