values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));
values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr)); /* shared with xvac */
- values[7] = PointerGetDatum(&tuphdr->t_ctid);
+ if (!HeapTupleHeaderIsSpeculative(tuphdr))
+ values[7] = PointerGetDatum(&tuphdr->t_tidstate.t_ctid);
+ else
+ values[7] = PointerGetDatum(&tuphdr->t_tidstate.t_token);
values[8] = UInt32GetDatum(tuphdr->t_infomask2);
values[9] = UInt32GetDatum(tuphdr->t_infomask);
values[10] = UInt8GetDatum(tuphdr->t_hoff);
JumbleRangeTable(jstate, query->rtable);
JumbleExpr(jstate, (Node *) query->jointree);
JumbleExpr(jstate, (Node *) query->targetList);
+ APP_JUMB(query->specClause);
+ JumbleExpr(jstate, (Node *) query->arbiterElems);
+ JumbleExpr(jstate, query->arbiterWhere);
JumbleExpr(jstate, (Node *) query->returningList);
JumbleExpr(jstate, (Node *) query->groupClause);
JumbleExpr(jstate, query->havingQual);
APP_JUMB(ce->cursor_param);
}
break;
+ case T_InferenceElem:
+ {
+ InferenceElem *ie = (InferenceElem *) node;
+
+ APP_JUMB(ie->infercollid);
+ APP_JUMB(ie->inferopfamily);
+ APP_JUMB(ie->inferopcinputtype);
+ JumbleExpr(jstate, ie->expr);
+ }
+ break;
case T_TargetEntry:
{
TargetEntry *tle = (TargetEntry *) node;
void
deparseInsertSql(StringInfo buf, PlannerInfo *root,
Index rtindex, Relation rel,
- List *targetAttrs, List *returningList,
- List **retrieved_attrs)
+ List *targetAttrs, bool ignore,
+ List *returningList, List **retrieved_attrs)
{
AttrNumber pindex;
bool first;
else
appendStringInfoString(buf, " DEFAULT VALUES");
+ if (ignore)
+ appendStringInfoString(buf, " ON CONFLICT IGNORE");
+
deparseReturningList(buf, root, rtindex, rel,
rel->trigdesc && rel->trigdesc->trig_insert_after_row,
returningList, retrieved_attrs);
ERROR: duplicate key value violates unique constraint "t1_pkey"
DETAIL: Key ("C 1")=(11) already exists.
CONTEXT: Remote SQL command: INSERT INTO "S 1"."T 1"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
+INSERT INTO ft1(c1, c2) VALUES(11, 12) ON CONFLICT IGNORE; -- works
+INSERT INTO ft1(c1, c2) VALUES(11, 12) ON CONFLICT (c1, c2) IGNORE; -- unsupported
+ERROR: relation "ft1" is not an ordinary table
+HINT: Only ordinary tables are accepted as targets when a unique index is inferred for ON CONFLICT.
INSERT INTO ft1(c1, c2) VALUES(1111, -2); -- c2positive
ERROR: new row for relation "T 1" violates check constraint "c2positive"
DETAIL: Failing row contains (1111, -2, null, null, null, null, ft1 , null).
List *targetAttrs = NIL;
List *returningList = NIL;
List *retrieved_attrs = NIL;
+ bool ignore = false;
initStringInfo(&sql);
if (plan->returningLists)
returningList = (List *) list_nth(plan->returningLists, subplan_index);
+ if (root->parse->arbiterElems)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("postgres_fdw does not support ON CONFLICT unique index inference")));
+ else if (plan->spec == SPEC_IGNORE)
+ ignore = true;
+ else if (plan->spec != SPEC_NONE)
+ elog(ERROR, "unexpected speculative specification: %d", (int) plan->spec);
+
/*
* Construct the SQL command string.
*/
{
case CMD_INSERT:
deparseInsertSql(&sql, root, resultRelation, rel,
- targetAttrs, returningList,
+ targetAttrs, ignore, returningList,
&retrieved_attrs);
break;
case CMD_UPDATE:
List **params);
extern void deparseInsertSql(StringInfo buf, PlannerInfo *root,
Index rtindex, Relation rel,
- List *targetAttrs, List *returningList,
+ List *targetAttrs, bool ignore, List *returningList,
List **retrieved_attrs);
extern void deparseUpdateSql(StringInfo buf, PlannerInfo *root,
Index rtindex, Relation rel,
ALTER TABLE "S 1"."T 1" ADD CONSTRAINT c2positive CHECK (c2 >= 0);
INSERT INTO ft1(c1, c2) VALUES(11, 12); -- duplicate key
+INSERT INTO ft1(c1, c2) VALUES(11, 12) ON CONFLICT IGNORE; -- works
+INSERT INTO ft1(c1, c2) VALUES(11, 12) ON CONFLICT (c1, c2) IGNORE; -- unsupported
INSERT INTO ft1(c1, c2) VALUES(1111, -2); -- c2positive
UPDATE ft1 SET c2 = -c2 WHERE c1 = 1; -- c2positive
All check constraints and not-null constraints on a parent table are
automatically inherited by its children. Other types of constraints
(unique, primary key, and foreign key constraints) are not inherited.
+ Therefore, <command>INSERT</command> with <literal>ON CONFLICT</>
+ unique index inference considers only unique constraints/indexes
+ directly associated with the table inserted into (which can be an
+ inheritance parent or child).
</para>
<para>
source provides.
</para>
+ <para>
+ <command>INSERT</> with an <literal>ON CONFLICT</> clause is not
+ supported with a unique index inference specification, since a
+ conflict arbitrating unique index cannot meaningfully be inferred
+ on a foreign table.
+ </para>
+
</sect1>
</chapter>
<entry></entry>
<entry></entry>
</row>
+ <row>
+ <entry><token>CONFLICT</token></entry>
+ <entry>non-reserved</entry>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ </row>
<row>
<entry><token>CONNECT</token></entry>
<entry></entry>
in your user mapping must have privileges to do these things.)
</para>
+ <para>
+ <filename>postgres_fdw</> supports <command>INSERT</command>
+ statements with an <literal>ON CONFLICT IGNORE</> clause, provided a
+ unique index inference specification is omitted.
+ </para>
+
<para>
It is generally recommended that the columns of a foreign table be declared
with exactly the same data types, and collations if applicable, as the
<para>
The event is one of <literal>SELECT</literal>,
<literal>INSERT</literal>, <literal>UPDATE</literal>, or
- <literal>DELETE</literal>.
+ <literal>DELETE</literal>. Note that an
+ <command>INSERT</command> containing an <literal>ON CONFLICT
+ IGNORE</literal> clause cannot be used on tables that have
+ either <literal>INSERT</literal> or <literal>UPDATE</literal>
+ rules. Consider using an updatable view instead; updatable views
+ are supported with <literal>ON CONFLICT IGNORE</literal>.
</para>
</listitem>
</varlistentry>
<literal>EXCLUDE</>, and
<literal>REFERENCES</> (foreign key) constraints accept this
clause. <literal>NOT NULL</> and <literal>CHECK</> constraints are not
- deferrable.
+ deferrable. Note that constraints that were created with this
+ clause cannot be used as arbiters of whether or not to take the
+ alternative path with an <command>INSERT</command> statement
+ that includes an <literal>ON CONFLICT</> clause.
</para>
</listitem>
</varlistentry>
<para>
Simple views are automatically updatable: the system will allow
<command>INSERT</>, <command>UPDATE</> and <command>DELETE</> statements
- to be used on the view in the same way as on a regular table. A view is
- automatically updatable if it satisfies all of the following conditions:
+ to be used on the view in the same way as on a regular table (aside from
+ the limitations on ON CONFLICT noted below). A view is automatically
+ updatable if it satisfies all of the following conditions:
<itemizedlist>
<listitem>
not need any permissions on the underlying base relations (see
<xref linkend="rules-privileges">).
</para>
+ <para>
+ <command>INSERT</command> with an <literal>ON CONFLICT IGNORE</>
+ clause is supported on updatable views (if an inference
+ specification is provided, it must infer a unique index on the
+ underlying base relation).
+ </para>
</refsect2>
</refsect1>
[ WITH [ RECURSIVE ] <replaceable class="parameter">with_query</replaceable> [, ...] ]
INSERT INTO <replaceable class="PARAMETER">table_name</replaceable> [ ( <replaceable class="PARAMETER">column_name</replaceable> [, ...] ) ]
{ DEFAULT VALUES | VALUES ( { <replaceable class="PARAMETER">expression</replaceable> | DEFAULT } [, ...] ) [, ...] | <replaceable class="PARAMETER">query</replaceable> }
+ [ ON CONFLICT [ ( { <replaceable class="parameter">column_name_index</replaceable> | ( <replaceable class="parameter">expression_index</replaceable> ) } [ COLLATE <replaceable class="parameter">collation</replaceable> ] [ <replaceable class="parameter">opclass</replaceable> ] [, ...] [ WHERE <replaceable class="PARAMETER">index_predicate</replaceable> ] ) ] IGNORE ]
[ RETURNING * | <replaceable class="parameter">output_expression</replaceable> [ [ AS ] <replaceable class="parameter">output_name</replaceable> ] [, ...] ]
</synopsis>
</refsynopsisdiv>
automatic type conversion will be attempted.
</para>
+ <para>
+ The optional <literal>ON CONFLICT</> clause specifies a path to
+ take as an alternative to raising a conflict related error. The
+ alternative path is considered individually for each row proposed
+ for insertion; it is taken (or not taken) once per row.
+ <literal>ON CONFLICT IGNORE</> simply avoids inserting any
+ individual row when it is determined that a conflict related error
+ would otherwise need to be raised.
+ </para>
+
+ <para>
+ <literal>ON CONFLICT IGNORE</> optionally accepts a
+ <emphasis>unique index inference</emphasis> specification, which
+ consists of one or more <replaceable
+ class="PARAMETER">column_name_index</replaceable> columns and/or
+ <replaceable class="PARAMETER">expression_index</replaceable>
+ expressions on columns, appearing between parentheses. These are
+ used to infer a unique index, and pre-checking for conflicts is
+ limited to that index (if no appropriate index is available, an
+ error is raised). The check for conflicts can optionally be
+ limited to a subset of the table using <replaceable
+ class="PARAMETER">index_predicate</replaceable> (this allows the
+ implementation to use a partial unique index covering at least
+ that subset). Note that omitting a unique index inference
+ specification indicates a total indifference to where any conflict
+ could occur, which isn't always appropriate. At times, it may be
+ desirable for <literal>ON CONFLICT IGNORE</> to
+ <emphasis>not</emphasis> suppress a conflict related error
+ associated with an index where that isn't explicitly anticipated.
+ </para>
+
+ <para>
+ Columns and/or expressions appearing in a unique index inference
+ specification must match all the columns/expressions of some
+ existing unique index on <replaceable
+ class="PARAMETER">table_name</replaceable> - there can be no
+ columns/expressions from the unique index that do not appear in the
+ inference specification, nor can there be any columns/expressions
+ appearing in the inference specification that do not appear in the
+ unique index definition. However, the order of the
+ columns/expressions in the index definition, or whether or not the
+ index definition specified <literal>NULLS FIRST</> or
+ <literal>NULLS LAST</>, or the internal sort order of each column
+ (whether <literal>DESC</> or <literal>ASC</> were specified) are
+ all irrelevant. Deferred unique constraints are not supported as
+ arbiters of whether an alternative <literal>ON CONFLICT</> path
+ should be taken.
+ </para>
+
+ <para>
+ The definition of a conflict for the purposes of <literal>ON
+ CONFLICT</> is somewhat subtle, although the exact definition is
+ seldom of great interest. A conflict is a condition that
+ ordinarily necessitates raising either a unique violation from a
+ unique constraint (or unique index), or an exclusion violation from
+ an exclusion constraint, occurring in an index/constraint that
+ arbitrates the <literal>ON CONFLICT</> path. Only unique indexes
+ (or unique constraints) can be inferred with a unique index
+ inference specification. In contrast to the rules around certain
+ other SQL clauses, like the <literal>DISTINCT</literal> clause, the
+ definition of a duplicate (a conflict) is based on whatever unique
+ indexes happen to be defined on columns on the table. In
+ particular, the default operator class for the type of each indexed
+ column is not considered. The inference clause can require a
+ particular named operator class be used per column/expression
+ indexed if that's a concern. Similarly, the inference
+ specification can limit its consideration of arbiter unique indexes
+ on the basis of collations on column/expression covered by
+ available indexes.
+ </para>
+
+ <para>
+ The optional <replaceable
+ class="PARAMETER">index_predicate</replaceable> can be used to
+ allow the inference specification to infer that a partial unique
+ index can be used. Any unique index that otherwise satisfies the
+ inference specification, while also covering at least all the rows
+ in the table covered by <replaceable
+ class="PARAMETER">index_predicate</replaceable> may be used. It is
+ recommended that the partial index predicate of the unique index
+ intended to be used as the arbiter of taking the alternative path
+ be matched exactly, but this is not required. Note that an error
+ will be raised if an arbiter unique index is chosen that does not
+ cover the tuple or tuples proposed for insertion. However, an
+ overly specific <replaceable
+ class="PARAMETER">index_predicate</replaceable> does not imply that
+ arbitrating conflicts will be limited to the subset of rows covered
+ by the inferred unique index corresponding to <replaceable
+ class="PARAMETER">index_predicate</replaceable>.
+ </para>
+
+ <para>
+ Multiple unique indexes/constraints may be inferred where multiple
+ indexes exist that satisfy the inference specification, although
+ typically this does not occur (this behavior only exists to
+ smoothly cover certain corner cases). Note that the ordering of
+ multiple <replaceable
+ class="PARAMETER">column_name_index</replaceable> columns and/or
+ <replaceable class="PARAMETER">expression_index</replaceable>
+ within the inference specification is not significant.
+ </para>
+
<para>
The optional <literal>RETURNING</> clause causes <command>INSERT</>
to compute and return value(s) based on each row actually inserted.
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><replaceable class="PARAMETER">column_name_index</replaceable></term>
+ <listitem>
+ <para>
+ The name of a <replaceable
+ class="PARAMETER">table_name</replaceable> column. Part of a
+ unique index inference specification. Follows <command>CREATE
+ INDEX</command> format.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable class="PARAMETER">expression_index</replaceable></term>
+ <listitem>
+ <para>
+ Similar to <replaceable
+ class="PARAMETER">column_name_index</replaceable>, but used to
+ infer expressions on <replaceable
+ class="PARAMETER">table_name</replaceable> columns appearing
+ within index definitions (not simple columns). Part of a unique
+ index inference specification. Follows <command>CREATE INDEX</command>
+ format.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable class="PARAMETER">collation</replaceable></term>
+ <listitem>
+ <para>
+ When specified, mandates that corresponding <replaceable
+ class="PARAMETER">column_name_index</replaceable> or
+ <replaceable class="PARAMETER">expression_index</replaceable>
+ use a particular collation in order to be inferred as arbitrating
+ the <literal>ON CONFLICT</> path taken. Typically this is
+ omitted even when it is intended that <literal>ON CONFLICT</>
+ infer a particular unique index or unique constraint with a
+ non-default collation, since the use of a non-default collation
+ does not usually change the semantics of arbitration (because
+ the <emphasis>equality</emphasis> semantics are often equivalent
+ anyway). Follows <command>CREATE INDEX</command> format.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable class="PARAMETER">opclass</replaceable></term>
+ <listitem>
+ <para>
+ When specified, mandates that corresponding <replaceable
+ class="PARAMETER">column_name_index</replaceable> or
+ <replaceable class="PARAMETER">expression_index</replaceable>
+ use a particular operator class in order to be inferred as
+ arbitrating the <literal>ON CONFLICT</> path taken. Sometimes
+ this is omitted even when it is intended that <literal>ON
+ CONFLICT</> infer a particular unique index or unique constraint
+ with a non-default operator class (because the
+ <emphasis>equality</emphasis> semantics are often equivalent
+ across a type's operator classes anyway, or because it's
+ sufficient to trust that the defined unique index has the
+ pertinent definition of equality). Follows <command>CREATE
+ INDEX</command> format.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable class="PARAMETER">index_predicate</replaceable></term>
+ <listitem>
+ <para>
+ Used to allow inference of partial unique indexes. Any indexes
+ that satisfy the predicate (which need not actually be partial
+ indexes) will be used in conflict arbitration. Follows
+ <command>CREATE INDEX</command> format.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term><literal>DEFAULT VALUES</literal></term>
<listitem>
<listitem>
<para>
An expression to be computed and returned by the <command>INSERT</>
- command after each row is inserted. The expression can use any
- column names of the table named by <replaceable class="PARAMETER">table_name</replaceable>.
+ command after each row is inserted (not updated). The
+ expression can use any column names of the table named by
+ <replaceable class="PARAMETER">table_name</replaceable>.
Write <literal>*</> to return all columns of the inserted row(s).
</para>
</listitem>
RETURNING *
)
INSERT INTO employees_log SELECT *, current_timestamp FROM upd;
-</programlisting></para>
+</programlisting>
+ </para>
+ <para>
+ Insert a distributor, or do nothing for rows proposed for insertion
+ when an existing, excluded row (a row with a matching constrained
+ column or columns after before-row insert triggers fire) exists.
+ Example assumes a unique index has been defined that constrains
+ values appearing in the <literal>did</literal> column:
+<programlisting>
+ INSERT INTO distributors (did, dname) VALUES (7, 'Redline GmbH')
+ ON CONFLICT (did) IGNORE;
+</programlisting>
+ </para>
+ <para>
+ Insert new distributor if possible; otherwise
+ <literal>IGNORE</literal>. Example assumes a unique index has been
+ defined that constrains values appearing in the
+ <literal>did</literal> column on a subset of rows where the
+ <literal>is_active</literal> boolean column evaluates to
+ <literal>true</literal>:
+<programlisting>
+ -- This statement could infer a partial unique index on did
+ -- with a predicate of WHERE is_active, but it could also
+ -- just use a regular unique constraint on did if that was
+ -- all that was available.
+ INSERT INTO distributors (did, dname) VALUES (9, 'Antwerp Design')
+ ON CONFLICT (did WHERE is_active) IGNORE;
+</programlisting>
+ </para>
</refsect1>
<refsect1>
<command>INSERT</command> conforms to the SQL standard, except that
the <literal>RETURNING</> clause is a
<productname>PostgreSQL</productname> extension, as is the ability
- to use <literal>WITH</> with <command>INSERT</>.
+ to use <literal>WITH</> with <command>INSERT</>, and the ability to
+ specify an alternative path with <literal>ON CONFLICT</>.
Also, the case in
which a column name list is omitted, but not all the columns are
filled from the <literal>VALUES</> clause or <replaceable>query</>,
<para>
Currently, only <literal>UNIQUE</>, <literal>PRIMARY KEY</>,
<literal>REFERENCES</> (foreign key), and <literal>EXCLUDE</>
- constraints are affected by this setting.
+ constraints are affected by this setting. Note that constraints
+ that are <literal>DEFERRED</literal> cannot be used as arbiters by
+ the <literal>ON CONFLICT</> clause that <command>INSERT</>
+ supports.
<literal>NOT NULL</> and <literal>CHECK</> constraints are
always checked immediately when a row is inserted or modified
(<emphasis>not</> at the end of the statement).
* copy the identification info of the old tuple: t_ctid, t_self, and OID
* (if any)
*/
- newTuple->t_data->t_ctid = tuple->t_data->t_ctid;
+ newTuple->t_data->t_tidstate = tuple->t_data->t_tidstate;
newTuple->t_self = tuple->t_self;
newTuple->t_tableOid = tuple->t_tableOid;
if (tupleDesc->tdhasoid)
static HTSU_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple,
ItemPointer ctid, TransactionId xid,
LockTupleMode mode);
+static void heap_affirm_insert(Relation relation, HeapTuple tuple);
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask,
uint16 *new_infomask2);
static TransactionId MultiXactIdGetUpdateXid(TransactionId xmax,
*/
if (HeapTupleIsHotUpdated(heapTuple))
{
- Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
+ Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_tidstate.t_ctid) ==
ItemPointerGetBlockNumber(tid));
- offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
+ offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_tidstate.t_ctid);
at_chain_start = false;
prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
}
if (valid)
*tid = ctid;
+ /*
+ * Should not have followed a t_ctid chain and found a speculative
+ * tuple that way
+ */
+ Assert(!HeapTupleHeaderIsSpeculative(tp.t_data) ||
+ !TransactionIdIsValid(priorXmax));
+
/*
* If there's a valid t_ctid link, follow it, else we're done.
*/
if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
+ HeapTupleHeaderIsSpeculative(tp.t_data) ||
HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
- ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
+ ItemPointerEquals(&tp.t_self, &tp.t_data->t_tidstate.t_ctid))
{
UnlockReleaseBuffer(buffer);
break;
}
- ctid = tp.t_data->t_ctid;
+ ctid = tp.t_data->t_tidstate.t_ctid;
priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
UnlockReleaseBuffer(buffer);
} /* end of loop */
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
- RelationPutHeapTuple(relation, buffer, heaptup);
+ RelationPutHeapTuple(relation, buffer, heaptup,
+ (options & HEAP_INSERT_SPECULATIVE) != 0);
+
if (PageIsAllVisible(BufferGetPage(buffer)))
{
}
xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
- xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
+ xlrec.flags = 0;
+ if (all_visible_cleared)
+ xlrec.flags |= XLOG_HEAP_ALL_VISIBLE_CLEARED;
+ if (options & HEAP_INSERT_SPECULATIVE)
+ xlrec.flags |= XLOG_HEAP_SPECULATIVE_TUPLE;
Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
/*
* RelationGetBufferForTuple has ensured that the first tuple fits.
* Put that on the page, and then as many other tuples as fit.
*/
- RelationPutHeapTuple(relation, buffer, heaptuples[ndone]);
+ RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
{
HeapTuple heaptup = heaptuples[ndone + nthispage];
if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
break;
- RelationPutHeapTuple(relation, buffer, heaptup);
+ RelationPutHeapTuple(relation, buffer, heaptup, false);
/*
* We don't use heap_multi_insert for catalog tuples yet, but
* (the last only for HeapTupleSelfUpdated, since we
* cannot obtain cmax from a combocid generated by another transaction).
* See comments for struct HeapUpdateFailureData for additional info.
+ *
+ * If 'speculative' is true, caller requires that we "super-delete" a tuple we
+ * just inserted in the same command. Instead of the normal visibility checks,
+ * we check that the tuple was inserted by the current transaction and given
+ * command id. Also, instead of setting its xmax, we set xmin to invalid,
+ * making it immediately appear as dead to everyone.
*/
HTSU_Result
heap_delete(Relation relation, ItemPointer tid,
CommandId cid, Snapshot crosscheck, bool wait,
- HeapUpdateFailureData *hufd)
+ HeapUpdateFailureData *hufd, bool speculative)
{
HTSU_Result result;
TransactionId xid = GetCurrentTransactionId();
tp.t_self = *tid;
l1:
- result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
+ if (!speculative)
+ {
+ result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
+ }
+ else
+ {
+ if (tp.t_data->t_choice.t_heap.t_xmin != xid ||
+ tp.t_data->t_choice.t_heap.t_field3.t_cid != cid)
+ elog(ERROR, "attempted to super-delete a tuple from other CID");
+ result = HeapTupleMayBeUpdated;
+ }
if (result == HeapTupleInvisible)
{
result == HeapTupleUpdated ||
result == HeapTupleBeingUpdated);
Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID));
- hufd->ctid = tp.t_data->t_ctid;
+ hufd->ctid = tp.t_data->t_tidstate.t_ctid;
hufd->xmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
if (result == HeapTupleSelfUpdated)
hufd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
* using our own TransactionId below, since some other backend could
* incorporate our XID into a MultiXact immediately afterwards.)
*/
- MultiXactIdSetOldestMember();
+ if (!speculative)
+ {
+ MultiXactIdSetOldestMember();
- compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(tp.t_data),
- tp.t_data->t_infomask, tp.t_data->t_infomask2,
- xid, LockTupleExclusive, true,
- &new_xmax, &new_infomask, &new_infomask2);
+ compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(tp.t_data),
+ tp.t_data->t_infomask, tp.t_data->t_infomask2,
+ xid, LockTupleExclusive, true,
+ &new_xmax, &new_infomask, &new_infomask2);
+ }
+ else
+ {
+ new_xmax = new_infomask = new_infomask2 = 0;
+ }
START_CRIT_SECTION();
tp.t_data->t_infomask |= new_infomask;
tp.t_data->t_infomask2 |= new_infomask2;
HeapTupleHeaderClearHotUpdated(tp.t_data);
- HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
- HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
+ /*
+ * When killing a speculatively-inserted tuple, we set xmin to invalid
+ * instead of setting xmax, to make the tuple clearly invisible to
+ * everyone. In particular, we want HeapTupleSatisfiesDirty() to regard
+ * the tuple as dead, so that another backend inserting a duplicate key
+ * value won't unnecessarily wait for our transaction to finish.
+ */
+ if (!speculative)
+ {
+ HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
+ HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
+ }
+ else
+ {
+ HeapTupleHeaderSetXmin(tp.t_data, InvalidTransactionId);
+ }
+
/* Make sure there is no forward chain link in t_ctid */
- tp.t_data->t_ctid = tp.t_self;
+ tp.t_data->t_tidstate.t_ctid = tp.t_self;
MarkBufferDirty(buffer);
if (RelationIsAccessibleInLogicalDecoding(relation))
log_heap_new_cid(relation, &tp);
- xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
+ xlrec.flags = 0;
+ if (all_visible_cleared)
+ xlrec.flags |= XLOG_HEAP_ALL_VISIBLE_CLEARED;
+ if (speculative)
+ xlrec.flags |= XLOG_HEAP_SPECULATIVE_TUPLE;
xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
tp.t_data->t_infomask2);
xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
return HeapTupleMayBeUpdated;
}
+/*
+ * heap_finish_speculative - resolve the outcome of a speculative insertion.
+ *
+ * 'tuple' identifies (via t_self) a tuple in 'relation' that was originally
+ * inserted only speculatively.  'conflict' tells us how the speculation
+ * ended:
+ *
+ *	conflict == false: the insertion succeeded.  heap_affirm_insert()
+ *		overwrites the tuple's t_ctid field (which holds the speculative
+ *		token) with the tuple's own TID, as is characteristic of a newly
+ *		inserted ordinary tuple.
+ *
+ *	conflict == true: the insertion failed, and the tuple is "super
+ *		deleted" by calling heap_delete() in its speculative mode.
+ */
+void
+heap_finish_speculative(Relation relation, HeapTuple tuple, bool conflict)
+{
+	HeapUpdateFailureData hufd;
+
+	if (conflict)
+	{
+		/*
+		 * A concurrent insert conflicted with ours, so "super delete" the
+		 * speculatively-inserted tuple.
+		 *
+		 * This is what avoids "unprincipled deadlocks": once the conflict is
+		 * known, other sessions should neither wait on our speculative token
+		 * nor treat this heap tuple as an ordinary tuple whose inserting
+		 * xact they must wait out before they can UPDATE/DELETE.  In effect
+		 * the heap tuple acted as a short-lived "value lock", as opposed to
+		 * an ordinary tuple that other xacts wait on until xmin's xact ends
+		 * when a unique/exclusion violation (the violation that arbitrates
+		 * taking the alternative path) is possible.
+		 *
+		 * NOTE(review): the speculative path of heap_delete() raises an
+		 * error when the tuple's stored command id differs from the cid
+		 * passed in; FirstCommandId is hard-coded here -- confirm that is
+		 * correct when the insert ran under a later command id.
+		 */
+		heap_delete(relation, &(tuple->t_self), FirstCommandId,
+					InvalidSnapshot, false, &hufd, true);
+		return;
+	}
+
+	/*
+	 * No conflict: affirm the speculative insertion, leaving the tuple
+	 * physically indistinguishable from one inserted the conventional way.
+	 */
+	heap_affirm_insert(relation, tuple);
+}
+
/*
* simple_heap_delete - delete a tuple
*
result = heap_delete(relation, tid,
GetCurrentCommandId(true), InvalidSnapshot,
true /* wait for commit */ ,
- &hufd);
+ &hufd, false);
switch (result)
{
case HeapTupleSelfUpdated:
result == HeapTupleUpdated ||
result == HeapTupleBeingUpdated);
Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
- hufd->ctid = oldtup.t_data->t_ctid;
+ hufd->ctid = oldtup.t_data->t_tidstate.t_ctid;
hufd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
if (result == HeapTupleSelfUpdated)
hufd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
/* temporarily make it look not-updated */
- oldtup.t_data->t_ctid = oldtup.t_self;
+ oldtup.t_data->t_tidstate.t_ctid = oldtup.t_self;
already_marked = true;
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
HeapTupleClearHeapOnly(newtup);
}
- RelationPutHeapTuple(relation, newbuf, heaptup); /* insert new tuple */
+ RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */
if (!already_marked)
{
}
/* record address of new tuple in t_ctid of old one */
- oldtup.t_data->t_ctid = heaptup->t_self;
+ oldtup.t_data->t_tidstate.t_ctid = heaptup->t_self;
/* clear PD_ALL_VISIBLE flags */
if (PageIsAllVisible(BufferGetPage(buffer)))
xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
infomask = tuple->t_data->t_infomask;
infomask2 = tuple->t_data->t_infomask2;
- ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
+ ItemPointerCopy(&tuple->t_data->t_tidstate.t_ctid, &t_ctid);
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated ||
result == HeapTupleWouldBlock);
Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
- hufd->ctid = tuple->t_data->t_ctid;
+ hufd->ctid = tuple->t_data->t_tidstate.t_ctid;
hufd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
if (result == HeapTupleSelfUpdated)
hufd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
* the tuple as well.
*/
if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
- tuple->t_data->t_ctid = *tid;
+ tuple->t_data->t_tidstate.t_ctid = *tid;
MarkBufferDirty(*buffer);
/* if we find the end of update chain, we're done. */
if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID ||
- ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) ||
+ ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_tidstate.t_ctid) ||
HeapTupleHeaderIsOnlyLocked(mytup.t_data))
{
UnlockReleaseBuffer(buf);
/* tail recursion */
priorXmax = HeapTupleHeaderGetUpdateXid(mytup.t_data);
- ItemPointerCopy(&(mytup.t_data->t_ctid), &tupid);
+ ItemPointerCopy(&(mytup.t_data->t_tidstate.t_ctid), &tupid);
UnlockReleaseBuffer(buf);
}
}
return HeapTupleMayBeUpdated;
}
+/*
+ * heap_affirm_insert - clear speculative token from tuple
+ *
+ * 'tuple' identifies (via t_self) a speculatively inserted tuple in
+ * 'relation'.  The on-page copy of the tuple has its t_ctid field, which
+ * holds the speculative token, overwritten with the tuple's own TID.  The
+ * change is WAL-logged with an XLOG_HEAP_AFFIRM record when the relation
+ * needs WAL.  An error is raised if t_self does not point at a normal line
+ * pointer on the page.
+ *
+ * It would not be okay for a transaction to commit without confirming outcome
+ * of speculative insertion.  Occasionally, super deletion is necessary (if an
+ * attempt at speculative insertion failed).  More often, this routine affirms
+ * a speculative insertion was successful.
+ *
+ * The need to WAL-log this action may not be obvious (logical decoding does
+ * not require it).  Doing so allows the implementation to not have to consider
+ * speculative tuples with an in-doubt status.  Either a transaction is in
+ * progress and its tuples may be speculative, or it committed and they cannot
+ * be, or it aborted and it doesn't matter either way.
+ */
+static void
+heap_affirm_insert(Relation relation, HeapTuple tuple)
+{
+	Buffer		buffer;
+	Page		page;
+	OffsetNumber offnum;
+	ItemId		lp = NULL;
+	HeapTupleHeader htup;
+
+	/* Pin and exclusively lock the page holding the tuple */
+	buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self)));
+	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+	page = (Page) BufferGetPage(buffer);
+
+	/* Only fetch the line pointer if the offset is within the page's range */
+	offnum = ItemPointerGetOffsetNumber(&(tuple->t_self));
+	if (PageGetMaxOffsetNumber(page) >= offnum)
+		lp = PageGetItemId(page, offnum);
+
+	/* Report the failure under this function's own name */
+	if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
+		elog(ERROR, "heap_affirm_insert: invalid lp");
+
+	htup = (HeapTupleHeader) PageGetItem(page, lp);
+
+	/* NO EREPORT(ERROR) from here till changes are logged */
+	START_CRIT_SECTION();
+
+	Assert(HeapTupleHeaderIsSpeculative(tuple->t_data));
+
+	MarkBufferDirty(buffer);
+
+	/*
+	 * Make sure there is no apparent forward chain link in t_ctid.
+	 * Speculative inserters rely on this (in fact, the forward link is a
+	 * speculative token value).
+	 */
+	htup->t_tidstate.t_ctid = tuple->t_self;
+
+	/* XLOG stuff */
+	if (RelationNeedsWAL(relation))
+	{
+		xl_heap_affirm xlrec;
+		XLogRecPtr	recptr;
+
+		/* The record carries only the tuple's offset; block 0 is the page */
+		xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
+
+		XLogBeginInsert();
+		XLogRegisterData((char *) &xlrec, SizeOfHeapAffirm);
+		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+
+		recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_AFFIRM);
+
+		PageSetLSN(page, recptr);
+	}
+
+	END_CRIT_SECTION();
+
+	UnlockReleaseBuffer(buffer);
+}
/*
* heap_inplace_update - update a tuple "in place" (ie, overwrite it)
HeapTupleHeaderClearHotUpdated(htup);
fix_infomask_from_infobits(xlrec->infobits_set,
&htup->t_infomask, &htup->t_infomask2);
- HeapTupleHeaderSetXmax(htup, xlrec->xmax);
+ if (!(xlrec->flags & XLOG_HEAP_SPECULATIVE_TUPLE))
+ HeapTupleHeaderSetXmax(htup, xlrec->xmax);
+ else
+ HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Mark the page as a candidate for pruning */
PageClearAllVisible(page);
/* Make sure there is no forward chain link in t_ctid */
- htup->t_ctid = target_tid;
+ htup->t_tidstate.t_ctid = target_tid;
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
htup->t_hoff = xlhdr.t_hoff;
HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
HeapTupleHeaderSetCmin(htup, FirstCommandId);
- htup->t_ctid = target_tid;
+ htup->t_tidstate.t_ctid = target_tid;
if (PageAddItem(page, (Item) htup, newlen, xlrec->offnum,
true, true) == InvalidOffsetNumber)
htup->t_hoff = xlhdr->t_hoff;
HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
HeapTupleHeaderSetCmin(htup, FirstCommandId);
- ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
- ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
+ ItemPointerSetBlockNumber(&htup->t_tidstate.t_ctid, blkno);
+ ItemPointerSetOffsetNumber(&htup->t_tidstate.t_ctid, offnum);
offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
if (offnum == InvalidOffsetNumber)
HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Set forward chain link in t_ctid */
- htup->t_ctid = newtid;
+ htup->t_tidstate.t_ctid = newtid;
/* Mark the page as a candidate for pruning */
PageSetPrunable(page, XLogRecGetXid(record));
HeapTupleHeaderSetCmin(htup, FirstCommandId);
HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
/* Make sure there is no forward chain link in t_ctid */
- htup->t_ctid = newtid;
+ htup->t_tidstate.t_ctid = newtid;
offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
if (offnum == InvalidOffsetNumber)
XLogRecordPageWithFreeSpace(rnode, newblk, freespace);
}
+/*
+ * heap_xlog_affirm - replay an XLOG_HEAP_AFFIRM record
+ *
+ * When block 0 of the record needs redo, the t_ctid of the tuple at the
+ * logged offset is set to that tuple's own location (the buffer's block
+ * number plus the logged offset), and the page LSN is advanced to the end
+ * of the record.
+ */
+static void
+heap_xlog_affirm(XLogReaderState *record)
+{
+	xl_heap_affirm *affirm = (xl_heap_affirm *) XLogRecGetData(record);
+	XLogRecPtr	endptr = record->EndRecPtr;
+	Buffer		buf;
+
+	if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO)
+	{
+		Page		pg = BufferGetPage(buf);
+		OffsetNumber off = affirm->offnum;
+		ItemId		itemid = NULL;
+		HeapTupleHeader tuphdr;
+
+		/* Only fetch the line pointer when the offset exists on the page */
+		if (PageGetMaxOffsetNumber(pg) >= off)
+			itemid = PageGetItemId(pg, off);
+
+		if (PageGetMaxOffsetNumber(pg) < off || !ItemIdIsNormal(itemid))
+			elog(PANIC, "heap_affirm_redo: invalid lp");
+
+		tuphdr = (HeapTupleHeader) PageGetItem(pg, itemid);
+
+		/* Affirm tuple as actually inserted: t_ctid now points at itself */
+		ItemPointerSet(&tuphdr->t_tidstate.t_ctid,
+					   BufferGetBlockNumber(buf),
+					   off);
+
+		PageSetLSN(pg, endptr);
+		MarkBufferDirty(buf);
+	}
+
+	if (BufferIsValid(buf))
+		UnlockReleaseBuffer(buf);
+}
+
static void
heap_xlog_lock(XLogReaderState *record)
{
{
HeapTupleHeaderClearHotUpdated(htup);
/* Make sure there is no forward chain link in t_ctid */
- ItemPointerSet(&htup->t_ctid,
+ ItemPointerSet(&htup->t_tidstate.t_ctid,
BufferGetBlockNumber(buffer),
offnum);
}
case XLOG_HEAP_HOT_UPDATE:
heap_xlog_update(record, true);
break;
+ case XLOG_HEAP_AFFIRM:
+ heap_xlog_affirm(record);
+ break;
case XLOG_HEAP_LOCK:
heap_xlog_lock(record);
break;
void
RelationPutHeapTuple(Relation relation,
Buffer buffer,
- HeapTuple tuple)
+ HeapTuple tuple,
+ bool token)
{
Page pageHeader;
OffsetNumber offnum;
/* Update tuple->t_self to the actual position where it was stored */
ItemPointerSet(&(tuple->t_self), BufferGetBlockNumber(buffer), offnum);
- /* Insert the correct position into CTID of the stored tuple, too */
itemId = PageGetItemId(pageHeader, offnum);
item = PageGetItem(pageHeader, itemId);
- ((HeapTupleHeader) item)->t_ctid = tuple->t_self;
+
+ if (!token)
+ {
+ /* Insert the correct position into CTID of the stored tuple, too */
+ ((HeapTupleHeader) item)->t_tidstate.t_ctid = tuple->t_self;
+ }
+ else
+ {
+ /* Speculatively inserted tuples should retain token value instead */
+ Assert(HeapTupleHeaderIsSpeculative(((HeapTupleHeader) item)));
+ }
}
/*
/*
* Advance to next chain member.
*/
- Assert(ItemPointerGetBlockNumber(&htup->t_ctid) ==
+ Assert(ItemPointerGetBlockNumber(&htup->t_tidstate.t_ctid) ==
BufferGetBlockNumber(buffer));
- offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
+ offnum = ItemPointerGetOffsetNumber(&htup->t_tidstate.t_ctid);
priorXmax = HeapTupleHeaderGetUpdateXid(htup);
}
continue;
/* Set up to scan the HOT-chain */
- nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
+ nextoffnum = ItemPointerGetOffsetNumber(&htup->t_tidstate.t_ctid);
priorXmax = HeapTupleHeaderGetUpdateXid(htup);
}
else
if (!HeapTupleHeaderIsHotUpdated(htup))
break;
- nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
+ nextoffnum = ItemPointerGetOffsetNumber(&htup->t_tidstate.t_ctid);
priorXmax = HeapTupleHeaderGetUpdateXid(htup);
}
}
while ((unresolved = hash_seq_search(&seq_status)) != NULL)
{
- ItemPointerSetInvalid(&unresolved->tuple->t_data->t_ctid);
+ ItemPointerSetInvalid(&unresolved->tuple->t_data->t_tidstate.t_ctid);
raw_heap_insert(state, unresolved->tuple);
}
* Invalid ctid means that ctid should point to the tuple itself. We'll
* override it later if the tuple is part of an update chain.
*/
- ItemPointerSetInvalid(&new_tuple->t_data->t_ctid);
+ ItemPointerSetInvalid(&new_tuple->t_data->t_tidstate.t_ctid);
/*
* If the tuple has been updated, check the old-to-new mapping hash table.
if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) &&
!(ItemPointerEquals(&(old_tuple->t_self),
- &(old_tuple->t_data->t_ctid))))
+ &(old_tuple->t_data->t_tidstate.t_ctid))))
{
OldToNewMapping mapping;
memset(&hashkey, 0, sizeof(hashkey));
hashkey.xmin = HeapTupleHeaderGetUpdateXid(old_tuple->t_data);
- hashkey.tid = old_tuple->t_data->t_ctid;
+ hashkey.tid = old_tuple->t_data->t_tidstate.t_ctid;
mapping = (OldToNewMapping)
hash_search(state->rs_old_new_tid_map, &hashkey,
* set the ctid of this tuple to point to the new location, and
* insert it right away.
*/
- new_tuple->t_data->t_ctid = mapping->new_tid;
+ new_tuple->t_data->t_tidstate.t_ctid = mapping->new_tid;
/* We don't need the mapping entry anymore */
hash_search(state->rs_old_new_tid_map, &hashkey,
new_tuple = unresolved->tuple;
free_new = true;
old_tid = unresolved->old_tid;
- new_tuple->t_data->t_ctid = new_tid;
+ new_tuple->t_data->t_tidstate.t_ctid = new_tid;
/*
* We don't need the hash entry anymore, but don't free its
* Insert the correct position into CTID of the stored tuple, too, if the
* caller didn't supply a valid CTID.
*/
- if (!ItemPointerIsValid(&tup->t_data->t_ctid))
+ if (!ItemPointerIsValid(&tup->t_data->t_tidstate.t_ctid))
{
ItemId newitemid;
HeapTupleHeader onpage_tup;
newitemid = PageGetItemId(page, newoff);
onpage_tup = (HeapTupleHeader) PageGetItem(page, newitemid);
- onpage_tup->t_ctid = tup->t_self;
+ onpage_tup->t_tidstate.t_ctid = tup->t_self;
}
/* If heaptup is a private copy, release it. */
new_tuple->t_tableOid = tup->t_tableOid;
new_tuple->t_data->t_choice = tup->t_data->t_choice;
- new_tuple->t_data->t_ctid = tup->t_data->t_ctid;
+ new_tuple->t_data->t_tidstate.t_ctid = tup->t_data->t_tidstate.t_ctid;
new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK;
new_tuple->t_data->t_infomask |=
tup->t_data->t_infomask & HEAP_XACT_MASK;
static TransactionId _bt_check_unique(Relation rel, IndexTuple itup,
Relation heapRel, Buffer buf, OffsetNumber offset,
ScanKey itup_scankey,
- IndexUniqueCheck checkUnique, bool *is_unique);
+ IndexUniqueCheck checkUnique, bool *is_unique,
+ uint32 *speculativeToken);
static void _bt_findinsertloc(Relation rel,
Buffer *bufptr,
OffsetNumber *offsetptr,
*/
if (checkUnique != UNIQUE_CHECK_NO)
{
- TransactionId xwait;
+ TransactionId xwait;
+ uint32 speculativeToken;
offset = _bt_binsrch(rel, buf, natts, itup_scankey, false);
xwait = _bt_check_unique(rel, itup, heapRel, buf, offset, itup_scankey,
- checkUnique, &is_unique);
+ checkUnique, &is_unique, &speculativeToken);
if (TransactionIdIsValid(xwait))
{
/* Have to wait for the other guy ... */
_bt_relbuf(rel, buf);
- XactLockTableWait(xwait, rel, &itup->t_tid, XLTW_InsertIndex);
+ /*
+ * If it's a speculative insertion, wait for it to finish (ie.
+ * to go ahead with the insertion, or kill the tuple). Otherwise
+ * wait for the transaction to finish as usual.
+ */
+ if (speculativeToken)
+ SpeculativeInsertionWait(xwait, speculativeToken);
+ else
+ XactLockTableWait(xwait, rel, &itup->t_tid, XLTW_InsertIndex);
+
/* start over... */
_bt_freestack(stack);
goto top;
* also point to end-of-page, which means that the first tuple to check
* is the first tuple on the next page.
*
- * Returns InvalidTransactionId if there is no conflict, else an xact ID
- * we must wait for to see if it commits a conflicting tuple. If an actual
- * conflict is detected, no return --- just ereport().
+ * Returns InvalidTransactionId if there is no conflict, else an xact ID we
+ * must wait for to see if it commits a conflicting tuple. If an actual
+ * conflict is detected, no return --- just ereport(). If an xact ID is
+ * returned, and the conflicting tuple still has a speculative insertion in
+ * progress, *speculativeToken is set to non-zero, and the caller can wait for
+ * the verdict on the insertion using SpeculativeInsertionWait().
*
* However, if checkUnique == UNIQUE_CHECK_PARTIAL, we always return
* InvalidTransactionId because we don't want to wait. In this case we
static TransactionId
_bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
Buffer buf, OffsetNumber offset, ScanKey itup_scankey,
- IndexUniqueCheck checkUnique, bool *is_unique)
+ IndexUniqueCheck checkUnique, bool *is_unique,
+ uint32 *speculativeToken)
{
TupleDesc itupdesc = RelationGetDescr(rel);
int natts = rel->rd_rel->relnatts;
if (nbuf != InvalidBuffer)
_bt_relbuf(rel, nbuf);
/* Tell _bt_doinsert to wait... */
+ *speculativeToken = SnapshotDirty.speculativeToken;
return xwait;
}
xlrec->new_offnum,
xlrec->new_xmax);
}
+ else if (info == XLOG_HEAP_AFFIRM)
+ {
+ xl_heap_affirm *xlrec = (xl_heap_affirm *) rec;
+
+ appendStringInfo(buf, "off %u", xlrec->offnum);
+ }
else if (info == XLOG_HEAP_LOCK)
{
xl_heap_lock *xlrec = (xl_heap_lock *) rec;
case XLOG_HEAP_HOT_UPDATE | XLOG_HEAP_INIT_PAGE:
id = "HOT_UPDATE+INIT";
break;
+ case XLOG_HEAP_AFFIRM:
+ id = "HEAP_AFFIRM";
+ break;
case XLOG_HEAP_LOCK:
id = "LOCK";
break;
/* other info */
ii->ii_Unique = indexStruct->indisunique;
ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
+ /* assume not doing speculative insertion for now */
+ ii->ii_UniqueOps = NULL;
+ ii->ii_UniqueProcs = NULL;
+ ii->ii_UniqueStrats = NULL;
/* initialize index-build state to default */
ii->ii_Concurrent = false;
return ii;
}
+/* ----------------
+ *		IndexInfoSpeculative
+ *			Append extra state to IndexInfo record
+ *
+ * index: the (unique, btree) index relation
+ * ii: IndexInfo for that index; its ii_UniqueOps, ii_UniqueProcs and
+ *	   ii_UniqueStrats arrays are allocated and filled in here
+ *
+ * For unique indexes, we usually don't want to add info to the IndexInfo for
+ * checking uniqueness, since the B-Tree AM handles that directly.  However, in
+ * the case of speculative insertion, external support is required.
+ *
+ * Do this processing here rather than in BuildIndexInfo() to save the common
+ * non-speculative cases the overhead they'd otherwise incur.
+ * ----------------
+ */
+void
+IndexInfoSpeculative(Relation index, IndexInfo *ii)
+{
+	int			ncols = index->rd_rel->relnatts;
+	int			i;
+
+	/*
+	 * fetch info for checking unique indexes
+	 */
+	Assert(ii->ii_Unique);
+
+	if (index->rd_rel->relam != BTREE_AM_OID)
+		elog(ERROR, "unexpected non-btree speculative unique index");
+
+	ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * ncols);
+	ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * ncols);
+	ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * ncols);
+
+	/*
+	 * The strategy is always btree equality.  For each column, look up the
+	 * equality operator in the column's operator family (using the opclass
+	 * input type on both sides), and then the function implementing it.
+	 */
+	for (i = 0; i < ncols; i++)
+	{
+		ii->ii_UniqueStrats[i] = BTEqualStrategyNumber;
+		ii->ii_UniqueOps[i] =
+			get_opfamily_member(index->rd_opfamily[i],
+								index->rd_opcintype[i],
+								index->rd_opcintype[i],
+								ii->ii_UniqueStrats[i]);
+
+		/* Don't hand an invalid operator OID to get_opcode() */
+		if (!OidIsValid(ii->ii_UniqueOps[i]))
+			elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
+				 ii->ii_UniqueStrats[i], index->rd_opcintype[i],
+				 index->rd_opcintype[i], index->rd_opfamily[i]);
+
+		ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]);
+	}
+}
+
/* ----------------
* FormIndexDatum
* Construct values[] and isnull[] arrays for a new index tuple.
/*
* Check that this tuple has no conflicts.
*/
- check_exclusion_constraint(heapRelation,
- indexRelation, indexInfo,
- &(heapTuple->t_self), values, isnull,
- estate, true, false);
+ check_exclusion_or_unique_constraint(heapRelation, indexRelation,
+ indexInfo, &(heapTuple->t_self),
+ values, isnull, estate, true,
+ false, true, NULL);
}
heap_endscan(scan);
resultRelInfo->ri_RelationDesc = heapRel;
resultRelInfo->ri_TrigDesc = NULL; /* we don't fire triggers */
- ExecOpenIndices(resultRelInfo);
+ ExecOpenIndices(resultRelInfo, false);
return resultRelInfo;
}
* For exclusion constraints we just do the normal check, but now it's
* okay to throw error.
*/
- check_exclusion_constraint(trigdata->tg_relation, indexRel, indexInfo,
- &(new_row->t_self), values, isnull,
- estate, false, false);
+ check_exclusion_or_unique_constraint(trigdata->tg_relation, indexRel,
+ indexInfo, &(new_row->t_self),
+ values, isnull, estate, false,
+ false, true, NULL);
}
/*
1, /* dummy rangetable index */
0);
- ExecOpenIndices(resultRelInfo);
+ ExecOpenIndices(resultRelInfo, false);
estate->es_result_relations = resultRelInfo;
estate->es_num_result_relations = 1;
if (resultRelInfo->ri_NumIndices > 0)
recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
- estate);
+ estate, false,
+ NIL);
/* AFTER ROW INSERT Triggers */
ExecARInsertTriggers(estate, resultRelInfo, tuple,
ExecStoreTuple(bufferedTuples[i], myslot, InvalidBuffer, false);
recheckIndexes =
ExecInsertIndexTuples(myslot, &(bufferedTuples[i]->t_self),
- estate);
+ estate, false, NIL);
ExecARInsertTriggers(estate, resultRelInfo,
bufferedTuples[i],
recheckIndexes);
const char *foperation;
bool labeltargets;
int j;
+ List *idxNames = NIL;
+ ListCell *lst;
switch (node->operation)
{
}
}
+ foreach(lst, node->arbiterIndexes)
+ {
+ char *indexname = get_rel_name(lfirst_oid(lst));
+
+ idxNames = lappend(idxNames, indexname);
+ }
+
+ /*
+ * Make sure that there is still an arbiter property list when ON CONFLICT
+ * IGNORE is used, and an inference specification is omitted (Non-text
+ * format explains will show an empty array, which seems appropriate
+ * there).
+ */
+ if (node->spec == SPEC_IGNORE && idxNames == NIL &&
+ es->format == EXPLAIN_FORMAT_TEXT)
+ idxNames = lappend(idxNames, "(All)");
+
+ if (node->spec == SPEC_IGNORE)
+ ExplainPropertyList("Conflict Arbiter Indexes", idxNames, es);
+
if (labeltargets)
ExplainCloseGroup("Target Tables", "Target Tables", false, es);
}
HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId);
HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
tuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
- ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber);
+ ItemPointerSet(&tuple->t_data->t_tidstate.t_ctid, 0, FirstOffsetNumber);
/* check the comment above nextval_internal()'s equivalent call. */
if (RelationNeedsWAL(rel))
that only the first result row of an SRF counts, because all subsequent
rows will result in attempts to re-update an already updated target row.
This is historical behavior and seems not worth changing.)
+
+Speculative insertion
+---------------------
+
+Speculative insertion is a process that the executor manages for the benefit of
+INSERT ... ON CONFLICT IGNORE. Supported indexes include nbtree unique
+indexes (nbtree is currently the only amcanunique index access method), or
+exclusion constraint indexes (exclusion constraints are considered a
+generalization of unique constraints).
+
+The primary user-visible goal for INSERT ... ON CONFLICT is to guarantee either
+an insert, or a conclusive determination that an insert cannot go ahead (due to
+a conclusively committed/visible conflict). A would-be conflict (and the
+associated index) are the arbiters of whether or not the alternative (IGNORE)
+path is taken. The implementation more or less tries to insert until one or
+the other of those two outcomes is reached. There are some non-obvious hazards
+involved that are carefully avoided. These hazards relate to concurrent
+activity causing conflicts for the implementation, which must be handled.
+
+The index is the authoritative source of truth for whether there is or is not a
+conflict, for unique index enforcement in general, and for speculative
+insertion in particular. The heap must still be considered, though, not least
+since it alone has authoritative visibility information. Through looping, we
+hope to overcome the disconnect between the heap and the arbiter index.
+Theoretically, some individual session could loop forever, although under high
+concurrency one session always proceeds.
+
+The first step in the loop is to perform a pre-check. The indexes are scanned
+for existing conflicting values. At this point, we may have to wait until the
+end of another xact (or xact's promise token -- more on that later), iff it
+isn't immediately conclusive whether or not there is a conflict. By the time
+the pre-check finishes, we have reached a definite conclusion as to whether a
+conflict exists.
+
+The second step (skipped when a conflict is found) is to insert a heap tuple
+and related index tuples opportunistically. This uses the same mechanism as
+deferred unique indexes, and so we never wait for a possibly conflicting xact
+to commit or abort (unlike with conventional unique index insertion) -- we
+simply detect a possible conflict.
+
+When opportunistically inserting during the second step, we are not logically
+inserting a tuple as such. Rather, the process is somewhat similar to the
+conventional unique index insertion steps taken within the nbtree AM, where we
+must briefly lock the *value* being inserted: in that codepath, the value
+proposed for insertion is for an instant locked *in the abstract*, by way of a
+buffer lock on "the first leaf page the value could be on". Then, having
+established the right to physically insert, do so (or throw an error). For
+speculative insertion, if no conflict occurs during the insertion (which is
+usually the case, since it was just determined in the first step that there was
+no conflict), then we're done. Otherwise, we must restart (and likely find the
+same conflict tuple during the first step of the new iteration). But a
+counter-intuitive step must be taken first (which is what makes this whole
+dance similar to conventional nbtree "value locking").
+
+We must "super delete" the tuple when the opportunistic insertion finds a
+conflict. This means that it immediately becomes invisible to all snapshot
+types, and immediately becomes reclaimable by VACUUM. Other backends
+(speculative inserters or ordinary inserters) know to not wait on our
+transaction end when they encounter an optimistically inserted "promise tuple".
+Rather, they wait on a corresponding promise token lock, which we hold only
+while opportunistically inserting. We release the lock when done
+opportunistically inserting (and after "super deleting", if that proved
+necessary), releasing our waiters (who will ordinarily re-find our promise
+tuple as a bona fide tuple, or occasionally will find that they can insert
+after all). It's important that other xacts not wait on the end of our xact
+until we've established that we've successfully and conclusively inserted
+logically (or established that there was an insertion conflict, and cleaned up
+after it by "super deleting"). Otherwise, concurrent speculative inserters
+could be involved in "unprincipled deadlocks": deadlocks where there is no
+user-visible mutual dependency, and yet an implementation related mutual
+dependency is unexpectedly introduced. The user might be left with no
+reasonable way of avoiding these deadlocks, which would not be okay.
* the latest version of the row was deleted, so we need do
* nothing. (Should be safe to examine xmin without getting
* buffer's content lock, since xmin never changes in an existing
- * tuple.)
+ * non-promise tuple.)
*/
if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
priorXmax))
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
+
+ /* Should not encounter speculative tuple on recheck */
+ Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data));
if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self))
{
/* it was updated, so look at the updated version */
* As above, it should be safe to examine xmax and t_ctid without the
* buffer content lock, because they can't be changing.
*/
- if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
+ if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_tidstate.t_ctid))
{
/* deleted, so forget about it */
ReleaseBuffer(buffer);
}
/* updated, so look at the updated row */
- tuple.t_self = tuple.t_data->t_ctid;
+ tuple.t_self = tuple.t_data->t_tidstate.t_ctid;
/* updated row should have xmin matching this xmax */
priorXmax = HeapTupleHeaderGetUpdateXid(tuple.t_data);
ReleaseBuffer(buffer);
#include "access/relscan.h"
#include "access/transam.h"
+#include "access/xact.h"
#include "catalog/index.h"
#include "executor/execdebug.h"
#include "nodes/nodeFuncs.h"
* ----------------------------------------------------------------
*/
void
-ExecOpenIndices(ResultRelInfo *resultRelInfo)
+ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative)
{
Relation resultRelation = resultRelInfo->ri_RelationDesc;
List *indexoidlist;
/* extract index key information from the index's pg_index info */
ii = BuildIndexInfo(indexDesc);
+ /*
+ * Iff the indexes are to be used for speculative insertion, add extra
+ * information required by unique index entries
+ */
+ if (speculative && ii->ii_Unique)
+ IndexInfoSpeculative(indexDesc, ii);
+
relationDescs[i] = indexDesc;
indexInfoArray[i] = ii;
i++;
*
* This returns a list of index OIDs for any unique or exclusion
* constraints that are deferred and that had
- * potential (unconfirmed) conflicts.
+ * potential (unconfirmed) conflicts. (if noDupErr == true, the
+ * same is done for non-deferred constraints)
*
* CAUTION: this must not be called for a HOT update.
* We can't defend against that here for lack of info.
List *
ExecInsertIndexTuples(TupleTableSlot *slot,
ItemPointer tupleid,
- EState *estate)
+ EState *estate,
+ bool noDupErr,
+ List *arbiterIndexes)
{
List *result = NIL;
ResultRelInfo *resultRelInfo;
IndexInfo *indexInfo;
IndexUniqueCheck checkUnique;
bool satisfiesConstraint;
+ bool arbiter;
if (indexRelation == NULL)
continue;
indexInfo = indexInfoArray[i];
+ /* Record if speculative insertion arbiter */
+ arbiter = list_member_oid(arbiterIndexes,
+ indexRelation->rd_index->indexrelid);
+
/* If the index is marked as read-only, ignore it */
if (!indexInfo->ii_ReadyForInserts)
continue;
/* Skip this index-update if the predicate isn't satisfied */
if (!ExecQual(predicate, econtext, false))
+ {
+ if (arbiter)
+ ereport(ERROR,
+ (errcode(ERRCODE_TRIGGERED_ACTION_EXCEPTION),
+ errmsg("inferred arbiter partial unique index has predicate that fails to cover tuple proposed for insertion"),
+ errdetail("ON CONFLICT inference clause implies that the tuple proposed for insertion must be covered by predicate for partial index \"%s\".",
+ RelationGetRelationName(indexRelation)),
+ errtableconstraint(heapRelation,
+ RelationGetRelationName(indexRelation))));
continue;
+ }
}
/*
* For a deferrable unique index, we tell the index AM to just detect
* possible non-uniqueness, and we add the index OID to the result
* list if further checking is needed.
+ *
+ * For a speculative insertion (used by INSERT ... ON CONFLICT), just
+ * detect possible non-uniqueness, and tell the caller if it failed.
*/
if (!indexRelation->rd_index->indisunique)
checkUnique = UNIQUE_CHECK_NO;
+ else if (noDupErr && (arbiterIndexes == NIL || arbiter))
+ checkUnique = UNIQUE_CHECK_PARTIAL;
else if (indexRelation->rd_index->indimmediate)
checkUnique = UNIQUE_CHECK_YES;
else
* If the index has an associated exclusion constraint, check that.
* This is simpler than the process for uniqueness checks since we
* always insert first and then check. If the constraint is deferred,
- * we check now anyway, but don't throw error on violation; instead
- * we'll queue a recheck event.
+ * we check now anyway, but don't throw error on violation or wait for
+ * a conclusive outcome from a concurrent insertion; instead we'll
+ * queue a recheck event. Similarly, noDupErr callers (speculative
+ * inserters) will recheck later, and wait for a conclusive outcome
+ * then.
*
* An index for an exclusion constraint can't also be UNIQUE (not an
* essential property, we just don't allow it in the grammar), so no
*/
if (indexInfo->ii_ExclusionOps != NULL)
{
- bool errorOK = !indexRelation->rd_index->indimmediate;
+ bool violationOK = (!indexRelation->rd_index->indimmediate ||
+ noDupErr);
satisfiesConstraint =
- check_exclusion_constraint(heapRelation,
- indexRelation, indexInfo,
- tupleid, values, isnull,
- estate, false, errorOK);
+ check_exclusion_or_unique_constraint(heapRelation,
+ indexRelation, indexInfo,
+ tupleid, values, isnull,
+ estate, false,
+ violationOK, false, NULL);
}
if ((checkUnique == UNIQUE_CHECK_PARTIAL ||
return result;
}
+/* ----------------------------------------------------------------
+ * ExecCheckIndexConstraints
+ *
+ * This routine checks if a tuple violates any unique or
+ * exclusion constraints. If no conflict, returns true.
+ * Otherwise returns false, and the TID of the conflicting
+ * tuple is returned in *conflictTid
+ *
+ * slot: the tuple proposed for insertion
+ * estate: executor state; the result relation and its open
+ * indexes are taken from es_result_relation_info
+ * conflictTid: set invalid on entry; passed down to the
+ * constraint check so it can report the conflicting TID
+ * arbiterIndexes: list of index OIDs to examine; NIL means
+ * examine every unique or exclusion constraint index
+ *
+ * Nothing is inserted here; the indexes are only probed.
+ *
+ * Note that this doesn't lock the values in any way, so it's
+ * possible that a conflicting tuple is inserted immediately
+ * after this returns, and a later insert with the same values
+ * still conflicts. But this can be used for a pre-check before
+ * insertion.
+ * ----------------------------------------------------------------
+ */
+bool
+ExecCheckIndexConstraints(TupleTableSlot *slot,
+ EState *estate, ItemPointer conflictTid,
+ List *arbiterIndexes)
+{
+ ResultRelInfo *resultRelInfo;
+ int i;
+ int numIndices;
+ RelationPtr relationDescs;
+ Relation heapRelation;
+ IndexInfo **indexInfoArray;
+ ExprContext *econtext;
+ Datum values[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ ItemPointerData invalidItemPtr;
+ bool checkedIndex = false;
+
+ /*
+ * Start with "no conflict found". invalidItemPtr is passed below as the
+ * tuple's own TID, since no tuple has been inserted yet.
+ */
+ ItemPointerSetInvalid(conflictTid);
+ ItemPointerSetInvalid(&invalidItemPtr);
+
+ /*
+ * Get information from the result relation info structure.
+ */
+ resultRelInfo = estate->es_result_relation_info;
+ numIndices = resultRelInfo->ri_NumIndices;
+ relationDescs = resultRelInfo->ri_IndexRelationDescs;
+ indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
+ heapRelation = resultRelInfo->ri_RelationDesc;
+
+ /*
+ * We will use the EState's per-tuple context for evaluating predicates
+ * and index expressions (creating it if it's not already there).
+ */
+ econtext = GetPerTupleExprContext(estate);
+
+ /* Arrange for econtext's scan tuple to be the tuple under test */
+ econtext->ecxt_scantuple = slot;
+
+ /*
+ * For each unique or exclusion constraint index, form the index datums
+ * for the proposed tuple and check them for a conflict. No index tuple
+ * is inserted.
+ */
+ for (i = 0; i < numIndices; i++)
+ {
+ Relation indexRelation = relationDescs[i];
+ IndexInfo *indexInfo;
+ bool satisfiesConstraint;
+ bool arbiter;
+
+ if (indexRelation == NULL)
+ continue;
+
+ indexInfo = indexInfoArray[i];
+
+ /* Indexes enforcing neither kind of constraint cannot conflict */
+ if (!indexInfo->ii_Unique && !indexInfo->ii_ExclusionOps)
+ continue;
+
+ /* Record if speculative insertion arbiter */
+ arbiter = list_member_oid(arbiterIndexes,
+ indexRelation->rd_index->indexrelid);
+
+ /*
+ * Note that this test comes before the read-only and arbiter filters
+ * below, so a deferred constraint raises an error even if its index
+ * would otherwise have been skipped.
+ */
+ if (!indexRelation->rd_index->indimmediate)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ON CONFLICT is not supported on relations with deferred unique constraints/exclusion constraints"),
+ errtableconstraint(heapRelation,
+ RelationGetRelationName(indexRelation))));
+
+ /* If the index is marked as read-only, ignore it */
+ if (!indexInfo->ii_ReadyForInserts)
+ continue;
+
+ /* When specific arbiter indexes requested, only examine them */
+ if (arbiterIndexes != NIL && !arbiter)
+ continue;
+
+ /* At least one index passed the filters above */
+ checkedIndex = true;
+
+ /* Check for partial index */
+ if (indexInfo->ii_Predicate != NIL)
+ {
+ List *predicate;
+
+ /*
+ * If predicate state not set up yet, create it (in the estate's
+ * per-query context)
+ */
+ predicate = indexInfo->ii_PredicateState;
+ if (predicate == NIL)
+ {
+ predicate = (List *)
+ ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
+ estate);
+ indexInfo->ii_PredicateState = predicate;
+ }
+
+ /* Skip checking this index if the predicate isn't satisfied */
+ if (!ExecQual(predicate, econtext, false))
+ continue;
+ }
+
+ /*
+ * FormIndexDatum fills in its values and isnull parameters with the
+ * appropriate values for the column(s) of the index.
+ */
+ FormIndexDatum(indexInfo,
+ slot,
+ estate,
+ values,
+ isnull);
+
+ /*
+ * Trailing arguments are newIndex = false, violationOK = true and
+ * wait = true: a violation is reported through the return value
+ * rather than by raising an error.
+ */
+ satisfiesConstraint =
+ check_exclusion_or_unique_constraint(heapRelation, indexRelation,
+ indexInfo, &invalidItemPtr,
+ values, isnull, estate, false,
+ true, true, conflictTid);
+ if (!satisfiesConstraint)
+ return false;
+ }
+
+ /* Arbiters were requested, but none survived the filters in the loop */
+ if (arbiterIndexes != NIL && !checkedIndex)
+ elog(ERROR, "unexpected failure to find arbiter unique index");
+
+ return true;
+}
+
/*
- * Check for violation of an exclusion constraint
+ * Check for violation of an exclusion or unique constraint
*
* heap: the table containing the new tuple
* index: the index supporting the exclusion constraint
* indexInfo: info about the index, including the exclusion properties
- * tupleid: heap TID of the new tuple we have just inserted
+ * tupleid: heap TID of the new tuple we have just inserted (invalid if we
+ * haven't inserted a new tuple yet)
* values, isnull: the *index* column values computed for the new tuple
* estate: an EState we can do evaluation in
* newIndex: if true, we are trying to build a new index (this affects
* only the wording of error messages)
* errorOK: if true, don't throw error for violation
+ * wait: if true, wait for conflicting transaction to finish, even when
+ * violationOK is true (that is, even though no error will be raised)
+ * conflictTid: if not-NULL, the TID of conflicting tuple is returned here.
*
* Returns true if OK, false if actual or potential violation
*
* is convenient for deferred exclusion checks; we need not bother queuing
* a deferred event if there is definitely no conflict at insertion time.
*
- * When errorOK is false, we'll throw error on violation, so a false result
+ * When violationOK is false, we'll throw error on violation, so a false result
* is impossible.
+ *
+ * Note: The indexam is normally responsible for checking unique constraints,
+ * so this normally only needs to be used for exclusion constraints. But this
+ * function is also called when doing a "pre-check" for conflicts, for the
+ * benefit of speculative insertion. Caller may request that conflict TID be
+ * set, to take further steps.
*/
bool
-check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo,
- ItemPointer tupleid, Datum *values, bool *isnull,
- EState *estate, bool newIndex, bool errorOK)
+check_exclusion_or_unique_constraint(Relation heap, Relation index,
+ IndexInfo *indexInfo, ItemPointer tupleid,
+ Datum *values, bool *isnull,
+ EState *estate, bool newIndex,
+ bool violationOK, bool wait,
+ ItemPointer conflictTid)
{
- Oid *constr_procs = indexInfo->ii_ExclusionProcs;
- uint16 *constr_strats = indexInfo->ii_ExclusionStrats;
+ Oid *constr_procs;
+ uint16 *constr_strats;
Oid *index_collations = index->rd_indcollation;
int index_natts = index->rd_index->indnatts;
IndexScanDesc index_scan;
TupleTableSlot *existing_slot;
TupleTableSlot *save_scantuple;
+ if (indexInfo->ii_ExclusionOps)
+ {
+ constr_procs = indexInfo->ii_ExclusionProcs;
+ constr_strats = indexInfo->ii_ExclusionStrats;
+ }
+ else
+ {
+ constr_procs = indexInfo->ii_UniqueProcs;
+ constr_strats = indexInfo->ii_UniqueStrats;
+ }
+
/*
* If any of the input values are NULL, the constraint check is assumed to
* pass (i.e., we assume the operators are strict).
/*
* Ignore the entry for the tuple we're trying to check.
*/
- if (ItemPointerEquals(tupleid, &tup->t_self))
+ if (ItemPointerIsValid(tupleid) &&
+ ItemPointerEquals(tupleid, &tup->t_self))
{
if (found_self) /* should not happen */
elog(ERROR, "found self tuple multiple times in index \"%s\"",
* conflict */
}
- /*
- * At this point we have either a conflict or a potential conflict. If
- * we're not supposed to raise error, just return the fact of the
- * potential conflict without waiting to see if it's real.
- */
- if (errorOK)
- {
- conflict = true;
- break;
- }
-
/*
* If an in-progress transaction is affecting the visibility of this
* tuple, we need to wait for it to complete and then recheck. For
xwait = TransactionIdIsValid(DirtySnapshot.xmin) ?
DirtySnapshot.xmin : DirtySnapshot.xmax;
+ /*
+ * At this point we have either a conflict or a potential conflict. If
+ * we're not supposed to raise error, just return the fact of the
+ * potential conflict without waiting to see if it's real.
+ */
+ if (violationOK && !wait)
+ {
+ /*
+ * For unique indexes, detecting conflict is coupled with physical
+ * index tuple insertion, so we won't be called for recheck
+ */
+ Assert(!indexInfo->ii_Unique);
+
+ conflict = true;
+ if (conflictTid)
+ *conflictTid = tup->t_self;
+
+ /*
+ * Livelock insurance.
+ *
+ * When doing a speculative insertion pre-check, we cannot have an
+ * "unprincipled deadlock" with another session, fundamentally
+ * because there is no possible mutual dependency, since we only
+ * hold a lock on our token, without attempting to lock anything
+ * else (maybe this is not the first iteration, but no matter;
+ * we'll have super deleted and released insertion token lock if
+ * so, and all locks needed are already held. Also, our XID lock
+ * is irrelevant.)
+ *
+ * In the second phase, where there is a re-check for conflicts,
+ * we can't deadlock either (we never lock another thing, since we
+ * don't wait in that phase). However, a theoretical livelock
+ * hazard exists: Two sessions could each see each other's
+ * conflicting tuple, and each could go and delete, retrying
+ * forever.
+ *
+ * To break the mutual dependency, we may wait on the other xact
+ * here over our caller's request to not do so (in the second
+ * phase). This does not imply the risk of unprincipled deadlocks
+ * either, because if we end up unexpectedly waiting, the other
+ * session will super delete its own tuple *before* releasing its
+ * token lock and freeing us, and without attempting to wait on us
+ * to release our token lock. We'll take another iteration here,
+ * after waiting on the other session's token, not find a conflict
+ * this time, and then proceed (assuming we're the oldest XID).
+ *
+ * N.B.: Unprincipled deadlocks are still theoretically possible
+ * with non-speculative insertion with exclusion constraints, but
+ * this seems inconsequential, since an error was inevitable for
+ * one of the sessions anyway. We only worry about speculative
+ * insertion's problems, since they're likely with idiomatic usage.
+ *
+ * We don't bother with this for the nbtree AM, because it
+ * guarantees that one session will make progress during concurrent
+ * insertion of duplicate values (the unique index enforcement
+ * mechanism ensures this).
+ */
+ if (index->rd_rel->relam != BTREE_AM_OID &&
+ TransactionIdPrecedes(xwait, GetCurrentTransactionId()))
+ break; /* go and super delete/restart speculative insertion */
+ }
+
if (TransactionIdIsValid(xwait))
{
- ctid_wait = tup->t_data->t_ctid;
+ ctid_wait = tup->t_data->t_tidstate.t_ctid;
index_endscan(index_scan);
- XactLockTableWait(xwait, heap, &ctid_wait,
- XLTW_RecheckExclusionConstr);
+ if (DirtySnapshot.speculativeToken)
+ SpeculativeInsertionWait(DirtySnapshot.xmin,
+ DirtySnapshot.speculativeToken);
+ else
+ XactLockTableWait(xwait, heap, &ctid_wait,
+ XLTW_RecheckExclusionConstr);
goto retry;
}
/*
- * We have a definite conflict. Report it.
+ * We have a definite conflict. Return it to caller, or report it.
*/
+ if (violationOK)
+ {
+ conflict = true;
+ if (conflictTid)
+ *conflictTid = tup->t_self;
+ break;
+ }
+
error_new = BuildIndexValueDescription(index, values, isnull);
error_existing = BuildIndexValueDescription(index, existing_values,
existing_isnull);
* However, it is possible to define exclusion constraints for which that
* wouldn't be true --- for instance, if the operator is <>. So we no
* longer complain if found_self is still false.
+ *
+ * It would also not be true in the pre-check mode, when we haven't
+ * inserted a tuple yet.
*/
econtext->ecxt_scantuple = save_scantuple;
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/rel.h"
return ExecProject(projectReturning, NULL);
}
+/*
+ * ExecCheckHeapTupleVisible -- insist that a conflicting tuple is visible
+ *
+ * When the transaction uses a transaction-wide snapshot
+ * (IsolationUsesXactSnapshot()), taking speculative insertion's alternative
+ * path on the strength of a tuple that our snapshot cannot see would not be
+ * consistent with that isolation level's guarantees.  Fetch the tuple at
+ * "tid" under the executor's snapshot and raise a serialization failure if
+ * the fetch does not succeed.  At other isolation levels nothing is checked.
+ */
+static void
+ExecCheckHeapTupleVisible(EState *estate,
+						  ResultRelInfo *relinfo,
+						  ItemPointer tid)
+{
+	HeapTupleData conflictTuple;
+	Buffer		heapBuffer;
+
+	if (IsolationUsesXactSnapshot())
+	{
+		conflictTuple.t_self = *tid;
+
+		if (heap_fetch(relinfo->ri_RelationDesc, estate->es_snapshot,
+					   &conflictTuple, &heapBuffer, false, NULL))
+		{
+			/* Visible: only the buffer reference needs to be dropped */
+			ReleaseBuffer(heapBuffer);
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+					 errmsg("could not serialize access due to concurrent insert or update directing alternative ON CONFLICT path")));
+	}
+}
+}
+
/* ----------------------------------------------------------------
* ExecInsert
*
static TupleTableSlot *
ExecInsert(TupleTableSlot *slot,
TupleTableSlot *planSlot,
+ List *arbiterIndexes,
+ SpecCmd spec,
EState *estate,
bool canSetTag)
{
if (resultRelationDesc->rd_rel->relhasoids)
HeapTupleSetOid(tuple, InvalidOid);
- /* BEFORE ROW INSERT Triggers */
+ /*
+ * BEFORE ROW INSERT Triggers.
+ *
+ * We don't suppress the effects (or, perhaps, side-effects) of BEFORE ROW
+ * INSERT triggers when performing speculative insertion. We cannot
+ * proceed with even considering violations until these triggers fire on
+ * the one hand, but on the other hand they have the ability to execute
+ * arbitrary user-defined code which may perform operations entirely
+ * outside the system's ability to nullify.
+ */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_insert_before_row)
{
}
else
{
+ uint32 specToken = 0;
+
/*
* Constraints might reference the tableoid column, so initialize
* t_tableOid before evaluating them.
if (resultRelationDesc->rd_att->constr)
ExecConstraints(resultRelInfo, slot, estate);
+ /*
+ * If we are performing speculative insertion, do a non-conclusive
+ * check for conflicts.
+ *
+ * See the executor README for a full discussion of speculative
+ * insertion.
+ */
+ if (spec != SPEC_NONE && resultRelInfo->ri_NumIndices > 0)
+ {
+ ItemPointerData conflictTid;
+vlock:
+ /*
+ * Check if it's required to proceed with the second phase
+ * ("insertion proper") of speculative insertion in respect of the
+ * slot. The check may involve a wait for another session's EOX.
+ */
+ if (!ExecCheckIndexConstraints(slot, estate, &conflictTid,
+ arbiterIndexes))
+ {
+ /*
+ * For the SPEC_IGNORE case, it's necessary to verify that the
+ * tuple is visible to the executor's MVCC snapshot at higher
+ * isolation levels
+ */
+ if (spec == SPEC_IGNORE)
+ ExecCheckHeapTupleVisible(estate, resultRelInfo, &conflictTid);
+
+ /*
+ * The IGNORE path projects no tuples
+ */
+ return NULL;
+ }
+
+ /*
+ * Before we start insertion proper, acquire our "promise tuple
+ * insertion lock". Others can use that (rather than an XID lock,
+ * which is appropriate only for non-promise tuples) to wait for us
+ * to decide if we're going to go ahead with the insertion.
+ */
+ specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
+ SpeculativeTokenSet(tuple->t_data->t_tidstate, specToken);
+ }
+
/*
* insert the tuple
*
* the t_self field.
*/
newId = heap_insert(resultRelationDesc, tuple,
- estate->es_output_cid, 0, NULL);
+ estate->es_output_cid,
+ specToken == 0 ? 0 : HEAP_INSERT_SPECULATIVE,
+ NULL);
/*
* insert index entries for tuple
*/
if (resultRelInfo->ri_NumIndices > 0)
recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
- estate);
+ estate, specToken != 0,
+ arbiterIndexes);
+
+ if (specToken != 0)
+ {
+ bool conflict = recheckIndexes != NIL;
+
+ /*
+ * Speculative insertion does not support deferred constraints,
+ * so the specific index involved in the violation is
+ * immaterial
+ */
+ if (recheckIndexes)
+ list_free(recheckIndexes);
+
+ /*
+ * Consider possible race: concurrent insertion conflicts with
+ * our speculative heap tuple (note that this type of
+ * speculative conflict never involves waiting until EOX).
+ *
+ * Update tuple in-place to indicate if there was (or was not) a
+ * concurrent insertion conflict.
+ */
+ heap_finish_speculative(resultRelationDesc, tuple, conflict);
+
+ /*
+ * Now that heap tuple was marked, waiters woken up by this lmgr
+ * lock release cannot livelock due to observing the same token on
+ * our speculatively inserted tuple repeatedly. They also cannot
+ * deadlock by allowing this session to wait on their XID (or other
+ * heavyweight lock) while this session waits on theirs.
+ */
+ SpeculativeInsertionLockRelease(GetCurrentTransactionId());
+
+ if (conflict)
+ goto vlock;
+
+ /* Since there was no insertion conflict, we're done */
+ }
}
if (canSetTag)
estate->es_output_cid,
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
- &hufd);
+ &hufd,
+ false);
switch (result)
{
case HeapTupleSelfUpdated:
*/
if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple))
recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
- estate);
+ estate, false, NIL);
}
if (canSetTag)
switch (operation)
{
case CMD_INSERT:
- slot = ExecInsert(slot, planSlot, estate, node->canSetTag);
+ slot = ExecInsert(slot, planSlot, node->arbiterIndexes,
+ node->spec, estate, node->canSetTag);
break;
case CMD_UPDATE:
slot = ExecUpdate(tupleid, oldtuple, slot, planSlot,
mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;
mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans);
mtstate->mt_nplans = nplans;
+ mtstate->spec = node->spec;
+ mtstate->arbiterIndexes = node->arbiterIndexes;
/* set up epqstate with dummy subplan data for the moment */
EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);
if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&
operation != CMD_DELETE &&
resultRelInfo->ri_IndexRelationDescs == NULL)
- ExecOpenIndices(resultRelInfo);
+ ExecOpenIndices(resultRelInfo, mtstate->spec != SPEC_NONE);
/* Now init the plan for this result rel */
estate->es_result_relation_info = resultRelInfo;
* copy the identification info of the old tuple: t_ctid, t_self, and
* OID (if any)
*/
- mtuple->t_data->t_ctid = tuple->t_data->t_ctid;
+ mtuple->t_data->t_tidstate.t_ctid = tuple->t_data->t_tidstate.t_ctid;
mtuple->t_self = tuple->t_self;
mtuple->t_tableOid = tuple->t_tableOid;
if (rel->rd_att->tdhasoid)
COPY_NODE_FIELD(resultRelations);
COPY_SCALAR_FIELD(resultRelIndex);
COPY_NODE_FIELD(plans);
+ COPY_SCALAR_FIELD(spec);
+ COPY_NODE_FIELD(arbiterIndexes);
COPY_NODE_FIELD(withCheckOptionLists);
COPY_NODE_FIELD(returningLists);
COPY_NODE_FIELD(fdwPrivLists);
return newnode;
}
+/*
+ * _copyInferenceElem
+ *
+ * Build a new InferenceElem node from "from": the expr field is copied as a
+ * node field, while infercollid, inferopfamily and inferopcinputtype are
+ * copied as scalars.  Returns the newly made node.
+ */
+static InferenceElem *
+_copyInferenceElem(const InferenceElem *from)
+{
+ InferenceElem *newnode = makeNode(InferenceElem);
+
+ COPY_NODE_FIELD(expr);
+ COPY_SCALAR_FIELD(infercollid);
+ COPY_SCALAR_FIELD(inferopfamily);
+ COPY_SCALAR_FIELD(inferopcinputtype);
+
+ return newnode;
+}
+}
+
/*
* _copyTargetEntry
*/
return newnode;
}
+/*
+ * _copyInferClause
+ *
+ * Build a new InferClause node from "from", copying its indexElems and
+ * whereClause node fields and its parse location.  Returns the new node.
+ */
+static InferClause *
+_copyInferClause(const InferClause *from)
+{
+ InferClause *newnode = makeNode(InferClause);
+
+ COPY_NODE_FIELD(indexElems);
+ COPY_NODE_FIELD(whereClause);
+ COPY_LOCATION_FIELD(location);
+
+ return newnode;
+}
+}
+
+/*
+ * _copyConflictClause
+ *
+ * Build a new ConflictClause node from "from", copying the scalar specclause
+ * field, the nested infer node and the parse location.  Returns the new node.
+ */
+static ConflictClause *
+_copyConflictClause(const ConflictClause *from)
+{
+ ConflictClause *newnode = makeNode(ConflictClause);
+
+ COPY_SCALAR_FIELD(specclause);
+ COPY_NODE_FIELD(infer);
+ COPY_LOCATION_FIELD(location);
+
+ return newnode;
+}
+}
+
static CommonTableExpr *
_copyCommonTableExpr(const CommonTableExpr *from)
{
COPY_NODE_FIELD(jointree);
COPY_NODE_FIELD(targetList);
COPY_NODE_FIELD(withCheckOptions);
+ COPY_SCALAR_FIELD(specClause);
+ COPY_NODE_FIELD(arbiterElems);
+ COPY_NODE_FIELD(arbiterWhere);
COPY_NODE_FIELD(returningList);
COPY_NODE_FIELD(groupClause);
COPY_NODE_FIELD(havingQual);
COPY_NODE_FIELD(relation);
COPY_NODE_FIELD(cols);
COPY_NODE_FIELD(selectStmt);
+ COPY_NODE_FIELD(confClause);
COPY_NODE_FIELD(returningList);
COPY_NODE_FIELD(withClause);
case T_CurrentOfExpr:
retval = _copyCurrentOfExpr(from);
break;
+ case T_InferenceElem:
+ retval = _copyInferenceElem(from);
+ break;
case T_TargetEntry:
retval = _copyTargetEntry(from);
break;
case T_WithClause:
retval = _copyWithClause(from);
break;
+ case T_InferClause:
+ retval = _copyInferClause(from);
+ break;
+ case T_ConflictClause:
+ retval = _copyConflictClause(from);
+ break;
case T_CommonTableExpr:
retval = _copyCommonTableExpr(from);
break;
return true;
}
+/*
+ * _equalInferenceElem
+ *
+ * Compare two InferenceElem nodes field by field: expr as a node, then
+ * infercollid, inferopfamily and inferopcinputtype as scalars.  Reaching the
+ * end yields true; the COMPARE_* macros (defined elsewhere) are presumably
+ * what bails out with false on the first difference.
+ */
+static bool
+_equalInferenceElem(const InferenceElem *a, const InferenceElem *b)
+{
+ COMPARE_NODE_FIELD(expr);
+ COMPARE_SCALAR_FIELD(infercollid);
+ COMPARE_SCALAR_FIELD(inferopfamily);
+ COMPARE_SCALAR_FIELD(inferopcinputtype);
+
+ return true;
+}
+
static bool
_equalTargetEntry(const TargetEntry *a, const TargetEntry *b)
{
COMPARE_NODE_FIELD(jointree);
COMPARE_NODE_FIELD(targetList);
COMPARE_NODE_FIELD(withCheckOptions);
+ COMPARE_SCALAR_FIELD(specClause);
+ COMPARE_NODE_FIELD(arbiterElems);
+ COMPARE_NODE_FIELD(arbiterWhere);
COMPARE_NODE_FIELD(returningList);
COMPARE_NODE_FIELD(groupClause);
COMPARE_NODE_FIELD(havingQual);
COMPARE_NODE_FIELD(relation);
COMPARE_NODE_FIELD(cols);
COMPARE_NODE_FIELD(selectStmt);
+ COMPARE_NODE_FIELD(confClause);
COMPARE_NODE_FIELD(returningList);
COMPARE_NODE_FIELD(withClause);
return true;
}
+/*
+ * _equalInferClause
+ *
+ * Compare two InferClause nodes: indexElems and whereClause as node fields,
+ * plus the location field via COMPARE_LOCATION_FIELD.  Returns true when
+ * control reaches the end of the comparisons.
+ */
+static bool
+_equalInferClause(const InferClause *a, const InferClause *b)
+{
+ COMPARE_NODE_FIELD(indexElems);
+ COMPARE_NODE_FIELD(whereClause);
+ COMPARE_LOCATION_FIELD(location);
+
+ return true;
+}
+
+/*
+ * _equalConflictClause
+ *
+ * Compare two ConflictClause nodes: the scalar specclause, the nested infer
+ * node, and the location field via COMPARE_LOCATION_FIELD.  Returns true when
+ * control reaches the end of the comparisons.
+ */
+static bool
+_equalConflictClause(const ConflictClause *a, const ConflictClause *b)
+{
+ COMPARE_SCALAR_FIELD(specclause);
+ COMPARE_NODE_FIELD(infer);
+ COMPARE_LOCATION_FIELD(location);
+
+ return true;
+}
+
static bool
_equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b)
{
case T_CurrentOfExpr:
retval = _equalCurrentOfExpr(a, b);
break;
+ case T_InferenceElem:
+ retval = _equalInferenceElem(a, b);
+ break;
case T_TargetEntry:
retval = _equalTargetEntry(a, b);
break;
case T_WithClause:
retval = _equalWithClause(a, b);
break;
+ case T_InferClause:
+ retval = _equalInferClause(a, b);
+ break;
+ case T_ConflictClause:
+ retval = _equalConflictClause(a, b);
+ break;
case T_CommonTableExpr:
retval = _equalCommonTableExpr(a, b);
break;
case T_CurrentOfExpr:
type = BOOLOID;
break;
+ case T_InferenceElem:
+ {
+ const InferenceElem *n = (const InferenceElem *) expr;
+
+ type = exprType((Node *) n->expr);
+ }
+ break;
case T_PlaceHolderVar:
type = exprType((Node *) ((const PlaceHolderVar *) expr)->phexpr);
break;
case T_CurrentOfExpr:
coll = InvalidOid; /* result is always boolean */
break;
+ case T_InferenceElem:
+ coll = exprCollation((Node *) ((const InferenceElem *) expr)->expr);
+ break;
case T_PlaceHolderVar:
coll = exprCollation((Node *) ((const PlaceHolderVar *) expr)->phexpr);
break;
case T_WithClause:
loc = ((const WithClause *) expr)->location;
break;
+ case T_InferClause:
+ loc = ((const InferClause *) expr)->location;
+ break;
+ case T_ConflictClause:
+ loc = ((const ConflictClause *) expr)->location;
+ break;
case T_CommonTableExpr:
loc = ((const CommonTableExpr *) expr)->location;
break;
/* just use argument's location */
loc = exprLocation((Node *) ((const PlaceHolderVar *) expr)->phexpr);
break;
+ case T_InferenceElem:
+ /* just use nested expr's location */
+ loc = exprLocation((Node *) ((const InferenceElem *) expr)->expr);
+ break;
default:
/* for any other node type it's just unknown... */
loc = -1;
break;
case T_PlaceHolderVar:
return walker(((PlaceHolderVar *) node)->phexpr, context);
+ case T_InferenceElem:
+ return walker(((InferenceElem *) node)->expr, context);
case T_AppendRelInfo:
{
AppendRelInfo *appinfo = (AppendRelInfo *) node;
return true;
if (walker((Node *) query->withCheckOptions, context))
return true;
+ if (walker((Node *) query->arbiterElems, context))
+ return true;
+ if (walker(query->arbiterWhere, context))
+ return true;
if (walker((Node *) query->returningList, context))
return true;
if (walker((Node *) query->jointree, context))
return (Node *) newnode;
}
break;
+ case T_InferenceElem:
+ {
+ InferenceElem *inferenceelemdexpr = (InferenceElem *) node;
+ InferenceElem *newnode;
+
+ FLATCOPY(newnode, inferenceelemdexpr, InferenceElem);
+ MUTATE(newnode->expr, newnode->expr, Node *);
+ return (Node *) newnode;
+ }
+ break;
case T_AppendRelInfo:
{
AppendRelInfo *appinfo = (AppendRelInfo *) node;
MUTATE(query->targetList, query->targetList, List *);
MUTATE(query->withCheckOptions, query->withCheckOptions, List *);
+ MUTATE(query->arbiterElems, query->arbiterElems, List *);
+ MUTATE(query->arbiterWhere, query->arbiterWhere, Node *);
MUTATE(query->returningList, query->returningList, List *);
MUTATE(query->jointree, query->jointree, FromExpr *);
MUTATE(query->setOperations, query->setOperations, Node *);
return true;
if (walker(stmt->selectStmt, context))
return true;
+ if (walker(stmt->confClause, context))
+ return true;
if (walker(stmt->returningList, context))
return true;
if (walker(stmt->withClause, context))
break;
case T_WithClause:
return walker(((WithClause *) node)->ctes, context);
+ case T_InferClause:
+ {
+ InferClause *stmt = (InferClause *) node;
+
+ if (walker(stmt->indexElems, context))
+ return true;
+ if (walker(stmt->whereClause, context))
+ return true;
+ }
+ break;
+ case T_ConflictClause:
+ {
+ ConflictClause *stmt = (ConflictClause *) node;
+
+ if (walker(stmt->infer, context))
+ return true;
+ }
+ break;
case T_CommonTableExpr:
return walker(((CommonTableExpr *) node)->ctequery, context);
default:
WRITE_NODE_FIELD(resultRelations);
WRITE_INT_FIELD(resultRelIndex);
WRITE_NODE_FIELD(plans);
+ WRITE_ENUM_FIELD(spec, SpecCmd);
+ WRITE_NODE_FIELD(arbiterIndexes);
WRITE_NODE_FIELD(withCheckOptionLists);
WRITE_NODE_FIELD(returningLists);
WRITE_NODE_FIELD(fdwPrivLists);
WRITE_INT_FIELD(cursor_param);
}
+/*
+ * _outInferenceElem
+ *
+ * Write an InferenceElem node to "str" under the label "INFERENCEELEM":
+ * the expr node first, then infercollid, inferopfamily and
+ * inferopcinputtype as OID fields.  _readInferenceElem reads the fields
+ * back in this same order.
+ */
+static void
+_outInferenceElem(StringInfo str, const InferenceElem *node)
+{
+ WRITE_NODE_TYPE("INFERENCEELEM");
+
+ WRITE_NODE_FIELD(expr);
+ WRITE_OID_FIELD(infercollid);
+ WRITE_OID_FIELD(inferopfamily);
+ WRITE_OID_FIELD(inferopcinputtype);
+}
+
static void
_outTargetEntry(StringInfo str, const TargetEntry *node)
{
WRITE_NODE_FIELD(jointree);
WRITE_NODE_FIELD(targetList);
WRITE_NODE_FIELD(withCheckOptions);
+ WRITE_ENUM_FIELD(specClause, SpecCmd);
+ WRITE_NODE_FIELD(arbiterElems);
+ WRITE_NODE_FIELD(arbiterWhere);
WRITE_NODE_FIELD(returningList);
WRITE_NODE_FIELD(groupClause);
WRITE_NODE_FIELD(havingQual);
case T_CurrentOfExpr:
_outCurrentOfExpr(str, obj);
break;
+ case T_InferenceElem:
+ _outInferenceElem(str, obj);
+ break;
case T_TargetEntry:
_outTargetEntry(str, obj);
break;
READ_NODE_FIELD(jointree);
READ_NODE_FIELD(targetList);
READ_NODE_FIELD(withCheckOptions);
+ READ_ENUM_FIELD(specClause, SpecCmd);
+ READ_NODE_FIELD(arbiterElems);
+ READ_NODE_FIELD(arbiterWhere);
READ_NODE_FIELD(returningList);
READ_NODE_FIELD(groupClause);
READ_NODE_FIELD(havingQual);
READ_DONE();
}
+/*
+ * _readInferenceElem
+ *
+ * Read back an InferenceElem node.  Fields are consumed in the order
+ * _outInferenceElem emits them: the expr node, then the infercollid,
+ * inferopfamily and inferopcinputtype OIDs.
+ */
+static InferenceElem *
+_readInferenceElem(void)
+{
+ READ_LOCALS(InferenceElem);
+
+ READ_NODE_FIELD(expr);
+ READ_OID_FIELD(infercollid);
+ READ_OID_FIELD(inferopfamily);
+ READ_OID_FIELD(inferopcinputtype);
+
+ READ_DONE();
+}
+
/*
* _readTargetEntry
*/
return_value = _readSetToDefault();
else if (MATCH("CURRENTOFEXPR", 13))
return_value = _readCurrentOfExpr();
+ else if (MATCH("INFERENCEELEM", 13))
+ return_value = _readInferenceElem();
else if (MATCH("TARGETENTRY", 11))
return_value = _readTargetEntry();
else if (MATCH("RANGETBLREF", 11))
Index nominalRelation,
List *resultRelations, List *subplans,
List *withCheckOptionLists, List *returningLists,
- List *rowMarks, int epqParam)
+ List *rowMarks, SpecCmd spec, int epqParam)
{
ModifyTable *node = makeNode(ModifyTable);
Plan *plan = &node->plan;
node->resultRelations = resultRelations;
node->resultRelIndex = -1; /* will be set correctly in setrefs.c */
node->plans = subplans;
+ node->spec = spec;
+ node->arbiterIndexes = NIL;
node->withCheckOptionLists = withCheckOptionLists;
node->returningLists = returningLists;
node->rowMarks = rowMarks;
}
node->fdwPrivLists = fdw_private_list;
+ /*
+ * If a set of unique index inference elements was provided (an INSERT...ON
+ * CONFLICT "inference specification"), then infer appropriate unique
+ * indexes (or throw an error if none are available).
+ */
+ if (root->parse->arbiterElems)
+ node->arbiterIndexes = infer_arbiter_indexes(root);
+
return node;
}
withCheckOptionLists,
returningLists,
rowMarks,
+ parse->specClause,
SS_assign_special_param(root));
}
}
withCheckOptionLists,
returningLists,
rowMarks,
+ parse->specClause,
SS_assign_special_param(root));
}
get_relation_info_hook_type get_relation_info_hook = NULL;
+static bool infer_collation_opclass_match(InferenceElem *elem, Relation idxRel,
+ Bitmapset *inferAttrs, List *idxExprs);
static int32 get_rel_data_width(Relation rel, int32 *attr_widths);
static List *get_relation_constraints(PlannerInfo *root,
Oid relationObjectId, RelOptInfo *rel,
(*get_relation_info_hook) (root, relationObjectId, inhparent, rel);
}
+/*
+ * infer_arbiter_indexes -
+ * Retrieves unique indexes to arbitrate speculative insertion.
+ *
+ * Uses user-supplied inference clause expressions and predicate to match a
+ * unique index from those defined and ready on the heap relation (target). An
+ * exact match is required on columns/expressions (although they can appear in
+ * any order). However, the predicate given by the user need only restrict
+ * insertion to a subset of some part of the table covered by some particular
+ * unique index (in particular, a partial unique index) in order to be
+ * inferred.
+ *
+ * The implementation does not consider which B-Tree operator class any
+ * particular available unique index attribute uses, unless one appeared in the
+ * user-supplied inference specification (the same is true of collations). In
+ * particular, there is no system dependency on the default operator class for
+ * the purposes of inference. If no opclass (or collation) is specified, then
+ * all matching indexes (that may or may not match the default in terms of each
+ * attribute opclass/collation) are used for inference.
+ *
+ * This logic somewhat mirrors get_relation_info(). This process is not
+ * deferred to a get_relation_info() call while planning because there may not
+ * be any such call.
+ *
+ * Returns the list of OIDs of every index that satisfies the inference
+ * specification. An error is raised if no index qualifies, so the result is
+ * never NIL.
+ */
+List *
+infer_arbiter_indexes(PlannerInfo *root)
+{
+ Query *parse = root->parse;
+
+ /* Iteration state */
+ Relation relation;
+ Oid relationObjectId;
+ List *indexList;
+ ListCell *l;
+
+ /* Normalized inference attributes and inference expressions: */
+ Bitmapset *inferAttrs = NULL;
+ List *inferElems = NIL;
+
+ /* Result */
+ List *candidates = NIL;
+
+ Assert(parse->specClause == SPEC_IGNORE);
+
+ /*
+ * We need not lock the relation since it was already locked, either by
+ * the rewriter or when expand_inherited_rtentry() added it to the query's
+ * rangetable.
+ */
+ relationObjectId = rt_fetch(parse->resultRelation, parse->rtable)->relid;
+
+ relation = heap_open(relationObjectId, NoLock);
+
+ /*
+ * Build normalized/BMS representation of plain indexed attributes, as well
+ * as direct list of inference elements. This is required for matching the
+ * cataloged definition of indexes.
+ */
+ foreach(l, parse->arbiterElems)
+ {
+ InferenceElem *elem;
+ Var *var;
+ int attno;
+
+ elem = (InferenceElem *) lfirst(l);
+
+ /*
+ * Parse analysis of inference elements performs full parse analysis
+ * of Vars, even for non-expression indexes (in contrast with utility
+ * command related use of IndexElem). However, indexes are cataloged
+ * with simple attribute numbers for non-expression indexes. Those are
+ * handled later.
+ */
+ if (!IsA(elem->expr, Var))
+ {
+ inferElems = lappend(inferElems, elem->expr);
+ continue;
+ }
+
+ var = (Var *) elem->expr;
+ attno = var->varattno;
+
+ if (attno < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+ errmsg("system columns may not appear in unique index inference specification")));
+ else if (attno == 0)
+ elog(ERROR, "whole row unique index inference specifications are not valid");
+
+ inferAttrs = bms_add_member(inferAttrs, attno);
+ }
+
+ indexList = RelationGetIndexList(relation);
+
+ /*
+ * Using that representation, iterate through the list of indexes on the
+ * target relation to try and find a match
+ */
+ foreach(l, indexList)
+ {
+ Oid indexoid = lfirst_oid(l);
+ Relation idxRel;
+ Form_pg_index idxForm;
+ Bitmapset *indexedAttrs = NULL;
+ List *idxExprs;
+ List *predExprs;
+ List *whereExplicit;
+ AttrNumber natt;
+ ListCell *el;
+
+ /*
+ * Extract info from the relation descriptor for the index. We know
+ * that this is a target, so get lock type it is known will ultimately
+ * be required by the executor.
+ *
+ * Let executor complain about !indimmediate case directly.
+ */
+ idxRel = index_open(indexoid, RowExclusiveLock);
+ idxForm = idxRel->rd_index;
+
+ if (!idxForm->indisunique ||
+ !IndexIsValid(idxForm))
+ goto next;
+
+ /*
+ * If the index is valid, but cannot yet be used, ignore it. See
+ * src/backend/access/heap/README.HOT for discussion.
+ */
+ if (idxForm->indcheckxmin &&
+ !TransactionIdPrecedes(HeapTupleHeaderGetXmin(idxRel->rd_indextuple->t_data),
+ TransactionXmin))
+ goto next;
+
+ /* Build BMS representation of cataloged index attributes */
+ for (natt = 0; natt < idxForm->indnatts; natt++)
+ {
+ int attno = idxRel->rd_index->indkey.values[natt];
+
+ if (attno < 0)
+ elog(ERROR, "system column in index");
+
+ /* attno 0 entries are skipped here; expressions are matched below */
+ if (attno != 0)
+ indexedAttrs = bms_add_member(indexedAttrs, attno);
+ }
+
+ /* Non-expression attributes (if any) must match */
+ if (!bms_equal(indexedAttrs, inferAttrs))
+ goto next;
+
+ /* Expression attributes (if any) must match */
+ idxExprs = RelationGetIndexExpressions(idxRel);
+ foreach(el, parse->arbiterElems)
+ {
+ InferenceElem *elem = (InferenceElem *) lfirst(el);
+
+ /*
+ * Ensure that collation/opclass aspects of inference expression
+ * element match. Even though this loop is primarily concerned
+ * with matching expressions, it is a convenient point to check
+ * this for both expressions and ordinary (non-expression)
+ * attributes appearing as inference elements.
+ */
+ if (!infer_collation_opclass_match(elem, idxRel, inferAttrs,
+ idxExprs))
+ goto next;
+
+ /*
+ * Plain Vars don't factor into count of expression elements, and
+ * the question of whether or not they satisfy the index definition
+ * has already been considered (they must)
+ */
+ if (IsA(elem->expr, Var))
+ continue;
+
+ /*
+ * Might as well avoid redundant check in the rare cases where
+ * infer_collation_opclass_match() is required to do real work.
+ * Otherwise, check that element expression appears in cataloged
+ * index definition.
+ */
+ if (elem->infercollid != InvalidOid ||
+ elem->inferopfamily != InvalidOid ||
+ list_member(idxExprs, elem->expr))
+ continue;
+
+ goto next;
+ }
+
+ /*
+ * Now that all inference elements were matched, ensure that the
+ * expression elements from inference clause are not missing any
+ * cataloged expressions. This does the right thing when unique
+ * indexes redundantly repeat the same attribute, or if attributes
+ * redundantly appear multiple times within an inference clause.
+ */
+ if (list_difference(idxExprs, inferElems) != NIL)
+ goto next;
+
+ /*
+ * The cataloged index definition's predicate (if any) need only be
+ * implied by the user-supplied ON CONFLICT unique index inference
+ * WHERE clause; the two do not have to be identical.
+ */
+ predExprs = RelationGetIndexPredicate(idxRel);
+ whereExplicit = make_ands_implicit((Expr *) parse->arbiterWhere);
+
+ if (!predicate_implied_by(predExprs, whereExplicit))
+ goto next;
+
+ candidates = lappend_oid(candidates, idxForm->indexrelid);
+next:
+ /* Closed with NoLock: the lock taken by index_open() is not released here */
+ index_close(idxRel, NoLock);
+ }
+
+ list_free(indexList);
+ heap_close(relation, NoLock);
+
+ if (candidates == NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+ errmsg("could not infer which unique index to use from expressions/columns and predicate provided for ON CONFLICT")));
+
+ return candidates;
+}
+
+/*
+ * infer_collation_opclass_match - ensure infer element opclass/collation match
+ *
+ * Given unique index inference element from inference specification, if
+ * collation was specified, or if opclass (represented here as opfamily +
+ * opcintype) was specified, verify that there is at least one indexed
+ * attribute that both *is* the column/expression named by the element and
+ * carries the requested collation/opclass.  Skip this in the common case
+ * where inference specification does not include collation or opclass
+ * (instead matching everything, regardless of cataloged collation/opclass of
+ * indexed attribute).
+ *
+ * At least historically, Postgres has not offered collations or opclasses with
+ * alternative-to-default notions of equality, so these additional criteria
+ * should only actually be matched on infrequently.
+ *
+ * Don't give up immediately when an inference element matches some attribute
+ * cataloged as indexed but not matching additional opclass/collation criteria.
+ * This is done so that the implementation is as forgiving as possible of
+ * redundancy within cataloged index attributes (or, less usefully, within
+ * inference specification elements). If collations actually differ between
+ * apparently redundantly indexed attributes (redundant within or across
+ * indexes), then there really is no redundancy as such.
+ *
+ * Note that if an inference element specifies an opclass and a collation at
+ * once, both must match in at least one particular attribute within index
+ * catalog definition in order for that inference element to be considered
+ * inferred/satisfied.
+ *
+ * elem:       inference element to check
+ * idxRel:     opened candidate index
+ * inferAttrs: set of plain attribute numbers named by the inference clause;
+ *             no longer consulted (each Var is compared directly with the
+ *             index column under examination) but kept so that the
+ *             signature is unchanged for callers
+ * idxExprs:   the index's expression columns, assumed to hold one entry per
+ *             zero indkey entry, in index column order
+ *
+ * Returns true if the element is satisfied by some attribute of the index.
+ */
+static bool
+infer_collation_opclass_match(InferenceElem *elem, Relation idxRel,
+							  Bitmapset *inferAttrs, List *idxExprs)
+{
+	AttrNumber	natt;
+	int			nexprs = 0;		/* expression columns passed so far */
+
+	/*
+	 * If inference specification element lacks collation/opclass, then no need
+	 * to check for exact match
+	 */
+	if (elem->infercollid == InvalidOid && elem->inferopfamily == InvalidOid)
+		return true;
+
+	for (natt = 1; natt <= idxRel->rd_att->natts; natt++)
+	{
+		Oid			opfamily = idxRel->rd_opfamily[natt - 1];
+		Oid			opcinputtype = idxRel->rd_opcintype[natt - 1];
+		Oid			collation = idxRel->rd_indcollation[natt - 1];
+		int			attno = idxRel->rd_index->indkey.values[natt - 1];
+		Node	   *attExpr = NULL;
+
+		/*
+		 * For an expression column, look up the expression cataloged for
+		 * this very index column.  This must happen before any "continue"
+		 * so that the running position within idxExprs stays in step.
+		 */
+		if (attno == 0)
+		{
+			if (nexprs < list_length(idxExprs))
+				attExpr = (Node *) list_nth(idxExprs, nexprs);
+			nexprs++;
+		}
+
+		if (elem->inferopfamily != InvalidOid &&
+			(elem->inferopfamily != opfamily ||
+			 elem->inferopcinputtype != opcinputtype))
+		{
+			/* Attribute needed to match opclass, but didn't */
+			continue;
+		}
+
+		if (elem->infercollid != InvalidOid &&
+			elem->infercollid != collation)
+		{
+			/* Attribute needed to match collation, but didn't */
+			continue;
+		}
+
+		/*
+		 * The collation/opclass criteria hold for this index column; it
+		 * only satisfies the element if it is also the column (or
+		 * expression) that the element names.  Testing mere membership in
+		 * the inference clause would let an unrelated column's
+		 * collation/opclass satisfy the element.
+		 */
+		if (IsA(elem->expr, Var))
+		{
+			if (((Var *) elem->expr)->varattno == attno)
+				return true;	/* Found one match - good enough */
+		}
+		else if (attExpr != NULL && equal(elem->expr, attExpr))
+			return true;		/* Found one match - good enough */
+	}
+
+	return false;
+}
+
/*
* estimate_rel_size - estimate # pages and # tuples in a table or index
*
/* done building the range table and jointree */
qry->rtable = pstate->p_rtable;
qry->jointree = makeFromExpr(pstate->p_joinlist, qual);
+ qry->specClause = SPEC_NONE;
qry->hasSubLinks = pstate->p_hasSubLinks;
qry->hasWindowFuncs = pstate->p_hasWindowFuncs;
{
Query *qry = makeNode(Query);
SelectStmt *selectStmt = (SelectStmt *) stmt->selectStmt;
+ SpecCmd spec = stmt->confClause ? stmt->confClause->specclause : SPEC_NONE;
List *exprList = NIL;
bool isGeneralSelect;
List *sub_rtable;
}
/*
- * If we have a RETURNING clause, we need to add the target relation to
- * the query namespace before processing it, so that Var references in
- * RETURNING will work. Also, remove any namespace entries added in a
- * sub-SELECT or VALUES list.
+ * If we have a RETURNING clause, or there are inference elements used for
+ * ON CONFLICT, we need to add the target relation to the query
+ * namespace before processing it, so that Var references in RETURNING
+ * and/or the inference specification will work. Also, remove any
+ * namespace entries added in a sub-SELECT or VALUES list.
*/
- if (stmt->returningList)
+ if (stmt->returningList || stmt->confClause)
{
pstate->p_namespace = NIL;
addRTEtoQuery(pstate, pstate->p_target_rangetblentry,
qry->rtable = pstate->p_rtable;
qry->jointree = makeFromExpr(pstate->p_joinlist, NULL);
+ qry->specClause = spec;
qry->hasSubLinks = pstate->p_hasSubLinks;
+ if (stmt->confClause)
+ {
+ /*
+ * Perform parse analysis of arbiter columns/expressions. These are
+ * later used to infer a unique index which arbitrates whether or not
+ * to take the alternative ON CONFLICT path (i.e. whether or not to
+ * INSERT or take the alternative path in respect of each slot proposed
+ * for insertion).
+ */
+ transformConflictClause(pstate, stmt->confClause, &qry->arbiterElems,
+ &qry->arbiterWhere);
+ }
+
assign_query_collations(pstate, qry);
return qry;
qry->rtable = pstate->p_rtable;
qry->jointree = makeFromExpr(pstate->p_joinlist, qual);
+ qry->specClause = SPEC_NONE;
qry->hasSubLinks = pstate->p_hasSubLinks;
qry->hasWindowFuncs = pstate->p_hasWindowFuncs;
RangeVar *range;
IntoClause *into;
WithClause *with;
+ InferClause *infer;
+ ConflictClause *conf;
A_Indices *aind;
ResTarget *target;
struct PrivTarget *privtarget;
%type <defelt> SeqOptElem
%type <istmt> insert_rest
+%type <infer> opt_conf_expr
+%type <conf> opt_on_conflict
%type <vsetstmt> generic_set set_rest set_rest_more generic_reset reset_rest
SetResetClause FunctionSetResetClause
CACHE CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
CLUSTER COALESCE COLLATE COLLATION COLUMN COMMENT COMMENTS COMMIT
- COMMITTED CONCURRENTLY CONFIGURATION CONNECTION CONSTRAINT CONSTRAINTS
- CONTENT_P CONTINUE_P CONVERSION_P COPY COST CREATE
+ COMMITTED CONCURRENTLY CONFIGURATION CONFLICT CONNECTION CONSTRAINT
+ CONSTRAINTS CONTENT_P CONTINUE_P CONVERSION_P COPY COST CREATE
CROSS CSV CURRENT_P
CURRENT_CATALOG CURRENT_DATE CURRENT_ROLE CURRENT_SCHEMA
CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE
HANDLER HAVING HEADER_P HOLD HOUR_P
- IDENTITY_P IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P
+ IDENTITY_P IF_P IGNORE_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P
INCLUDING INCREMENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P
INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER
INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION
%nonassoc IS ISNULL NOTNULL /* IS sets precedence for IS NULL, etc */
%nonassoc '<' '>' '=' LESS_EQUALS GREATER_EQUALS NOT_EQUALS
%nonassoc BETWEEN IN_P LIKE ILIKE SIMILAR NOT_LA
+%nonassoc DISTINCT
+%nonassoc ON
%nonassoc ESCAPE /* ESCAPE must be just above LIKE/ILIKE/SIMILAR */
%nonassoc OVERLAPS
%left POSTFIXOP /* dummy for postfix Op rules */
*****************************************************************************/
InsertStmt:
- opt_with_clause INSERT INTO qualified_name insert_rest returning_clause
+ opt_with_clause INSERT INTO qualified_name insert_rest
+ opt_on_conflict returning_clause
{
$5->relation = $4;
- $5->returningList = $6;
+ $5->confClause = $6;
+ $5->returningList = $7;
$5->withClause = $1;
$$ = (Node *) $5;
}
}
;
+/*
+ * Optional ON CONFLICT clause of INSERT.  The only variant accepted here is
+ * "ON CONFLICT [inference specification] IGNORE"; it yields a ConflictClause
+ * node.  When the clause is absent the result is NULL.
+ */
+opt_on_conflict:
+ ON CONFLICT opt_conf_expr IGNORE_P
+ {
+ $$ = makeNode(ConflictClause);
+ $$->specclause = SPEC_IGNORE;
+ /* may be NULL: the inference specification is optional */
+ $$->infer = $3;
+ /* location of the ON keyword */
+ $$->location = @1;
+ }
+ | /*EMPTY*/
+ {
+ $$ = NULL;
+ }
+ ;
+
+/*
+ * Optional inference specification of ON CONFLICT: a parenthesized list of
+ * index_params (the same production CREATE INDEX uses) followed by an
+ * optional WHERE clause.  Yields an InferClause node, or NULL when absent.
+ */
+opt_conf_expr:
+ '(' index_params where_clause ')'
+ {
+ $$ = makeNode(InferClause);
+ $$->indexElems = $2;
+ $$->whereClause = $3;
+ /* location of the opening parenthesis */
+ $$->location = @1;
+ }
+ | /*EMPTY*/
+ {
+ $$ = NULL;
+ }
+ ;
+
returning_clause:
RETURNING target_list { $$ = $2; }
| /* EMPTY */ { $$ = NIL; }
| COMMIT
| COMMITTED
| CONFIGURATION
+ | CONFLICT
| CONNECTION
| CONSTRAINTS
| CONTENT_P
| HOUR_P
| IDENTITY_P
| IF_P
+ | IGNORE_P
| IMMEDIATE
| IMMUTABLE
| IMPLICIT_P
#include "postgres.h"
#include "access/heapam.h"
+#include "catalog/catalog.h"
#include "catalog/heap.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
#include "parser/parse_oper.h"
#include "parser/parse_relation.h"
#include "parser/parse_target.h"
+#include "parser/parse_type.h"
#include "rewrite/rewriteManip.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
List **tlist, ParseExprKind exprKind);
static int get_matching_location(int sortgroupref,
List *sortgrouprefs, List *exprs);
+static List *resolve_unique_index_expr(ParseState *pstate, InferClause * infer,
+ Relation heapRel);
static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
List *grouplist, List *targetlist, int location,
bool resolveUnknown);
return -1; /* keep compiler quiet */
}
+/*
+ * resolve_unique_index_expr
+ *		Parse-analyze the elements of an ON CONFLICT inference
+ *		specification.
+ *
+ * Every IndexElem in infer->indexElems is turned into one InferenceElem that
+ * carries the transformed expression together with the collation, operator
+ * family and opclass input type that were named for it (InvalidOid for each
+ * one that was not specified).  The InferenceElems are returned as a list, in
+ * the order the elements were written.  The planner later uses that list to
+ * infer which unique index to use.
+ *
+ * heapRel is accepted for the caller's convenience; it is not consulted here.
+ */
+static List *
+resolve_unique_index_expr(ParseState *pstate, InferClause *infer,
+						  Relation heapRel)
+{
+	List	   *inferElems = NIL;
+	ListCell   *lc;
+
+	foreach(lc, infer->indexElems)
+	{
+		IndexElem  *ielem = (IndexElem *) lfirst(lc);
+		InferenceElem *inferElem = makeNode(InferenceElem);
+		Node	   *rawExpr = (Node *) ielem->expr;
+
+		/*
+		 * The raw grammar shares CREATE INDEX's index_params production, so
+		 * it accepts opclass names, ASC/DESC, NULLS FIRST/LAST and so on.
+		 * Sort direction and NULLS placement have no bearing on which unique
+		 * index is inferred, so rather than silently ignoring them, reject
+		 * any explicit specification outright.
+		 */
+		if (!(ielem->ordering == SORTBY_DEFAULT &&
+			  ielem->nulls_ordering == SORTBY_NULLS_DEFAULT))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+					 errmsg("ON CONFLICT does not accept ordering or NULLS FIRST/LAST specifications"),
+					 errhint("These factors do not affect uniqueness of indexed datums."),
+					 parser_errposition(pstate,
+										exprLocation((Node *) infer))));
+
+		if (rawExpr == NULL)
+		{
+			/*
+			 * Plain column: the grammar recorded only its name, so build the
+			 * raw column reference ourselves.  The planner expects a fully
+			 * transformed expression and does its own normalization when
+			 * matching against pg_index.
+			 */
+			ColumnRef  *colref = makeNode(ColumnRef);
+
+			colref->fields = list_make1(makeString(ielem->name));
+			/* best available position: that of the inference specification */
+			colref->location = infer->location;
+			rawExpr = (Node *) colref;
+		}
+
+		/*
+		 * Using the index-expression kind makes transformExpr() reject
+		 * subqueries, aggregates and window functions.  Set-returning
+		 * expressions get through, but that is harmless: no unique
+		 * expression index should exist for them to match against.
+		 */
+		inferElem->expr = transformExpr(pstate, rawExpr,
+										EXPR_KIND_INDEX_EXPRESSION);
+
+		/* Resolve an explicitly named collation, if any */
+		inferElem->infercollid = InvalidOid;
+		if (ielem->collation)
+			inferElem->infercollid =
+				LookupCollation(pstate, ielem->collation,
+								exprLocation(inferElem->expr));
+
+		/* Resolve an explicitly named (btree) operator class, if any */
+		inferElem->inferopfamily = InvalidOid;
+		inferElem->inferopcinputtype = InvalidOid;
+		if (ielem->opclass)
+		{
+			Oid			opclassOid = get_opclass_oid(BTREE_AM_OID,
+													 ielem->opclass,
+													 false);
+
+			inferElem->inferopfamily = get_opclass_family(opclassOid);
+			inferElem->inferopcinputtype = get_opclass_input_type(opclassOid);
+		}
+
+		inferElems = lappend(inferElems, inferElem);
+	}
+
+	return inferElems;
+}
+
+/*
+ * transformConflictClause -
+ *	  transform expressions of ON CONFLICT.
+ *
+ * Transformed expressions used to infer one unique index relation to serve as
+ * an ON CONFLICT arbiter.  Partial unique indexes may be inferred using WHERE
+ * clause from inference specification clause.
+ *
+ * On return, *arbiterExpr holds the list of InferenceElems built from the
+ * inference specification (NIL if there was none), and *arbiterWhere holds
+ * the transformed inference WHERE clause (NULL if there was none).  Raises an
+ * error if the target relation is a catalog relation.
+ */
+void
+transformConflictClause(ParseState *pstate, ConflictClause *confClause,
+						List **arbiterExpr, Node **arbiterWhere)
+{
+	InferClause *infer = confClause->infer;
+
+	/*
+	 * Always define the outputs, so that callers need not pre-initialize them
+	 * for the cases where no inference clause or no WHERE clause was given.
+	 */
+	*arbiterExpr = NIL;
+	*arbiterWhere = NULL;
+
+	/*
+	 * To simplify certain aspects of its design, speculative insertion into
+	 * system catalogs is disallowed
+	 */
+	if (IsCatalogRelation(pstate->p_target_relation))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("ON CONFLICT not supported with catalog relations"),
+				 parser_errposition(pstate,
+									exprLocation((Node *) confClause))));
+
+	/* ON CONFLICT IGNORE does not require an inference clause */
+	if (infer)
+	{
+		/*
+		 * It's convenient to form a list of expressions based on the
+		 * representation used by CREATE INDEX, since the same restrictions
+		 * are appropriate (e.g. on subqueries).  However, from here on, a
+		 * dedicated primnode representation is used for inference elements,
+		 * and so assign_query_collations() can be trusted to do the right
+		 * thing with the post parse analysis query tree inference clause
+		 * representation.
+		 */
+		*arbiterExpr = resolve_unique_index_expr(pstate, infer,
+												 pstate->p_target_relation);
+
+		/*
+		 * Handling inference WHERE clause (for partial unique index
+		 * inference)
+		 */
+		if (infer->whereClause)
+			*arbiterWhere = transformExpr(pstate, infer->whereClause,
+										  EXPR_KIND_INDEX_PREDICATE);
+	}
+}
+
/*
* addTargetToSortList
* If the given targetlist entry isn't already in the SortGroupClause
case T_JoinExpr:
case T_FromExpr:
case T_SortGroupClause:
+ case T_InferenceElem:
(void) expression_tree_walker(node,
assign_collations_walker,
(void *) &loccontext);
ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
break;
+ case XLOG_HEAP_AFFIRM:
+ /*
+ * Speculative insertion is actually affirmed by the absence of
+ * super deletion; do nothing with this
+ */
+ break;
case XLOG_HEAP_LOCK:
/* we don't care about row level locks for now */
break;
return;
change = ReorderBufferGetChange(ctx->reorder);
- change->action = REORDER_BUFFER_CHANGE_INSERT;
+ if (!(xlrec->flags & XLOG_HEAP_SPECULATIVE_TUPLE))
+ change->action = REORDER_BUFFER_CHANGE_INSERT;
+ else
+ change->action = REORDER_BUFFER_CHANGE_INTERNAL_INSERT;
+
memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
return;
change = ReorderBufferGetChange(ctx->reorder);
- change->action = REORDER_BUFFER_CHANGE_DELETE;
+ if (!(xlrec->flags & XLOG_HEAP_SPECULATIVE_TUPLE))
+ change->action = REORDER_BUFFER_CHANGE_DELETE;
+ else
+ change->action = REORDER_BUFFER_CHANGE_INTERNAL_DELETE;
memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
case REORDER_BUFFER_CHANGE_INSERT:
case REORDER_BUFFER_CHANGE_UPDATE:
case REORDER_BUFFER_CHANGE_DELETE:
+ case REORDER_BUFFER_CHANGE_INTERNAL_INSERT:
+ case REORDER_BUFFER_CHANGE_INTERNAL_DELETE:
if (change->data.tp.newtuple)
{
ReorderBufferReturnTupleBuf(rb, change->data.tp.newtuple);
PG_TRY();
{
ReorderBufferChange *change;
+ ReorderBufferChange *specinsert = NULL;
if (using_subtxn)
BeginInternalSubTransaction("replay");
case REORDER_BUFFER_CHANGE_INSERT:
case REORDER_BUFFER_CHANGE_UPDATE:
case REORDER_BUFFER_CHANGE_DELETE:
+ case REORDER_BUFFER_CHANGE_INTERNAL_INSERT:
+ case REORDER_BUFFER_CHANGE_INTERNAL_DELETE:
Assert(snapshot_now);
reloid = RelidByRelfilenode(change->data.tp.relnode.spcNode,
/* user-triggered change */
else if (!IsToastRelation(relation))
{
+ /*
+ * Previous speculative insertion's success
+ * affirmed by a new (non-superdelete) DML change
+ */
+ if (specinsert &&
+ change->action !=
+ REORDER_BUFFER_CHANGE_INTERNAL_DELETE)
+ {
+ /* Report as proper insert to client */
+ specinsert->action = REORDER_BUFFER_CHANGE_INSERT;
+ rb->apply_change(rb, txn, relation, specinsert);
+
+ /* Free memory from pending tuple */
+ Assert(specinsert->data.tp.oldtuple == NULL);
+ ReorderBufferReturnTupleBuf(rb, specinsert->data.tp.newtuple);
+ specinsert = NULL;
+ }
+
ReorderBufferToastReplace(rb, txn, relation, change);
- rb->apply_change(rb, txn, relation, change);
+
+ /*
+ * Kludge: Speculative insertion occasionally
+ * makes use of "super deletion" -- an
+ * implementation defined delete of a speculatively
+ * inserted tuple. Neither the super deletion, nor
+ * the insertion (which must be the prior record
+ * type) are included in the final assembly when
+ * the tuple was super-deleted. Otherwise, an
+ * ordinary insertion is assembled.
+ */
+ if (change->action == REORDER_BUFFER_CHANGE_INTERNAL_INSERT)
+ {
+ /*
+ * Need to ensure the memory used by promise
+ * tuple isn't freed till we're done verifying
+ * that there is no super deletion that
+ * immediately follows. Otherwise it could get
+ * freed/reused while restoring spooled data
+ * from disk.
+ */
+ dlist_delete(&change->node);
+ specinsert = change;
+ /* Don't clear reassembled toast chunks */
+ continue;
+ }
+ else if (change->action ==
+ REORDER_BUFFER_CHANGE_INTERNAL_DELETE)
+ {
+ Assert(RelFileNodeEquals(change->data.tp.relnode,
+ specinsert->data.tp.relnode));
+
+ /*
+ * Free memory from pending tuple. Do not
+ * report as logical delete to encoding plugin.
+ */
+ Assert(specinsert->data.tp.oldtuple == NULL);
+ ReorderBufferReturnTupleBuf(rb, specinsert->data.tp.newtuple);
+ specinsert = NULL;
+ }
+ else
+ {
+ /*
+ * Handle non-speculative insertion related
+ * changes
+ */
+ rb->apply_change(rb, txn, relation, change);
+ }
/*
* Only clear reassembled toast chunks if we're
}
}
+ /*
+ * Previous speculative insertion's success affirmed by reaching end of
+ * xact's changes
+ */
+ if (specinsert)
+ {
+ Relation relation;
+ Oid reloid;
+
+ reloid = RelidByRelfilenode(specinsert->data.tp.relnode.spcNode,
+ specinsert->data.tp.relnode.relNode);
+
+ /*
+ * Catalog tuple without data, emitted while catalog was
+ * in the process of being rewritten.
+ */
+ if (reloid == InvalidOid)
+ elog(ERROR, "could not map filenode \"%s\" to relation OID",
+ relpathperm(specinsert->data.tp.relnode,
+ MAIN_FORKNUM));
+
+ relation = RelationIdGetRelation(reloid);
+ /* Report as proper insert to client */
+ specinsert->action = REORDER_BUFFER_CHANGE_INSERT;
+ rb->apply_change(rb, txn, relation, specinsert);
+ /* Free memory from pending tuple */
+ Assert(specinsert->data.tp.oldtuple == NULL);
+ ReorderBufferReturnTupleBuf(rb, specinsert->data.tp.newtuple);
+ }
+
/* clean up the iterator */
ReorderBufferIterTXNFinish(rb, iterstate);
iterstate = NULL;
case REORDER_BUFFER_CHANGE_UPDATE:
/* fall through */
case REORDER_BUFFER_CHANGE_DELETE:
+ /* fall through */
+ case REORDER_BUFFER_CHANGE_INTERNAL_INSERT:
+ /* fall through */
+ case REORDER_BUFFER_CHANGE_INTERNAL_DELETE:
{
char *data;
ReorderBufferTupleBuf *oldtup,
case REORDER_BUFFER_CHANGE_UPDATE:
/* fall through */
case REORDER_BUFFER_CHANGE_DELETE:
+ /* fall through */
+ case REORDER_BUFFER_CHANGE_INTERNAL_INSERT:
+ /* fall through */
+ case REORDER_BUFFER_CHANGE_INTERNAL_DELETE:
if (change->data.tp.newtuple)
{
Size len = offsetof(ReorderBufferTupleBuf, t_data) +
LockClauseStrength strength, LockWaitPolicy waitPolicy,
bool pushedDown);
static List *matchLocks(CmdType event, RuleLock *rulelocks,
- int varno, Query *parsetree);
+ int varno, Query *parsetree, bool *hasUpdate);
static Query *fireRIRrules(Query *parsetree, List *activeRIRs,
bool forUpdatePushedDown);
static bool view_has_instead_trigger(Relation view, CmdType event);
matchLocks(CmdType event,
RuleLock *rulelocks,
int varno,
- Query *parsetree)
+ Query *parsetree,
+ bool *hasUpdate)
{
List *matching_locks = NIL;
int nlocks;
{
RewriteRule *oneLock = rulelocks->rules[i];
+ if (oneLock->event == CMD_UPDATE)
+ *hasUpdate = true;
+
/*
* Suppress ON INSERT/UPDATE/DELETE rules that are disabled or
* configured to not fire during the current sessions replication
CmdType event = parsetree->commandType;
bool instead = false;
bool returning = false;
+ bool updatableview = false;
Query *qual_product = NULL;
List *rewritten = NIL;
ListCell *lc1;
Relation rt_entry_relation;
List *locks;
List *product_queries;
+ bool hasUpdate = false;
result_relation = parsetree->resultRelation;
Assert(result_relation != 0);
* Collect and apply the appropriate rules.
*/
locks = matchLocks(event, rt_entry_relation->rd_rules,
- result_relation, parsetree);
+ result_relation, parsetree, &hasUpdate);
product_queries = fireRules(parsetree,
result_relation,
*/
instead = true;
returning = true;
+ updatableview = true;
}
/*
}
}
+ /*
+ * Updatable views are supported by ON CONFLICT IGNORE, so don't
+ * prevent that case from proceeding
+ */
+ if (parsetree->specClause != SPEC_NONE &&
+ (product_queries != NIL || hasUpdate) &&
+ !updatableview)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("INSERT with ON CONFLICT clause may not target relation with INSERT or UPDATE rules")));
+
heap_close(rt_entry_relation, NoLock);
}
return true;
}
+/*
+ * Per-backend final disambiguator of an attempt to insert speculatively.
+ *
+ * This may wrap around, but since it is only a final disambiguator (speculative
+ * waiters also check TID and relfilenode), this is deemed to be acceptable.
+ * There is only a theoretical, vanishingly small chance of a backend
+ * spuriously considering that it must wait on another backend's
+ * end-of-speculative insertion (call to SpeculativeInsertionLockRelease())
+ * when that isn't strictly necessary, and even this is likely to be
+ * inconsequential. At worst, unprincipled deadlocks are not entirely
+ * eliminated in extreme corner cases.
+ */
+static uint32 speculativeInsertionToken = 0;
+
+/*
+ * SpeculativeInsertionLockAcquire
+ *
+ * Take an exclusive lock advertising that transaction 'xid' has inserted a
+ * tuple speculatively and has not yet decided whether to keep it.  Other
+ * backends can wait on this lock to learn the outcome (insertion affirmed, or
+ * abandoned).
+ *
+ * A fresh token is generated for every call, so that several speculative
+ * insertions by one transaction can be told apart.  The token is returned to
+ * the caller.
+ */
+uint32
+SpeculativeInsertionLockAcquire(TransactionId xid)
+{
+	LOCKTAG		locktag;
+
+	/*
+	 * Advance to the next token.  Zero is reserved to mean "no token held",
+	 * so step over it when the counter wraps.
+	 */
+	if (++speculativeInsertionToken == 0)
+		speculativeInsertionToken = 1;
+
+	SET_LOCKTAG_SPECULATIVE_INSERTION(locktag, xid, speculativeInsertionToken);
+	(void) LockAcquire(&locktag, ExclusiveLock, false, false);
+
+	return speculativeInsertionToken;
+}
+
+/*
+ * SpeculativeInsertionLockRelease
+ *
+ * Drop the lock advertising that transaction 'xid' is speculatively inserting
+ * a tuple.  The lock tag is rebuilt from the backend's current token, i.e.
+ * the one handed out by the most recent SpeculativeInsertionLockAcquire().
+ */
+void
+SpeculativeInsertionLockRelease(TransactionId xid)
+{
+	LOCKTAG		locktag;
+
+	SET_LOCKTAG_SPECULATIVE_INSERTION(locktag, xid, speculativeInsertionToken);
+	LockRelease(&locktag, ExclusiveLock, false);
+}
+
+/*
+ * SpeculativeInsertionWait
+ *
+ * Block until the speculative insertion identified by (xid, token) has been
+ * either completed or abandoned.  This is done by acquiring, and at once
+ * releasing, a share lock on the same lock tag the inserter holds
+ * exclusively.
+ */
+void
+SpeculativeInsertionWait(TransactionId xid, uint32 token)
+{
+	LOCKTAG		locktag;
+
+	/* Caller must identify a real transaction and a real (nonzero) token */
+	Assert(TransactionIdIsValid(xid));
+	Assert(token != 0);
+
+	SET_LOCKTAG_SPECULATIVE_INSERTION(locktag, xid, token);
+
+	(void) LockAcquire(&locktag, ShareLock, false, false);
+	LockRelease(&locktag, ShareLock, false);
+}
+
/*
* XactLockTableWaitErrorContextCb
* Error context callback for transaction lock waits.
tag->locktag_field1,
tag->locktag_field2);
break;
+ case LOCKTAG_SPECULATIVE_TOKEN:
+ appendStringInfo(buf,
+ _("speculative token %u of transaction %u"),
+ tag->locktag_field2,
+ tag->locktag_field1);
+ break;
case LOCKTAG_OBJECT:
appendStringInfo(buf,
_("object %u of class %u of database %u"),
"tuple",
"transactionid",
"virtualxid",
+ "speculative token",
"object",
"userlock",
"advisory"
}
}
- /* by here, the inserting transaction has committed */
+ if (HeapTupleHeaderSuperDeleted(tuple))
+ return false;
+
+ /*
+ * By here, the inserting transaction has committed. Once committed,
+ * tuples can never be speculative.
+ */
+ Assert(!HeapTupleHeaderIsSpeculative(tuple));
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
return true;
Assert(ItemPointerIsValid(&htup->t_self));
Assert(htup->t_tableOid != InvalidOid);
+ Assert(!HeapTupleHeaderSuperDeleted(tuple));
if (!HeapTupleHeaderXminCommitted(tuple))
{
Assert(ItemPointerIsValid(&htup->t_self));
Assert(htup->t_tableOid != InvalidOid);
+ Assert(!HeapTupleHeaderSuperDeleted(tuple));
if (!HeapTupleHeaderXminCommitted(tuple))
{
Assert(htup->t_tableOid != InvalidOid);
snapshot->xmin = snapshot->xmax = InvalidTransactionId;
+ snapshot->speculativeToken = 0;
if (!HeapTupleHeaderXminCommitted(tuple))
{
}
else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
{
+ RelFileNode rnode;
+ ForkNumber forkno;
+ BlockNumber blockno;
+
+ BufferGetTag(buffer, &rnode, &forkno, &blockno);
+
+ /* tuples can only be in the main fork */
+ Assert(forkno == MAIN_FORKNUM);
+ Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
+
+ /*
+ * Set speculative token. Caller can worry about xmax, since it
+ * requires a conclusively locked row version, and a concurrent
+ * update to this tuple is a conflict of its purposes.
+ */
+ if (HeapTupleHeaderIsSpeculative(tuple))
+ {
+ snapshot->speculativeToken =
+ SpeculativeTokenGetTokenNumber(&tuple->t_tidstate.t_token);
+
+ Assert(snapshot->speculativeToken != 0);
+ }
+
snapshot->xmin = HeapTupleHeaderGetRawXmin(tuple);
/* XXX shouldn't we fall through to look at xmax? */
return true; /* in insertion by other */
}
}
- /* by here, the inserting transaction has committed */
+ if (HeapTupleHeaderSuperDeleted(tuple))
+ return false;
+
+ /*
+ * By here, the inserting transaction has committed. Once committed,
+ * tuples can never be speculative.
+ */
+ Assert(!HeapTupleHeaderIsSpeculative(tuple));
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
return true;
}
}
+ if (HeapTupleHeaderSuperDeleted(tuple))
+ return false;
+
/*
- * By here, the inserting transaction has committed - have to check
- * when...
+ * By here, the inserting transaction has committed. Once committed,
+ * tuples can never be speculative.
*/
+ Assert(!HeapTupleHeaderIsSpeculative(tuple));
+
+ /* have to check when committed */
if (!HeapTupleHeaderXminFrozen(tuple)
&& XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot))
return false; /* treat as still in progress */
*/
}
+ if (HeapTupleHeaderSuperDeleted(tuple))
+ return HEAPTUPLE_DEAD;
+
/*
- * Okay, the inserter committed, so it was good at some point. Now what
- * about the deleting transaction?
+ * Okay, the inserter committed, so it was good at some point. Once
+ * committed, tuples can never be speculative.
*/
+ Assert(!HeapTupleHeaderIsSpeculative(tuple));
+
+ /* Now what about the deleting transaction? */
if (tuple->t_infomask & HEAP_XMAX_INVALID)
return HEAPTUPLE_LIVE;
if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
return false;
- /* Deleter committed, so tuple is dead if the XID is old enough. */
+ /*
+ * Deleter committed, so tuple is dead if the XID is old enough. This
+ * handles super deleted tuples correctly.
+ */
return TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin);
}
{
TransactionId xmax;
+ Assert(!HeapTupleHeaderSuperDeleted(tuple));
+
/* if there's no valid Xmax, then there's obviously no update either */
if (tuple->t_infomask & HEAP_XMAX_INVALID)
return true;
* We don't need to support HEAP_MOVED_(IN|OFF) for now because we only support
* reading catalog pages which couldn't have been created in an older version.
*
+ * We don't support speculative insertion into catalogs, and so there are no
+ * checks for speculative/super deleted tuples.
+ *
* We don't set any hint bits in here as it seems unlikely to be beneficial as
* those should already be set by normal access and it seems to be too
* dangerous to do so as the semantics of doing so during timetravel are more
Assert(ItemPointerIsValid(&htup->t_self));
Assert(htup->t_tableOid != InvalidOid);
+ Assert(!HeapTupleHeaderIsSpeculative(tuple));
+ Assert(!HeapTupleHeaderSuperDeleted(tuple));
/* inserting transaction aborted */
if (HeapTupleHeaderXminInvalid(tuple))
#define HEAP_INSERT_SKIP_WAL 0x0001
#define HEAP_INSERT_SKIP_FSM 0x0002
#define HEAP_INSERT_FROZEN 0x0004
+#define HEAP_INSERT_SPECULATIVE 0x0008
typedef struct BulkInsertStateData *BulkInsertState;
CommandId cid, int options, BulkInsertState bistate);
extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
CommandId cid, Snapshot crosscheck, bool wait,
- HeapUpdateFailureData *hufd);
+ HeapUpdateFailureData *hufd, bool speculative);
+extern void heap_finish_speculative(Relation relation, HeapTuple tuple,
+ bool conflict);
extern HTSU_Result heap_update(Relation relation, ItemPointer otid,
HeapTuple newtup,
CommandId cid, Snapshot crosscheck, bool wait,
#define XLOG_HEAP_UPDATE 0x20
/* 0x030 is free, was XLOG_HEAP_MOVE */
#define XLOG_HEAP_HOT_UPDATE 0x40
-/* 0x050 is free, was XLOG_HEAP_NEWPAGE */
+#define XLOG_HEAP_AFFIRM 0x50
#define XLOG_HEAP_LOCK 0x60
#define XLOG_HEAP_INPLACE 0x70
#define XLOG_HEAP_SUFFIX_FROM_OLD (1<<6)
/* last xl_heap_multi_insert record for one heap_multi_insert() call */
#define XLOG_HEAP_LAST_MULTI_INSERT (1<<7)
+/* reuse xl_heap_multi_insert-only bit for xl_heap_insert and xl_heap_delete */
+#define XLOG_HEAP_SPECULATIVE_TUPLE XLOG_HEAP_LAST_MULTI_INSERT
/* convenience macro for checking whether any form of old tuple was logged */
#define XLOG_HEAP_CONTAINS_OLD \
#define SizeOfHeapLockUpdated (offsetof(xl_heap_lock_updated, infobits_set) + sizeof(uint8))
+/*
+ * This is what we need to know about affirmation of speculative insertion.
+ * NOTE(review): presumably the payload of XLOG_HEAP_AFFIRM records -- confirm
+ * against the code that writes and replays them.
+ */
+typedef struct xl_heap_affirm
+{
+ OffsetNumber offnum; /* affirmed tuple's offset on page */
+} xl_heap_affirm;
+
+/* Size of the struct up to and including offnum, its only field */
+#define SizeOfHeapAffirm (offsetof(xl_heap_affirm, offnum) + sizeof(OffsetNumber))
+
/* This is what we need to know about in-place update */
typedef struct xl_heap_inplace
{
extern void RelationPutHeapTuple(Relation relation, Buffer buffer,
- HeapTuple tuple);
+ HeapTuple tuple, bool token);
extern Buffer RelationGetBufferForTuple(Relation relation, Size len,
Buffer otherBuffer, int options,
BulkInsertState bistate,
* unrelated tuple stored into a slot recently freed by VACUUM. If either
* check fails, one may assume that there is no live descendant version.
*
+ * t_ctid is sometimes used to store a speculative token, for speculative
+ * inserters. Code paths that follow t_ctid chains must also consider that the
+ * apparently pointed to t_ctid is in fact such a token, that should similarly
+ * not be followed.
+ *
* Following the fixed header fields, the nulls bitmap is stored (beginning
* at t_bits). The bitmap is *not* stored if t_infomask shows that there
* are no nulls in the tuple. If an OID field is present (as indicated by
DatumTupleFields t_datum;
} t_choice;
- ItemPointerData t_ctid; /* current TID of this or newer tuple */
+ union
+ {
+ ItemPointerData t_ctid; /* current TID of this or newer tuple */
+ SpeculativeToken t_token; /* Speculative insertion token */
+ } t_tidstate;
/* Fields below here must match MinimalTupleData! */
((tup)->t_infomask |= HEAP_XMIN_FROZEN) \
)
+/*
+ * Was tuple "super deleted" following unsuccessful speculative insertion (i.e.
+ * conflict was detected at insertion time)? It is not sufficient to set
+ * HEAP_XMIN_INVALID to super delete because it is only a hint, and because it
+ * interacts with transaction commit status. Speculative insertion decouples
+ * visibility from transaction duration for one special purpose.
+ *
+ * The test itself is simply whether the tuple's raw xmin is not a valid
+ * transaction ID.
+ */
+#define HeapTupleHeaderSuperDeleted(tup) \
+( \
+ (!TransactionIdIsValid(HeapTupleHeaderGetRawXmin(tup))) \
+)
+
/*
* HeapTupleHeaderGetRawXmax gets you the raw Xmax field. To find out the Xid
* that updated a tuple, you might need to resolve the MultiXactId if certain
(tup)->t_infomask2 &= ~HEAP_ONLY_TUPLE \
)
+/*
+ * Does the tuple header carry a speculative insertion token rather than a
+ * real TID?  t_ctid shares storage with t_token (union t_tidstate); the
+ * header is treated as speculative when the offset part of that storage
+ * equals MagicOffsetNumber.
+ */
+#define HeapTupleHeaderIsSpeculative(tup) \
+( \
+ (tup)->t_tidstate.t_ctid.ip_posid == MagicOffsetNumber \
+)
+
#define HeapTupleHeaderHasMatch(tup) \
( \
(tup)->t_infomask2 & HEAP_TUPLE_HAS_MATCH \
extern IndexInfo *BuildIndexInfo(Relation index);
+extern void IndexInfoSpeculative(Relation index, IndexInfo *ii);
+
extern void FormIndexDatum(IndexInfo *indexInfo,
TupleTableSlot *slot,
EState *estate,
extern Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags);
extern void ExecCloseScanRelation(Relation scanrel);
-extern void ExecOpenIndices(ResultRelInfo *resultRelInfo);
+extern void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative);
extern void ExecCloseIndices(ResultRelInfo *resultRelInfo);
extern List *ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid,
- EState *estate);
-extern bool check_exclusion_constraint(Relation heap, Relation index,
- IndexInfo *indexInfo,
- ItemPointer tupleid,
- Datum *values, bool *isnull,
- EState *estate,
- bool newIndex, bool errorOK);
+ EState *estate, bool noDupErr, List *arbiterIndexes);
+extern bool ExecCheckIndexConstraints(TupleTableSlot *slot, EState *estate,
+ ItemPointer conflictTid, List *arbiterIndexes);
+extern bool check_exclusion_or_unique_constraint(Relation heap, Relation index,
+ IndexInfo *indexInfo,
+ ItemPointer tupleid,
+ Datum *values, bool *isnull,
+ EState *estate,
+ bool newIndex, bool errorOK,
+ bool wait, ItemPointer conflictTid);
extern void RegisterExprContextCallback(ExprContext *econtext,
ExprContextCallbackFunction function,
* ExclusionOps Per-column exclusion operators, or NULL if none
* ExclusionProcs Underlying function OIDs for ExclusionOps
* ExclusionStrats Opclass strategy numbers for ExclusionOps
+ * UniqueOps These are like Exclusion*, but for unique indexes
+ * UniqueProcs
+ * UniqueStrats
* Unique is it a unique index?
* ReadyForInserts is it valid for inserts?
* Concurrent are we doing a concurrent index build?
Oid *ii_ExclusionOps; /* array with one entry per column */
Oid *ii_ExclusionProcs; /* array with one entry per column */
uint16 *ii_ExclusionStrats; /* array with one entry per column */
+ Oid *ii_UniqueOps; /* array with one entry per column */
+ Oid *ii_UniqueProcs; /* array with one entry per column */
+ uint16 *ii_UniqueStrats; /* array with one entry per column */
bool ii_Unique;
bool ii_ReadyForInserts;
bool ii_Concurrent;
int mt_whichplan; /* which one is being executed (0..n-1) */
ResultRelInfo *resultRelInfo; /* per-subplan target relations */
List **mt_arowmarks; /* per-subplan ExecAuxRowMark lists */
+ SpecCmd spec; /* reason for speculative insertion */
+ List *arbiterIndexes; /* unique index OIDs to arbitrate taking alt path */
EPQState mt_epqstate; /* for evaluating EvalPlanQual rechecks */
bool fireBSTriggers; /* do we need to fire stmt triggers? */
} ModifyTableState;
T_CoerceToDomainValue,
T_SetToDefault,
T_CurrentOfExpr,
+ T_InferenceElem,
T_TargetEntry,
T_RangeTblRef,
T_JoinExpr,
T_RowMarkClause,
T_XmlSerialize,
T_WithClause,
+ T_InferClause,
+ T_ConflictClause,
T_CommonTableExpr,
T_RoleSpec,
(1 << JOIN_RIGHT) | \
(1 << JOIN_ANTI))) != 0)
+/*
+ * SpecCmd -
+ * "Speculative insertion" clause
+ *
+ * Identifies which ON CONFLICT variant, if any, a statement uses.
+ *
+ * This is needed in both parsenodes.h and plannodes.h, so put it here...
+ */
+typedef enum
+{
+ SPEC_NONE, /* Not involved in speculative insertion */
+ SPEC_IGNORE /* INSERT of "ON CONFLICT IGNORE" */
+} SpecCmd;
+
#endif /* NODES_H */
List *withCheckOptions; /* a list of WithCheckOption's */
+ SpecCmd specClause; /* speculative insertion clause */
+ List *arbiterElems; /* unique index arbiter list (of InferenceElem's) */
+ Node *arbiterWhere; /* unique index arbiter WHERE clause */
+
List *returningList; /* return-values list (of TargetEntry) */
List *groupClause; /* a list of SortGroupClause's */
} TableLikeOption;
/*
- * IndexElem - index parameters (used in CREATE INDEX)
+ * IndexElem - index parameters (used in CREATE INDEX, and in ON CONFLICT)
*
* For a plain index attribute, 'name' is the name of the table column to
* index, and 'expr' is NULL. For an index expression, 'name' is NULL and
int location; /* token location, or -1 if unknown */
} WithClause;
+/*
+ * InferClause -
+ * ON CONFLICT unique index inference clause
+ *
+ * Note: InferClause does not propagate into the Query representation.
+ */
+typedef struct InferClause
+{
+ NodeTag type;
+ List *indexElems; /* IndexElems to infer unique index */
+ Node *whereClause; /* qualification (partial-index predicate) */
+ int location; /* token location, or -1 if unknown */
+} InferClause;
+
+/*
+ * ConflictClause -
+ * representation of ON CONFLICT clause
+ *
+ * Note: ConflictClause does not propagate into the Query representation.
+ */
+typedef struct ConflictClause
+{
+ NodeTag type;
+ SpecCmd specclause; /* Variant specified */
+ InferClause *infer; /* Optional index inference clause */
+ int location; /* token location, or -1 if unknown */
+} ConflictClause;
+
/*
* CommonTableExpr -
* representation of WITH list element
RangeVar *relation; /* relation to insert into */
List *cols; /* optional: names of the target columns */
Node *selectStmt; /* the source SELECT/VALUES, or NULL */
+ ConflictClause *confClause; /* ON CONFLICT clause */
List *returningList; /* list of expressions to return */
WithClause *withClause; /* WITH clause */
} InsertStmt;
List *resultRelations; /* integer list of RT indexes */
int resultRelIndex; /* index of first resultRel in plan's list */
List *plans; /* plan(s) producing source data */
+ SpecCmd spec; /* speculative insertion specification */
+ List *arbiterIndexes; /* List of ON CONFLICT arbiter index OIDs */
List *withCheckOptionLists; /* per-target-table WCO lists */
List *returningLists; /* per-target-table RETURNING tlists */
List *fdwPrivLists; /* per-target-table FDW private data lists */
int cursor_param; /* refcursor parameter number, or 0 */
} CurrentOfExpr;
+/*
+ * InferenceElem - an element of a unique index inference specification
+ *
+ * This mostly matches the structure of IndexElems, but having a dedicated
+ * primnode allows for a clean separation between the use of index parameters
+ * by utility commands, and this node.
+ */
+typedef struct InferenceElem
+{
+ Expr xpr;
+ Node *expr; /* expression to infer from, or NULL */
+ Oid infercollid; /* OID of collation, or InvalidOid */
+ Oid inferopfamily; /* OID of att opfamily, or InvalidOid */
+ Oid inferopcinputtype; /* OID of att input type, or InvalidOid */
+} InferenceElem;
+
/*--------------------
* TargetEntry -
* a target entry (used in query target lists)
extern void get_relation_info(PlannerInfo *root, Oid relationObjectId,
bool inhparent, RelOptInfo *rel);
+extern List *infer_arbiter_indexes(PlannerInfo *root);
+
extern void estimate_rel_size(Relation rel, int32 *attr_widths,
BlockNumber *pages, double *tuples, double *allvisfrac);
Index nominalRelation,
List *resultRelations, List *subplans,
List *withCheckOptionLists, List *returningLists,
- List *rowMarks, int epqParam);
+ List *rowMarks, SpecCmd spec, int epqParam);
extern bool is_projection_capable_plan(Plan *plan);
/*
PG_KEYWORD("committed", COMMITTED, UNRESERVED_KEYWORD)
PG_KEYWORD("concurrently", CONCURRENTLY, TYPE_FUNC_NAME_KEYWORD)
PG_KEYWORD("configuration", CONFIGURATION, UNRESERVED_KEYWORD)
+PG_KEYWORD("conflict", CONFLICT, UNRESERVED_KEYWORD)
PG_KEYWORD("connection", CONNECTION, UNRESERVED_KEYWORD)
PG_KEYWORD("constraint", CONSTRAINT, RESERVED_KEYWORD)
PG_KEYWORD("constraints", CONSTRAINTS, UNRESERVED_KEYWORD)
PG_KEYWORD("hour", HOUR_P, UNRESERVED_KEYWORD)
PG_KEYWORD("identity", IDENTITY_P, UNRESERVED_KEYWORD)
PG_KEYWORD("if", IF_P, UNRESERVED_KEYWORD)
+PG_KEYWORD("ignore", IGNORE_P, UNRESERVED_KEYWORD)
PG_KEYWORD("ilike", ILIKE, TYPE_FUNC_NAME_KEYWORD)
PG_KEYWORD("immediate", IMMEDIATE, UNRESERVED_KEYWORD)
PG_KEYWORD("immutable", IMMUTABLE, UNRESERVED_KEYWORD)
List **targetlist, List *sortClause, bool is_agg);
extern List *transformDistinctOnClause(ParseState *pstate, List *distinctlist,
List **targetlist, List *sortClause);
+extern void transformConflictClause(ParseState *pstate, ConflictClause *confClause,
+ List **arbiterExpr, Node **arbiterWhere);
extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
List *sortlist, List *targetlist, SortBy *sortby,
* and ComboCids in the same list with the user visible INSERT/UPDATE/DELETE
* changes. Users of the decoding facilities will never see changes with
* *_INTERNAL_* actions.
+ *
+ * The REORDER_BUFFER_CHANGE_INTERNAL_INSERT and
+ * REORDER_BUFFER_CHANGE_INTERNAL_DELETE changes concern "speculative
+ * insertions", and their "super deletion" respectively. Super deletion is a
+ * mechanism that speculative insertion makes use of to handle conflicts.
+ *
+ * At transaction reassembly these will be consolidated, and so decoding
+ * plugins will only ever handle REORDER_BUFFER_CHANGE_INSERT changes here too
+ * (in the common case where speculative insertion works out).
*/
enum ReorderBufferChangeType
{
REORDER_BUFFER_CHANGE_DELETE,
REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT,
REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID,
- REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID
+ REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID,
+ REORDER_BUFFER_CHANGE_INTERNAL_INSERT,
+ REORDER_BUFFER_CHANGE_INTERNAL_DELETE
};
/*
} BlockIdData;
typedef BlockIdData *BlockId; /* block identifier */
+typedef BlockIdData SpeculativeToken; /* speculative insertion token; reuses BlockIdData layout */
/* ----------------
* support macros
(blockId)->bi_lo = (blockNumber) & 0xffff \
)
+/*
+ * SpeculativeTokenSet
+ * Stores tokenNumber in tidstate.t_token and sets tidstate.t_ctid.ip_posid to MagicOffsetNumber.
+ */
+#define SpeculativeTokenSet(tidstate, tokenNumber) \
+( \
+ BlockIdSet(&(tidstate).t_token, tokenNumber), \
+ (tidstate).t_ctid.ip_posid = MagicOffsetNumber \
+)
+
/*
* BlockIdCopy
* Copy a block identifier.
(BlockNumber) (((blockId)->bi_hi << 16) | ((uint16) (blockId)->bi_lo)) \
)
+/*
+ * SpeculativeTokenGetTokenNumber
+ * Retrieve the token number from a token identifier.
+ */
+#define SpeculativeTokenGetTokenNumber(tokenId) \
+ BlockIdGetBlockNumber(tokenId)
+
#endif /* BLOCK_H */
extern void WaitForLockers(LOCKTAG heaplocktag, LOCKMODE lockmode);
extern void WaitForLockersMultiple(List *locktags, LOCKMODE lockmode);
+/* Lock an XID for tuple insertion (used to wait for an insertion to finish) */
+extern uint32 SpeculativeInsertionLockAcquire(TransactionId xid);
+extern void SpeculativeInsertionLockRelease(TransactionId xid);
+extern void SpeculativeInsertionWait(TransactionId xid, uint32 token);
+
/* Lock a general object (other than a relation) of the current database */
extern void LockDatabaseObject(Oid classid, Oid objid, uint16 objsubid,
LOCKMODE lockmode);
/* ID info for a transaction is its TransactionId */
LOCKTAG_VIRTUALTRANSACTION, /* virtual transaction (ditto) */
/* ID info for a virtual transaction is its VirtualTransactionId */
+ LOCKTAG_SPECULATIVE_TOKEN, /* speculative insertion Xid and token */
+ /* ID info for a speculative insertion is its TransactionId + token */
LOCKTAG_OBJECT, /* non-relation database object */
/* ID info for an object is DB OID + CLASS OID + OBJECT OID + SUBID */
(locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
+#define SET_LOCKTAG_SPECULATIVE_INSERTION(locktag,xid,token) \
+ ((locktag).locktag_field1 = (xid), \
+ (locktag).locktag_field2 = (token), \
+ (locktag).locktag_field3 = 0, \
+ (locktag).locktag_field4 = 0, \
+ (locktag).locktag_type = LOCKTAG_SPECULATIVE_TOKEN, \
+ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
+
#define SET_LOCKTAG_OBJECT(locktag,dboid,classoid,objoid,objsubid) \
((locktag).locktag_field1 = (dboid), \
(locktag).locktag_field2 = (classoid), \
#define InvalidOffsetNumber ((OffsetNumber) 0)
#define FirstOffsetNumber ((OffsetNumber) 1)
#define MaxOffsetNumber ((OffsetNumber) (BLCKSZ / sizeof(ItemIdData)))
+#define MagicOffsetNumber (0xfffe)
#define OffsetNumberMask (0xffff) /* valid uint16 bits */
/* ----------------
bool takenDuringRecovery; /* recovery-shaped snapshot? */
bool copied; /* false if it's a static snapshot */
+ /*
+ * Snapshot's speculative token is a value set only by
+ * HeapTupleSatisfiesDirty, indicating that the tuple is being inserted
+ * speculatively, and may yet be "super-deleted" before inserter's EOX.
+ *
+ * The caller may use the value and the inserting tuple 'xmin' with
+ * SpeculativeInsertionWait to wait for the inserter to decide. It is only
+ * set when a valid 'xmin' is also set by HeapTupleSatisfiesDirty. By
+ * convention, when speculativeToken is zero, the caller must assume that
+ * it should wait on a non-speculative tuple (i.e. wait for xmin/xmax to
+ * commit, since speculative insertion either isn't in play anymore, or
+ * never was).
+ */
+ uint32 speculativeToken;
+
/*
* note: all ids in subxip[] are >= xmin, but we don't bother filtering
* out any that are >= xmax
--- /dev/null
+Parsed test spec with 2 sessions
+
+starting permutation: ignore1 ignore2 c1 select2 c2
+step ignore1: INSERT INTO ints(key, val) VALUES(1, 'ignore1') ON CONFLICT IGNORE;
+step ignore2: INSERT INTO ints(key, val) VALUES(1, 'ignore2') ON CONFLICT IGNORE; <waiting ...>
+step c1: COMMIT;
+step ignore2: <... completed>
+step select2: SELECT * FROM ints;
+key val
+
+1 ignore1
+step c2: COMMIT;
+
+starting permutation: ignore1 ignore2 a1 select2 c2
+step ignore1: INSERT INTO ints(key, val) VALUES(1, 'ignore1') ON CONFLICT IGNORE;
+step ignore2: INSERT INTO ints(key, val) VALUES(1, 'ignore2') ON CONFLICT IGNORE; <waiting ...>
+step a1: ABORT;
+step ignore2: <... completed>
+step select2: SELECT * FROM ints;
+key val
+
+1 ignore2
+step c2: COMMIT;
test: eval-plan-qual
test: lock-update-delete
test: lock-update-traversal
+test: insert-conflict-ignore
test: delete-abort-savept
test: delete-abort-savept-2
test: aborted-keyrevoke
--- /dev/null
+# INSERT...ON CONFLICT IGNORE test
+#
+# This test tries to expose problems with the interaction between concurrent
+# sessions during INSERT...ON CONFLICT IGNORE.
+#
+# The convention here is that session 1 always inserts first. Session 2 then
+# ends up ignoring if session 1 commits, or inserting if session 1 aborts.
+
+setup
+{
+ CREATE TABLE ints (key int primary key, val text);
+}
+
+teardown
+{
+ DROP TABLE ints;
+}
+
+session "s1"
+setup
+{
+ BEGIN ISOLATION LEVEL READ COMMITTED;
+}
+step "ignore1" { INSERT INTO ints(key, val) VALUES(1, 'ignore1') ON CONFLICT IGNORE; }
+step "c1" { COMMIT; }
+step "a1" { ABORT; }
+
+session "s2"
+setup
+{
+ BEGIN ISOLATION LEVEL READ COMMITTED;
+}
+step "ignore2" { INSERT INTO ints(key, val) VALUES(1, 'ignore2') ON CONFLICT IGNORE; }
+step "select2" { SELECT * FROM ints; }
+step "c2" { COMMIT; }
+step "a2" { ABORT; }
+
+# Regular case where one session block-waits on another to determine if it
+# should proceed with an insert or ignore.
+permutation "ignore1" "ignore2" "c1" "select2" "c2"
+permutation "ignore1" "ignore2" "a1" "select2" "c2"
--- /dev/null
+--
+-- insert...on conflict unique index inference
+--
+create table insertconflicttest(key int4, fruit text);
+--
+-- Test unique index inference with operator class specifications and
+-- named collations
+--
+create unique index op_index_key on insertconflicttest(key, fruit text_pattern_ops);
+create unique index collation_index_key on insertconflicttest(key, fruit collate "C");
+create unique index both_index_key on insertconflicttest(key, fruit collate "C" text_pattern_ops);
+create unique index both_index_expr_key on insertconflicttest(key, lower(fruit) collate "C" text_pattern_ops);
+-- fails
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key) ignore;
+ERROR: could not infer which unique index to use from expressions/columns and predicate provided for ON CONFLICT
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (fruit) ignore;
+ERROR: could not infer which unique index to use from expressions/columns and predicate provided for ON CONFLICT
+-- succeeds
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key, fruit) ignore;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Arbiter Indexes: op_index_key, collation_index_key, both_index_key
+ -> Result
+(3 rows)
+
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (fruit, key, fruit, key) ignore;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Arbiter Indexes: op_index_key, collation_index_key, both_index_key
+ -> Result
+(3 rows)
+
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (lower(fruit), key, lower(fruit), key) ignore;
+ QUERY PLAN
+-------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Arbiter Indexes: both_index_expr_key
+ -> Result
+(3 rows)
+
+-- Neither collation nor operator class specifications are required --
+-- supplying them merely *limits* matches to indexes with matching opclasses
+-- used for relevant indexes
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key, fruit text_pattern_ops) ignore;
+ QUERY PLAN
+----------------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Arbiter Indexes: op_index_key, both_index_key
+ -> Result
+(3 rows)
+
+-- Okay, arbitrates using both index where text_pattern_ops opclass does and
+-- does not appear.
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key, fruit collate "C") ignore;
+ QUERY PLAN
+-----------------------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Arbiter Indexes: collation_index_key, both_index_key
+ -> Result
+(3 rows)
+
+-- Okay, but only accepts the single index where both opclass and collation are
+-- specified
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (fruit collate "C" text_pattern_ops, key) ignore;
+ QUERY PLAN
+--------------------------------------------
+ Insert on insertconflicttest
+ Conflict Arbiter Indexes: both_index_key
+ -> Result
+(3 rows)
+
+-- Okay, but only accepts the single index where both opclass and collation are
+-- specified (plus expression variant)
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (lower(fruit) collate "C", key, key) ignore;
+ QUERY PLAN
+-------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Arbiter Indexes: both_index_expr_key
+ -> Result
+(3 rows)
+
+-- Attribute appears twice, while not all attributes/expressions on attributes
+-- appearing within index definition match in terms of both opclass and
+-- collation.
+--
+-- Works because every attribute in inference specification needs to be
+-- satisfied once or more by cataloged index attribute, and as always when an
+-- attribute in the cataloged definition has a non-default opclass/collation,
+-- it still satisfied some inference attribute lacking any particular
+-- opclass/collation specification.
+--
+-- The implementation is liberal in accepting inference specifications on the
+-- assumption that multiple inferred unique indexes will prevent problematic
+-- cases. It rolls with unique indexes where attributes redundantly appear
+-- multiple times, too (which is not tested here).
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (fruit, key, fruit text_pattern_ops, key) ignore;
+ QUERY PLAN
+----------------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Arbiter Indexes: op_index_key, both_index_key
+ -> Result
+(3 rows)
+
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (lower(fruit) collate "C" text_pattern_ops, key, key) ignore;
+ QUERY PLAN
+-------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Arbiter Indexes: both_index_expr_key
+ -> Result
+(3 rows)
+
+drop index op_index_key;
+drop index collation_index_key;
+drop index both_index_key;
+drop index both_index_expr_key;
+--
+-- Test partial unique index inference
+--
+create unique index partial_key_index on insertconflicttest(key) where fruit like '%berry';
+-- Succeeds
+insert into insertconflicttest values (23, 'Blackberry') on conflict (key where fruit like '%berry' and fruit = 'inconsequential') ignore;
+-- fails
+insert into insertconflicttest values (23, 'Blackberry') on conflict (key where fruit like '%berry' or fruit = 'consequential') ignore;
+ERROR: could not infer which unique index to use from expressions/columns and predicate provided for ON CONFLICT
+insert into insertconflicttest values (23, 'Uncovered by Index') on conflict (key where fruit like '%berry') ignore;
+ERROR: inferred arbiter partial unique index has predicate that fails to cover tuple proposed for insertion
+DETAIL: ON CONFLICT inference clause implies that the tuple proposed for insertion must be covered by predicate for partial index "partial_key_index".
+drop index partial_key_index;
+-- Cleanup
+drop table insertconflicttest;
+-- ******************************************************************
+-- * *
+-- * Test inheritance (example taken from tutorial) *
+-- * *
+-- ******************************************************************
+create table cities (
+ name text,
+ population float8,
+ altitude int -- (in ft)
+);
+create table capitals (
+ state char(2)
+) inherits (cities);
+-- Create unique indexes. Due to a general limitation of inheritance,
+-- uniqueness is only enforced per-relation. Unique index inference
+-- specification will do the right thing, though.
+create unique index cities_names_unique on cities (name);
+create unique index capitals_names_unique on capitals (name);
+-- prepopulate the tables.
+insert into cities values ('San Francisco', 7.24E+5, 63);
+insert into cities values ('Las Vegas', 2.583E+5, 2174);
+insert into cities values ('Mariposa', 1200, 1953);
+insert into capitals values ('Sacramento', 3.694E+5, 30, 'CA');
+insert into capitals values ('Madison', 1.913E+5, 845, 'WI');
+-- Tests proper for inheritance:
+select * from capitals;
+ name | population | altitude | state
+------------+------------+----------+-------
+ Sacramento | 369400 | 30 | CA
+ Madison | 191300 | 845 | WI
+(2 rows)
+
+-- Succeeds:
+insert into cities values ('Las Vegas', 2.583E+5, 2174) on conflict ignore;
+-- Wrong "Sacramento", ignored:
+insert into capitals values ('Sacramento', 50, 2267, 'NE') on conflict (name) ignore;
+select * from capitals;
+ name | population | altitude | state
+------------+------------+----------+-------
+ Sacramento | 369400 | 30 | CA
+ Madison | 191300 | 845 | WI
+(2 rows)
+
+-- clean up
+drop table capitals;
+drop table cities;
SELECT * FROM shoelace_obsolete WHERE sl_avail = 0;
insert into shoelace values ('sl9', 0, 'pink', 35.0, 'inch', 0.0);
insert into shoelace values ('sl10', 1000, 'magenta', 40.0, 'inch', 0.0);
+-- Unsupported (even though a similar updatable view construct is)
+insert into shoelace values ('sl10', 1000, 'magenta', 40.0, 'inch', 0.0)
+ on conflict ignore;
+ERROR: INSERT with ON CONFLICT clause may not target relation with INSERT or UPDATE rules
SELECT * FROM shoelace_obsolete ORDER BY sl_len_cm;
sl_name | sl_avail | sl_color | sl_len | sl_unit | sl_len_cm
------------+----------+------------+--------+----------+-----------
insert into rule_and_refint_t3 values (1, 13, 11, 'row6');
ERROR: insert or update on table "rule_and_refint_t3" violates foreign key constraint "rule_and_refint_t3_id3a_fkey"
DETAIL: Key (id3a, id3b)=(1, 13) is not present in table "rule_and_refint_t1".
+-- Ordinary table
+insert into rule_and_refint_t3 values (1, 13, 11, 'row6')
+ on conflict ignore;
+ERROR: insert or update on table "rule_and_refint_t3" violates foreign key constraint "rule_and_refint_t3_id3a_fkey"
+DETAIL: Key (id3a, id3b)=(1, 13) is not present in table "rule_and_refint_t1".
create rule rule_and_refint_t3_ins as on insert to rule_and_refint_t3
where (exists (select 1 from rule_and_refint_t3
where (((rule_and_refint_t3.id3a = new.id3a)
ERROR: cannot insert into column "upper" of view "rw_view15"
DETAIL: View columns that are not columns of their base relation are not updatable.
INSERT INTO rw_view15 (a) VALUES (3); -- should be OK
+INSERT INTO rw_view15 (a) VALUES (3) ON CONFLICT IGNORE; -- succeeds
+INSERT INTO rw_view15 (a) VALUES (3) ON CONFLICT (a) IGNORE; -- succeeds
ALTER VIEW rw_view15 ALTER COLUMN upper SET DEFAULT 'NOT SET';
INSERT INTO rw_view15 (a) VALUES (4); -- should fail
ERROR: cannot insert into column "upper" of view "rw_view15"
# These four each depend on the previous one
# ----------
test: insert
+test: insert_conflict
test: create_function_1
test: create_type
test: create_table
test: type_sanity
test: opr_sanity
test: insert
+test: insert_conflict
test: create_function_1
test: create_type
test: create_table
--- /dev/null
+--
+-- insert...on conflict unique index inference
+--
+create table insertconflicttest(key int4, fruit text);
+
+--
+-- Test unique index inference with operator class specifications and
+-- named collations
+--
+create unique index op_index_key on insertconflicttest(key, fruit text_pattern_ops);
+create unique index collation_index_key on insertconflicttest(key, fruit collate "C");
+create unique index both_index_key on insertconflicttest(key, fruit collate "C" text_pattern_ops);
+create unique index both_index_expr_key on insertconflicttest(key, lower(fruit) collate "C" text_pattern_ops);
+
+-- fails
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key) ignore;
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (fruit) ignore;
+
+-- succeeds
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key, fruit) ignore;
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (fruit, key, fruit, key) ignore;
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (lower(fruit), key, lower(fruit), key) ignore;
+-- Neither collation nor operator class specifications are required --
+-- supplying them merely *limits* matches to indexes with matching opclasses
+-- used for relevant indexes
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key, fruit text_pattern_ops) ignore;
+-- Okay, arbitrates using both index where text_pattern_ops opclass does and
+-- does not appear.
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key, fruit collate "C") ignore;
+-- Okay, but only accepts the single index where both opclass and collation are
+-- specified
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (fruit collate "C" text_pattern_ops, key) ignore;
+-- Okay, but only accepts the single index where both opclass and collation are
+-- specified (plus expression variant)
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (lower(fruit) collate "C", key, key) ignore;
+-- Attribute appears twice, while not all attributes/expressions on attributes
+-- appearing within index definition match in terms of both opclass and
+-- collation.
+--
+-- Works because every attribute in inference specification needs to be
+-- satisfied once or more by cataloged index attribute, and as always when an
+-- attribute in the cataloged definition has a non-default opclass/collation,
+-- it still satisfied some inference attribute lacking any particular
+-- opclass/collation specification.
+--
+-- The implementation is liberal in accepting inference specifications on the
+-- assumption that multiple inferred unique indexes will prevent problematic
+-- cases. It rolls with unique indexes where attributes redundantly appear
+-- multiple times, too (which is not tested here).
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (fruit, key, fruit text_pattern_ops, key) ignore;
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (lower(fruit) collate "C" text_pattern_ops, key, key) ignore;
+
+drop index op_index_key;
+drop index collation_index_key;
+drop index both_index_key;
+drop index both_index_expr_key;
+
+--
+-- Test partial unique index inference
+--
+create unique index partial_key_index on insertconflicttest(key) where fruit like '%berry';
+
+-- Succeeds
+insert into insertconflicttest values (23, 'Blackberry') on conflict (key where fruit like '%berry' and fruit = 'inconsequential') ignore;
+
+-- fails
+insert into insertconflicttest values (23, 'Blackberry') on conflict (key where fruit like '%berry' or fruit = 'consequential') ignore;
+insert into insertconflicttest values (23, 'Uncovered by Index') on conflict (key where fruit like '%berry') ignore;
+
+drop index partial_key_index;
+
+-- Cleanup
+drop table insertconflicttest;
+
+-- ******************************************************************
+-- * *
+-- * Test inheritance (example taken from tutorial) *
+-- * *
+-- ******************************************************************
+create table cities (
+ name text,
+ population float8,
+ altitude int -- (in ft)
+);
+
+create table capitals (
+ state char(2)
+) inherits (cities);
+
+-- Create unique indexes. Due to a general limitation of inheritance,
+-- uniqueness is only enforced per-relation. Unique index inference
+-- specification will do the right thing, though.
+create unique index cities_names_unique on cities (name);
+create unique index capitals_names_unique on capitals (name);
+
+-- prepopulate the tables.
+insert into cities values ('San Francisco', 7.24E+5, 63);
+insert into cities values ('Las Vegas', 2.583E+5, 2174);
+insert into cities values ('Mariposa', 1200, 1953);
+
+insert into capitals values ('Sacramento', 3.694E+5, 30, 'CA');
+insert into capitals values ('Madison', 1.913E+5, 845, 'WI');
+
+-- Tests proper for inheritance:
+select * from capitals;
+
+-- Succeeds:
+insert into cities values ('Las Vegas', 2.583E+5, 2174) on conflict ignore;
+-- Wrong "Sacramento", ignored:
+insert into capitals values ('Sacramento', 50, 2267, 'NE') on conflict (name) ignore;
+select * from capitals;
+
+-- clean up
+drop table capitals;
+drop table cities;
insert into shoelace values ('sl9', 0, 'pink', 35.0, 'inch', 0.0);
insert into shoelace values ('sl10', 1000, 'magenta', 40.0, 'inch', 0.0);
+-- Unsupported (even though a similar updatable view construct is)
+insert into shoelace values ('sl10', 1000, 'magenta', 40.0, 'inch', 0.0)
+ on conflict ignore;
SELECT * FROM shoelace_obsolete ORDER BY sl_len_cm;
SELECT * FROM shoelace_candelete;
insert into rule_and_refint_t3 values (1, 12, 12, 'row4');
insert into rule_and_refint_t3 values (1, 11, 13, 'row5');
insert into rule_and_refint_t3 values (1, 13, 11, 'row6');
+-- Ordinary table
+insert into rule_and_refint_t3 values (1, 13, 11, 'row6')
+ on conflict ignore;
create rule rule_and_refint_t3_ins as on insert to rule_and_refint_t3
where (exists (select 1 from rule_and_refint_t3
-- Partially updatable view
INSERT INTO rw_view15 VALUES (3, 'ROW 3'); -- should fail
INSERT INTO rw_view15 (a) VALUES (3); -- should be OK
+INSERT INTO rw_view15 (a) VALUES (3) ON CONFLICT IGNORE; -- succeeds
+INSERT INTO rw_view15 (a) VALUES (3) ON CONFLICT (a) IGNORE; -- succeeds
ALTER VIEW rw_view15 ALTER COLUMN upper SET DEFAULT 'NOT SET';
INSERT INTO rw_view15 (a) VALUES (4); -- should fail
UPDATE rw_view15 SET upper='ROW 3' WHERE a=3; -- should fail