#include "access/xact.h"
#include "catalog/dependency.h"
+#include "catalog/index.h"
#include "catalog/namespace.h"
#include "catalog/pg_type.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
+
#include "parser/parse_relation.h"
#include "parser/parse_type.h"
bool changed[MaxTupleAttributeNumber];
} BDRTupleData;
-static void build_scan_key(ScanKey skey, Relation rel, Relation idx_rel, HeapTuple key);
-static bool find_pkey_tuple(ScanKey skey, Relation rel, Relation idx_rel, TupleTableSlot *slot, bool lock);
+static void build_index_scan_keys(EState *estate, ScanKey *scan_keys, TupleTableSlot *slot);
+static void build_index_scan_key(ScanKey skey, Relation rel, Relation idx_rel, TupleTableSlot *slot);
+static bool find_pkey_tuple(ScanKey skey, Relation rel, Relation idx_rel, TupleTableSlot *slot,
+ bool lock, LockTupleMode mode);
static void UserTableUpdateIndexes(EState *estate, TupleTableSlot *slot);
+static void UserTableUpdateOpenIndexes(EState *estate, TupleTableSlot *slot);
static Relation read_rel(StringInfo s, LOCKMODE mode);
extern void read_tuple_parts(StringInfo s, Relation rel, BDRTupleData *tup);
static void read_tuple(StringInfo s, Relation rel, TupleTableSlot *slot);
char action;
EState *estate;
TupleTableSlot *slot;
+ TupleTableSlot *oldslot;
Relation rel;
bool started_tx;
#ifdef VERBOSE_INSERT
StringInfoData o;
#endif
+ ResultRelInfo *relinfo;
+ ItemPointer conflicts;
+ bool conflict = false;
+ ScanKey *index_keys;
+ int i;
+ ItemPointerData conflicting_tid;
+
+ ItemPointerSetInvalid(&conflicting_tid);
started_tx = bdr_performing_work();
estate = bdr_create_rel_estate(rel);
slot = ExecInitExtraTupleSlot(estate);
+ oldslot = ExecInitExtraTupleSlot(estate);
ExecSetSlotDescriptor(slot, RelationGetDescr(rel));
+ ExecSetSlotDescriptor(oldslot, RelationGetDescr(rel));
read_tuple(s, rel, slot);
elog(ERROR, "unexpected relkind '%c' rel \"%s\"",
rel->rd_rel->relkind, RelationGetRelationName(rel));
- PushActiveSnapshot(GetTransactionSnapshot());
-
/* debug output */
#ifdef VERBOSE_INSERT
initStringInfo(&o);
resetStringInfo(&o);
#endif
- simple_heap_insert(rel, slot->tts_tuple);
- UserTableUpdateIndexes(estate, slot);
- bdr_count_insert();
+ /*
+ * Search for conflicting tuples.
+ */
+ ExecOpenIndices(estate->es_result_relation_info);
+ relinfo = estate->es_result_relation_info;
+ index_keys = palloc0(relinfo->ri_NumIndices * sizeof(ScanKeyData*));
+ conflicts = palloc0(relinfo->ri_NumIndices * sizeof(ItemPointerData));
- PopActiveSnapshot();
+ build_index_scan_keys(estate, index_keys, slot);
+
+ /* do a SnapshotDirty search for conflicting tuples */
+ for (i = 0; i < relinfo->ri_NumIndices; i++)
+ {
+ IndexInfo *ii = relinfo->ri_IndexRelationInfo[i];
+ bool found = false;
+
+ Assert(ii->ii_Expressions == NIL);
+
+ if (!ii->ii_Unique)
+ continue;
+
+ /* if conflict: wait */
+ found = find_pkey_tuple(index_keys[i],
+ rel, relinfo->ri_IndexRelationDescs[i],
+ oldslot, true, LockTupleExclusive);
+
+ /* alert if there's more than one conflicting unique key */
+ if (found &&
+ ItemPointerIsValid(&conflicting_tid) &&
+ !ItemPointerEquals(&oldslot->tts_tuple->t_self,
+ &conflicting_tid))
+ {
+ /* FIXME: improve logging here */
+ elog(ERROR, "diverging uniqueness conflict");
+ }
+ else if (found)
+ {
+ ItemPointerCopy(&oldslot->tts_tuple->t_self, &conflicting_tid);
+ conflict = true;
+ break;
+ }
+ else
+ ItemPointerSetInvalid(&conflicts[i]);
+
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ /*
+ * If there's a conflict use the version created later, otherwise do a
+ * plain insert.
+ */
+ if (conflict)
+ {
+ TransactionId xmin;
+ TimestampTz local_ts;
+ RepNodeId local_node_id;
+ bool apply_update;
+ bool log_update;
+ CommitExtraData local_node_id_raw;
+
+ /* refetch tuple, check for old commit ts & origin */
+ xmin = HeapTupleHeaderGetXmin(oldslot->tts_tuple->t_data);
+
+ /*
+ * We now need to determine whether to keep the original version of
+ * the row, or apply the insert (as an update) we received. We use
+ * the last-update-wins strategy for this, except when the new update
+ * comes from the same node that originated the previous version of
+ * the tuple.
+ */
+ TransactionIdGetCommitTsData(xmin, &local_ts, &local_node_id_raw);
+ local_node_id = local_node_id_raw;
+
+ check_apply_update(local_node_id, local_ts,
+ &apply_update, &log_update);
+
+ elog(LOG, "insert vs insert conflict: %s",
+ apply_update ? "update" : "ignore");
+
+ if (apply_update)
+ {
+ simple_heap_update(rel,
+ &oldslot->tts_tuple->t_self,
+ slot->tts_tuple);
+ /* races will be resolved by abort/retry */
+ UserTableUpdateOpenIndexes(estate, slot);
+ }
+ }
+ else
+ {
+ simple_heap_insert(rel, slot->tts_tuple);
+ /* races will be resolved by abort/retry */
+ UserTableUpdateOpenIndexes(estate, slot);
+ }
+
+ ExecCloseIndices(estate->es_result_relation_info);
check_sequencer_wakeup(rel);
LockRelId lockid = rel->rd_lockInfo.lockRelId;
TransactionId oldxid = GetTopTransactionId();
+ /* there never should be conflicts on these */
+ Assert(!conflict);
+
/*
* Release transaction bound resources for CONCURRENTLY support.
*/
Assert(idxrel->rd_index->indisunique);
- build_scan_key(skey, rel, idxrel, oldslot->tts_tuple);
+ build_index_scan_key(skey, rel, idxrel, oldslot);
PushActiveSnapshot(GetTransactionSnapshot());
/* look for tuple identified by the (old) primary key */
- found_tuple = find_pkey_tuple(skey, rel, idxrel, oldslot, true);
+ found_tuple = find_pkey_tuple(skey, rel, idxrel, oldslot, true,
+ pkey_sent ? LockTupleExclusive : LockTupleNoKeyExclusive);
if (found_tuple)
{
else if (local_tli < remote_tli)
*perform_update = true;
else if (local_tli > remote_tli)
- *perform_update = false;
+ *perform_update = false;
else
/* shouldn't happen */
elog(ERROR, "unsuccessful node comparison");
PushActiveSnapshot(GetTransactionSnapshot());
- build_scan_key(skey, rel, idxrel, slot->tts_tuple);
+ build_index_scan_key(skey, rel, idxrel, slot);
/* try to find tuple via a (candidate|primary) key */
- found_old = find_pkey_tuple(skey, rel, idxrel, slot, true);
+ found_old = find_pkey_tuple(skey, rel, idxrel, slot, true, LockTupleExclusive);
if (found_old)
{
static void
UserTableUpdateIndexes(EState *estate, TupleTableSlot *slot)
{
- List *recheckIndexes = NIL;
-
/* HOT update does not require index inserts */
if (HeapTupleIsHeapOnly(slot->tts_tuple))
return;
ExecOpenIndices(estate->es_result_relation_info);
+ UserTableUpdateOpenIndexes(estate, slot);
+ ExecCloseIndices(estate->es_result_relation_info);
+}
+
+static void
+UserTableUpdateOpenIndexes(EState *estate, TupleTableSlot *slot)
+{
+ List *recheckIndexes = NIL;
+
+ /* HOT update does not require index inserts */
+ if (HeapTupleIsHeapOnly(slot->tts_tuple))
+ return;
if (estate->es_result_relation_info->ri_NumIndices > 0)
{
/* FIXME: recheck the indexes */
list_free(recheckIndexes);
+}
- ExecCloseIndices(estate->es_result_relation_info);
+static void
+build_index_scan_keys(EState *estate, ScanKey *scan_keys, TupleTableSlot *slot)
+{
+ ResultRelInfo *relinfo;
+ int i;
+
+ relinfo = estate->es_result_relation_info;
+
+ /* build scankeys for each index */
+ for (i = 0; i < relinfo->ri_NumIndices; i++)
+ {
+ IndexInfo *ii = relinfo->ri_IndexRelationInfo[i];
+
+ if (!ii->ii_Unique)
+ continue;
+
+ scan_keys[i] = palloc(ii->ii_NumIndexAttrs * sizeof(ScanKeyData));
+ build_index_scan_key(scan_keys[i],
+ relinfo->ri_RelationDesc,
+ relinfo->ri_IndexRelationDescs[i],
+ slot);
+ }
}
/*
* is setup to match 'rel' (*NOT* idxrel!).
*/
static void
-build_scan_key(ScanKey skey, Relation rel, Relation idxrel, HeapTuple key)
+build_index_scan_key(ScanKey skey, Relation rel, Relation idxrel, TupleTableSlot *slot)
{
int attoff;
Datum indclassDatum;
bool isnull;
oidvector *opclass;
int2vector *indkey;
+ HeapTuple key = slot->tts_tuple;
indclassDatum = SysCacheGetAttr(INDEXRELID, idxrel->rd_indextuple,
Anum_pg_index_indclass, &isnull);
*/
static bool
find_pkey_tuple(ScanKey skey, Relation rel, Relation idxrel,
- TupleTableSlot *slot, bool lock)
+ TupleTableSlot *slot, bool lock, LockTupleMode mode)
{
HeapTuple scantuple;
- bool found = false;
+ bool found;
IndexScanDesc scan;
- Snapshot snap = GetActiveSnapshot();
-
- /*
- * XXX: should we use a different snapshot here to be able to get more
- * information about concurrent activity? For now we use a snapshot
- * isolation snapshot...
- */
+ SnapshotData snap;
+ TransactionId xwait;
+ InitDirtySnapshot(snap);
scan = index_beginscan(rel, idxrel,
- snap,
+ &snap,
RelationGetNumberOfAttributes(idxrel),
0);
+
+retry:
+ found = false;
+
index_rescan(scan, skey, RelationGetNumberOfAttributes(idxrel), NULL, 0);
- while ((scantuple = index_getnext(scan, ForwardScanDirection)) != NULL)
+ if ((scantuple = index_getnext(scan, ForwardScanDirection)) != NULL)
{
HeapTuple ht;
- if (found)
- elog(ERROR, "WTF, more than one tuple found via pk???");
+
found = true;
/*
* Ugly trick to track the HeapTupleData pointing into a buffer in the
memcpy(ht, scantuple, sizeof(HeapTupleData));
ExecStoreTuple(ht, slot, scan->xs_cbuf, false);
slot->tts_shouldFree = true;
- }
- index_endscan(scan);
+ xwait = TransactionIdIsValid(snap.xmin) ?
+ snap.xmin : snap.xmax;
+
+ if (TransactionIdIsValid(xwait))
+ {
+ XactLockTableWait(xwait);
+ goto retry;
+ }
+ }
if (lock && found)
{
ItemPointerCopy(&slot->tts_tuple->t_self, &locktup.t_self);
- res = heap_lock_tuple(rel, &locktup, snap->curcid, LockTupleExclusive,
+ PushActiveSnapshot(GetLatestSnapshot());
+
+ res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false), mode,
false /* wait */,
false /* don't follow updates */,
&buf, &hufd);
+ /* the tuple slot already has the buffer pinned */
+ ReleaseBuffer(buf);
+
+ PopActiveSnapshot();
+
switch (res)
{
case HeapTupleMayBeUpdated:
break;
case HeapTupleUpdated:
/* XXX: Improve handling here */
- ereport(ERROR,
+ ereport(LOG,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("concurrent update, retrying")));
+ goto retry;
default:
elog(ERROR, "unexpected HTSU_Result after locking: %u", res);
break;
}
- ReleaseBuffer(buf);
}
+ index_endscan(scan);
+
return found;
}