LockTupleMode mode, bool is_update,
TransactionId *result_xmax, uint16 *result_infomask,
uint16 *result_infomask2);
-static TM_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple,
- ItemPointer ctid, TransactionId xid,
+static TM_Result heap_lock_updated_tuple(Relation rel,
+ uint16 prior_infomask,
+ TransactionId prior_rawxmax,
+ const ItemPointerData *prior_ctid,
+ TransactionId xid,
LockTupleMode mode);
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask,
uint16 *new_infomask2);
* If there are updates, follow the update chain; bail out if
* that cannot be done.
*/
- if (follow_updates && updated)
+ if (follow_updates && updated &&
+ !ItemPointerEquals(&tuple->t_self, &t_ctid))
{
TM_Result res;
- res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
+ res = heap_lock_updated_tuple(relation,
+ infomask, xwait, &t_ctid,
GetCurrentTransactionId(),
mode);
if (res != TM_Ok)
}
/* if there are updates, follow the update chain */
- if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
+ if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
+ !ItemPointerEquals(&tuple->t_self, &t_ctid))
{
TM_Result res;
- res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
+ res = heap_lock_updated_tuple(relation,
+ infomask, xwait, &t_ctid,
GetCurrentTransactionId(),
mode);
if (res != TM_Ok)
* version as well.
*/
static TM_Result
-heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, TransactionId xid,
+heap_lock_updated_tuple_rec(Relation rel, TransactionId priorXmax,
+ const ItemPointerData *tid, TransactionId xid,
LockTupleMode mode)
{
TM_Result result;
old_infomask2;
TransactionId xmax,
new_xmax;
- TransactionId priorXmax = InvalidTransactionId;
bool cleared_all_frozen = false;
bool pinned_desired_page;
Buffer vmbuffer = InvalidBuffer;
* Follow update chain when locking an updated tuple, acquiring locks (row
* marks) on the updated versions.
*
- * The initial tuple is assumed to be already locked.
+ * 'prior_infomask', 'prior_raw_xmax' and 'prior_ctid' are the corresponding
+ * fields from the initial tuple. We will lock the tuples starting from the
+ * one that 'prior_ctid' points to. Note: This function does not lock the
+ * initial tuple itself.
*
* This function doesn't check visibility, it just unconditionally marks the
* tuple(s) as locked. If any tuple in the updated chain is being deleted
* levels, because that would lead to a serializability failure.
*/
static TM_Result
-heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid,
+heap_lock_updated_tuple(Relation rel,
+ uint16 prior_infomask,
+ TransactionId prior_raw_xmax,
+ const ItemPointerData *prior_ctid,
TransactionId xid, LockTupleMode mode)
{
+ INJECTION_POINT("heap_lock_updated_tuple", NULL);
+
/*
- * If the tuple has not been updated, or has moved into another partition
- * (effectively a delete) stop here.
+ * If the tuple has moved into another partition (effectively a delete)
+ * stop here.
*/
- if (!HeapTupleHeaderIndicatesMovedPartitions(tuple->t_data) &&
- !ItemPointerEquals(&tuple->t_self, ctid))
+ if (!ItemPointerIndicatesMovedPartitions(prior_ctid))
{
+ TransactionId prior_xmax;
+
/*
* If this is the first possibly-multixact-able operation in the
* current transaction, set my per-backend OldestMemberMXactId
*/
MultiXactIdSetOldestMember();
- return heap_lock_updated_tuple_rec(rel, ctid, xid, mode);
+ prior_xmax = (prior_infomask & HEAP_XMAX_IS_MULTI) ?
+ MultiXactIdGetUpdateXid(prior_raw_xmax, prior_infomask) : prior_raw_xmax;
+ return heap_lock_updated_tuple_rec(rel, prior_xmax, prior_ctid, xid, mode);
}
/* nothing to lock */
--- /dev/null
+Parsed test spec with 2 sessions
+
+starting permutation: s1begin s1update s2lock s1abort vacuum reinsert wake
+step s1begin: BEGIN;
+step s1update: UPDATE t SET id = 10000 WHERE id = 1 RETURNING ctid;
+ctid
+-----
+(1,2)
+(1 row)
+
+step s2lock: select * from t where id = 1 for update; <waiting ...>
+step s1abort: ABORT;
+step vacuum: VACUUM t;
+step reinsert:
+ INSERT INTO t VALUES (10001) RETURNING ctid;
+ UPDATE t SET id = 10002 WHERE id = 10001 RETURNING ctid;
+
+ctid
+-----
+(1,2)
+(1 row)
+
+ctid
+-----
+(1,3)
+(1 row)
+
+step wake:
+ SELECT FROM injection_points_detach('heap_lock_updated_tuple');
+ SELECT FROM injection_points_wakeup('heap_lock_updated_tuple');
+ <waiting ...>
+step s2lock: <... completed>
+id
+--
+ 1
+(1 row)
+
+step wake: <... completed>
+
+starting permutation: s1begin s1update s2lock s1abort vacuum reinsert_and_lock wake
+step s1begin: BEGIN;
+step s1update: UPDATE t SET id = 10000 WHERE id = 1 RETURNING ctid;
+ctid
+-----
+(1,2)
+(1 row)
+
+step s2lock: select * from t where id = 1 for update; <waiting ...>
+step s1abort: ABORT;
+step vacuum: VACUUM t;
+step reinsert_and_lock:
+ BEGIN;
+ INSERT INTO t VALUES (10001) RETURNING ctid;
+ SELECT ctid, * FROM t WHERE id = 1 FOR UPDATE;
+ COMMIT;
+ UPDATE t SET id = 10002 WHERE id = 10001 returning ctid;
+
+ctid
+-----
+(1,2)
+(1 row)
+
+ctid |id
+-----+--
+(0,1)| 1
+(1 row)
+
+ctid
+-----
+(1,3)
+(1 row)
+
+step wake:
+ SELECT FROM injection_points_detach('heap_lock_updated_tuple');
+ SELECT FROM injection_points_wakeup('heap_lock_updated_tuple');
+ <waiting ...>
+step s2lock: <... completed>
+id
+--
+ 1
+(1 row)
+
+step wake: <... completed>
--- /dev/null
+# Test a race condition in tuple locking
+#
+# This is a reproducer for the bug reported at:
+# https://www.postgresql.org/message-id/CAOG%2BRQ74x0q%3DkgBBQ%3DmezuvOeZBfSxM1qu_o0V28bwDz3dHxLw%40mail.gmail.com
+#
+# The bug was that, while following an update chain to lock tuples,
+# we sometimes failed to check that the xmin of the next tuple matched
+# the prior tuple's xmax. If the updated tuple version was vacuumed
+# away and its slot was reused for an unrelated tuple, we'd incorrectly
+# follow and lock the unrelated tuple.
+
+
+# Set up a test table with enough rows to fill a page. The UPDATE used
+# in the test must put the new tuple on a different page: otherwise
+# VACUUM could not remove the aborted tuple, since we would still hold
+# a pin on the first page.
+#
+# The exact number of rows inserted doesn't otherwise matter, but we
+# arrange things in a deterministic fashion so that the last inserted
+# tuple goes to (1,1), and the updated and aborted tuple goes to
+# (1,2). That way we can just memorize those ctids in the expected
+# output, to verify that the test exercises the scenario we want.
+setup
+{
+ CREATE EXTENSION injection_points;
+
+ CREATE TABLE t (id int PRIMARY KEY);
+ do $$
+ DECLARE
+ i int;
+ tid tid;
+ BEGIN
+ FOR i IN 1..5000 LOOP
+ INSERT INTO t VALUES (i) RETURNING ctid INTO tid;
+ IF tid = '(1,1)' THEN
+ RETURN;
+ END IF;
+ END LOOP;
+ RAISE 'expected to insert tuple to (1,1)';
+ END;
+ $$;
+}
+teardown
+{
+ DROP TABLE t;
+ DROP EXTENSION injection_points;
+}
+
+session s1
+step s1begin { BEGIN; }
+step s1update { UPDATE t SET id = 10000 WHERE id = 1 RETURNING ctid; }
+step s1abort { ABORT; }
+step vacuum { VACUUM t; }
+
+# Insert a new tuple, and update it.
+step reinsert {
+ INSERT INTO t VALUES (10001) RETURNING ctid;
+ UPDATE t SET id = 10002 WHERE id = 10001 RETURNING ctid;
+}
+
+# Same as the 'reinsert' step, but for extra confusion, we also stamp
+# the original tuple with the same 'xmax' as the re-inserted one.
+step reinsert_and_lock {
+ BEGIN;
+ INSERT INTO t VALUES (10001) RETURNING ctid;
+ SELECT ctid, * FROM t WHERE id = 1 FOR UPDATE;
+ COMMIT;
+ UPDATE t SET id = 10002 WHERE id = 10001 returning ctid;
+}
+
+step wake {
+ SELECT FROM injection_points_detach('heap_lock_updated_tuple');
+ SELECT FROM injection_points_wakeup('heap_lock_updated_tuple');
+}
+
+session s2
+setup {
+ SELECT FROM injection_points_set_local();
+ SELECT FROM injection_points_attach('heap_lock_updated_tuple', 'wait');
+}
+step s2lock { select * from t where id = 1 for update; }
+
+permutation
+ # Begin transaction, update a row. Because of how we set up the
+ # test table, the updated tuple lands at (1,2)
+ s1begin
+ s1update
+
+ # While the updating transaction is open, start a new session that
+ # tries to lock the row. This blocks on the open transaction.
+ s2lock
+
+ # Abort the updating transaction. This unblocks session 2, but it
+ # will immediately hit the injection point and block on that.
+ s1abort
+ # Vacuum away the updated, aborted tuple.
+ vacuum
+
+ # Insert a new tuple. It lands at the same location where the
+ # updated tuple was.
+ reinsert
+
+ # Let the locking transaction continue. It should lock the
+ # original tuple, ignoring the re-inserted tuple.
+ wake(s2lock)
+
+# Variant where the re-inserted tuple is also locked by the inserting
+# transaction. This scenario failed with an earlier version of the fix
+# during development.
+permutation
+ s1begin
+ s1update
+ s2lock
+ s1abort
+ vacuum
+ reinsert_and_lock
+ wake(s2lock)