From: Dean Rasheed Date: Fri, 5 Sep 2025 07:23:57 +0000 (+0100) Subject: Fix concurrent update issue with MERGE. X-Git-Url: http://waps.l3s.uni-hannover.de/gitweb/?a=commitdiff_plain;h=21a61b87f47b6a1b359021bf9f71b14bbf805330;p=postgresql.git Fix concurrent update issue with MERGE. When executing a MERGE UPDATE action, if there is more than one concurrent update of the target row, the lock-and-retry code would sometimes incorrectly identify the latest version of the target tuple, leading to incorrect results. This was caused by using the ctid field from the TM_FailureData returned by table_tuple_lock() in a case where the result was TM_Ok, which is unsafe because the TM_FailureData struct is not guaranteed to be fully populated in that case. Instead, it should use the tupleid passed to (and updated by) table_tuple_lock(). To reduce the chances of similar errors in the future, improve the commentary for table_tuple_lock() and TM_FailureData to make it clearer that table_tuple_lock() updates the tid passed to it, and most fields of TM_FailureData should not be relied on in non-failure cases. An exception to this is the "traversed" field, which is set in both success and failure cases. Reported-by: Dmitry Author: Yugo Nagata Reviewed-by: Dean Rasheed Reviewed-by: Chao Li Discussion: https://postgr.es/m/1570d30e-2b95-4239-b9c3-f7bf2f2f8556@yandex.ru Backpatch-through: 15 --- diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index e0dc62d31be..6597356c018 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -3146,7 +3146,7 @@ lmerge_matched: * the tuple moved, and setting our current * resultRelInfo to that. */ - if (ItemPointerIndicatesMovedPartitions(&context->tmfd.ctid)) + if (ItemPointerIndicatesMovedPartitions(tupleid)) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("tuple to be deleted was already moved to another partition due to concurrent update"))); @@ -3158,14 +3158,14 @@ lmerge_matched: * that the first qualifying WHEN MATCHED action * is executed. * - * Update tupleid to that of the new tuple, for - * the refetch we do at the top. + * tupleid has been updated to that of the new + * tuple, as required for the refetch we do at the + * top. */ if (resultRelInfo->ri_needLockTagTuple) UnlockTuple(resultRelInfo->ri_RelationDesc, &lockedtid, InplaceUpdateTupleLock); - ItemPointerCopy(&context->tmfd.ctid, tupleid); goto lmerge_matched; case TM_Deleted: diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 2e919d569af..1a399e24460 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -121,7 +121,9 @@ typedef enum TU_UpdateIndexes /* * When table_tuple_update, table_tuple_delete, or table_tuple_lock fail * because the target tuple is already outdated, they fill in this struct to - * provide information to the caller about what happened. + * provide information to the caller about what happened. When those functions + * succeed, the contents of this struct should not be relied upon, except for + * `traversed`, which may be set in both success and failure cases. * * ctid is the target's ctid link: it is the same as the target's TID if the * target was deleted, or the location of the replacement tuple if the target @@ -137,6 +139,9 @@ typedef enum TU_UpdateIndexes * tuple); otherwise cmax is zero. (We make this restriction because * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other * transactions.) + * + * traversed indicates if an update chain was followed in order to try to lock + * the target tuple. (This may be set in both success and failure cases.) */ typedef struct TM_FailureData { @@ -1544,7 +1549,7 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, * * Input parameters: * relation: relation containing tuple (caller must hold suitable lock) - * tid: TID of tuple to lock + * tid: TID of tuple to lock (updated if an update chain was followed) * snapshot: snapshot to use for visibility determinations * cid: current command ID (used for visibility test, and stored into * tuple's cmax if lock is successful) @@ -1569,8 +1574,10 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, * TM_WouldBlock: lock couldn't be acquired and wait_policy is skip * * In the failure cases other than TM_Invisible and TM_Deleted, the routine - * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax. See - * comments for struct TM_FailureData for additional info. + * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax. + * Additionally, in both success and failure cases, tmfd->traversed is set if + * an update chain was followed. See comments for struct TM_FailureData for + * additional info. */ static inline TM_Result table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot, diff --git a/src/test/isolation/expected/merge-match-recheck.out b/src/test/isolation/expected/merge-match-recheck.out index 9a44a595927..717f12b3b90 100644 --- a/src/test/isolation/expected/merge-match-recheck.out +++ b/src/test/isolation/expected/merge-match-recheck.out @@ -262,6 +262,142 @@ key|balance|status|val step c1: COMMIT; +starting permutation: update1 update6 merge_bal c2 select1 c1 +step update1: UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1; +step update6: UPDATE target t SET balance = balance - 100, val = t.val || ' updated by update6' WHERE t.key = 1; +step merge_bal: + MERGE INTO target t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3'; + +step c2: COMMIT; +step merge_bal: <... completed> +step select1: SELECT * FROM target; +key|balance|status|val +---+-------+------+------------------------------------------------- + 1| 140|s1 |setup updated by update1 updated by update6 when1 +(1 row) + +step c1: COMMIT; + +starting permutation: update1_pa update6_pa merge_bal_pa c2 select1_pa c1 +step update1_pa: UPDATE target_pa t SET balance = balance + 10, val = t.val || ' updated by update1_pa' WHERE t.key = 1; +step update6_pa: UPDATE target_pa t SET balance = balance - 100, val = t.val || ' updated by update6_pa' WHERE t.key = 1; +step merge_bal_pa: + MERGE INTO target_pa t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3'; + +step c2: COMMIT; +step merge_bal_pa: <... completed> +step select1_pa: SELECT * FROM target_pa; +key|balance|status|val +---+-------+------+------------------------------------------------------- + 1| 140|s1 |setup updated by update1_pa updated by update6_pa when1 +(1 row) + +step c1: COMMIT; + +starting permutation: update1_tg update6_tg merge_bal_tg c2 select1_tg c1 +s2: NOTICE: Update: (1,160,s1,setup) -> (1,170,s1,"setup updated by update1_tg") +step update1_tg: UPDATE target_tg t SET balance = balance + 10, val = t.val || ' updated by update1_tg' WHERE t.key = 1; +s2: NOTICE: Update: (1,170,s1,"setup updated by update1_tg") -> (1,70,s1,"setup updated by update1_tg updated by update6_tg") +step update6_tg: UPDATE target_tg t SET balance = balance - 100, val = t.val || ' updated by update6_tg' WHERE t.key = 1; +step merge_bal_tg: + MERGE INTO target_tg t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3'; + +step c2: COMMIT; +s1: NOTICE: Update: (1,70,s1,"setup updated by update1_tg updated by update6_tg") -> (1,140,s1,"setup updated by update1_tg updated by update6_tg when1") +step merge_bal_tg: <... completed> +step select1_tg: SELECT * FROM target_tg; +key|balance|status|val +---+-------+------+------------------------------------------------------- + 1| 140|s1 |setup updated by update1_tg updated by update6_tg when1 +(1 row) + +step c1: COMMIT; + +starting permutation: update7 update6 merge_bal c2 select1 c1 +step update7: UPDATE target t SET balance = 350, val = t.val || ' updated by update7' WHERE t.key = 1; +step update6: UPDATE target t SET balance = balance - 100, val = t.val || ' updated by update6' WHERE t.key = 1; +step merge_bal: + MERGE INTO target t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3'; + +step c2: COMMIT; +step merge_bal: <... completed> +step select1: SELECT * FROM target; +key|balance|status|val +---+-------+------+------------------------------------------------- + 1| 2000|s1 |setup updated by update7 updated by update6 when3 +(1 row) + +step c1: COMMIT; + +starting permutation: update1_pa_move merge_bal_pa c2 c1 +step update1_pa_move: UPDATE target_pa t SET balance = 210, val = t.val || ' updated by update1_pa_move' WHERE t.key = 1; +step merge_bal_pa: + MERGE INTO target_pa t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3'; + +step c2: COMMIT; +step merge_bal_pa: <... completed> +ERROR: tuple to be locked was already moved to another partition due to concurrent update +step c1: COMMIT; + +starting permutation: update1_pa update1_pa_move merge_bal_pa c2 c1 +step update1_pa: UPDATE target_pa t SET balance = balance + 10, val = t.val || ' updated by update1_pa' WHERE t.key = 1; +step update1_pa_move: UPDATE target_pa t SET balance = 210, val = t.val || ' updated by update1_pa_move' WHERE t.key = 1; +step merge_bal_pa: + MERGE INTO target_pa t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3'; + +step c2: COMMIT; +step merge_bal_pa: <... completed> +ERROR: tuple to be locked was already moved to another partition due to concurrent update +step c1: COMMIT; + starting permutation: update1 merge_delete c2 select1 c1 step update1: UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1; step merge_delete: diff --git a/src/test/isolation/specs/merge-match-recheck.spec b/src/test/isolation/specs/merge-match-recheck.spec index 298b2bfdcd6..57e32d7f80f 100644 --- a/src/test/isolation/specs/merge-match-recheck.spec +++ b/src/test/isolation/specs/merge-match-recheck.spec @@ -142,6 +142,8 @@ setup BEGIN ISOLATION LEVEL READ COMMITTED; } step "update1" { UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1; } +step "update1_pa" { UPDATE target_pa t SET balance = balance + 10, val = t.val || ' updated by update1_pa' WHERE t.key = 1; } +step "update1_pa_move" { UPDATE target_pa t SET balance = 210, val = t.val || ' updated by update1_pa_move' WHERE t.key = 1; } step "update1_tg" { UPDATE target_tg t SET balance = balance + 10, val = t.val || ' updated by update1_tg' WHERE t.key = 1; } step "update2" { UPDATE target t SET status = 's2', val = t.val || ' updated by update2' WHERE t.key = 1; } step "update2_tg" { UPDATE target_tg t SET status = 's2', val = t.val || ' updated by update2_tg' WHERE t.key = 1; } @@ -149,6 +151,10 @@ step "update3" { UPDATE target t SET status = 's3', val = t.val || ' updated by step "update3_tg" { UPDATE target_tg t SET status = 's3', val = t.val || ' updated by update3_tg' WHERE t.key = 1; } step "update5" { UPDATE target t SET status = 's5', val = t.val || ' updated by update5' WHERE t.key = 1; } step "update5_tg" { UPDATE target_tg t SET status = 's5', val = t.val || ' updated by update5_tg' WHERE t.key = 1; } +step "update6" { UPDATE target t SET balance = balance - 100, val = t.val || ' updated by update6' WHERE t.key = 1; } +step "update6_pa" { UPDATE target_pa t SET balance = balance - 100, val = t.val || ' updated by update6_pa' WHERE t.key = 1; } +step "update6_tg" { UPDATE target_tg t SET balance = balance - 100, val = t.val || ' updated by update6_tg' WHERE t.key = 1; } +step "update7" { UPDATE target t SET balance = 350, val = t.val || ' updated by update7' WHERE t.key = 1; } step "update_bal1" { UPDATE target t SET balance = 50, val = t.val || ' updated by update_bal1' WHERE t.key = 1; } step "update_bal1_pa" { UPDATE target_pa t SET balance = 50, val = t.val || ' updated by update_bal1_pa' WHERE t.key = 1; } step "update_bal1_tg" { UPDATE target_tg t SET balance = 50, val = t.val || ' updated by update_bal1_tg' WHERE t.key = 1; } @@ -175,6 +181,18 @@ permutation "update_bal1" "merge_bal" "c2" "select1" "c1" permutation "update_bal1_pa" "merge_bal_pa" "c2" "select1_pa" "c1" permutation "update_bal1_tg" "merge_bal_tg" "c2" "select1_tg" "c1" +# merge_bal sees row concurrently updated twice and rechecks WHEN conditions, different check passes, so final balance = 140 +permutation "update1" "update6" "merge_bal" "c2" "select1" "c1" +permutation "update1_pa" "update6_pa" "merge_bal_pa" "c2" "select1_pa" "c1" +permutation "update1_tg" "update6_tg" "merge_bal_tg" "c2" "select1_tg" "c1" + +# merge_bal sees row concurrently updated twice, first update would cause all checks to fail, second update causes different check to pass, so final balance = 2000 +permutation "update7" "update6" "merge_bal" "c2" "select1" "c1" + +# merge_bal sees concurrently updated row moved to new partition, so fails +permutation "update1_pa_move" "merge_bal_pa" "c2" "c1" +permutation "update1_pa" "update1_pa_move" "merge_bal_pa" "c2" "c1" + # merge_delete sees concurrently updated row and rechecks WHEN conditions, but recheck passes and row is deleted permutation "update1" "merge_delete" "c2" "select1" "c1" permutation "update1_tg" "merge_delete_tg" "c2" "select1_tg" "c1"