From 0f42206531b3646f5bcda2bd35bb53fb0488eb00 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Tue, 16 Sep 2025 06:16:23 +0000 Subject: [PATCH] Fix intermittent BF failures in 035_conflicts. This commit addresses two sources of instability in the 035_conflicts test: Unstable VACUUM usage: The test previously relied on VACUUM to remove a deleted tuple, which can be unreliable due to concurrent background writer or checkpoint activity that may lock the page containing the deleted tuple. Since the test already verifies that replication_slot.xmin has advanced sufficiently to confirm the feature's correctness, the VACUUM step is removed to improve test stability. Timing-sensitive retention resumption check: The test includes a check to confirm that retention of conflict-relevant information resumes after setting max_retention_duration to 0. However, in some cases, the apply worker resumes retention immediately after the inactive slot is removed from synchronized_standby_slots, even before max_retention_duration is updated. This can happen if remote changes are applied in under 1ms, causing the test to time out while waiting for a later log position. To ensure consistent behavior, this commit delays the removal of synchronized_standby_slots until after max_retention_duration is set to 0. 
Author: Zhijie Hou Reviewed-by: shveta malik Reviewed-by: Amit Kapila Discussion: https://postgr.es/m/TY4PR01MB16907805DE4816E53C54708159414A@TY4PR01MB16907.jpnprd01.prod.outlook.com --- src/test/subscription/t/035_conflicts.pl | 32 ++++++++---------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/src/test/subscription/t/035_conflicts.pl b/src/test/subscription/t/035_conflicts.pl index f2aee0f70df..a526986c4e4 100644 --- a/src/test/subscription/t/035_conflicts.pl +++ b/src/test/subscription/t/035_conflicts.pl @@ -342,15 +342,6 @@ ok( $node_A->poll_query_until( ), "the xmin value of slot 'pg_conflict_detection' is updated on Node A"); -# Confirm that the dead tuple can be removed now -($cmdret, $stdout, $stderr) = $node_A->psql( - 'postgres', qq(VACUUM (verbose) public.tab;) -); - -ok( $stderr =~ - qr/1 removed, 1 remain, 0 are dead but not yet removable/, - 'the deleted column is removed'); - ############################################################################### # Ensure that the deleted tuple needed to detect an update_deleted conflict is # accessible via a sequential table scan. 
@@ -555,13 +546,6 @@ if ($injection_points_supported != 0) "the xmin value of slot 'pg_conflict_detection' is updated on subscriber" ); - # Confirm that the dead tuple can be removed now - ($cmdret, $stdout, $stderr) = - $node_A->psql('postgres', qq(VACUUM (verbose) public.tab;)); - - ok($stderr =~ qr/1 removed, 0 remain, 0 are dead but not yet removable/, - 'the deleted column is removed'); - # Get the commit timestamp for the publisher's update my $pub_ts = $node_B->safe_psql('postgres', "SELECT pg_xact_commit_timestamp(xmin) from tab where a=1;"); @@ -625,12 +609,6 @@ $result = $node_A->safe_psql('postgres', "SELECT subretentionactive FROM pg_subscription WHERE subname='$subname_AB';"); is($result, qq(f), 'retention is inactive'); -# Drop the physical slot and reset the synchronized_standby_slots setting -$node_B->safe_psql('postgres', - "SELECT * FROM pg_drop_replication_slot('blocker');"); -$node_B->adjust_conf('postgresql.conf', 'synchronized_standby_slots', "''"); -$node_B->reload; - ############################################################################### # Check that dead tuple retention resumes when the max_retention_duration is set # 0. @@ -642,6 +620,16 @@ $log_offset = -s $node_A->logfile; $node_A->safe_psql('postgres', "ALTER SUBSCRIPTION $subname_AB SET (max_retention_duration = 0);"); +# Drop the physical slot and reset the synchronized_standby_slots setting. We +# change this after setting max_retention_duration to 0, ensuring consistent +# results in the test as the resumption becomes possible immediately after +# resetting synchronized_standby_slots, due to the smaller max_retention_duration +# value of 1ms. 
+$node_B->safe_psql('postgres', + "SELECT * FROM pg_drop_replication_slot('blocker');"); +$node_B->adjust_conf('postgresql.conf', 'synchronized_standby_slots', "''"); +$node_B->reload; + # Confirm that the retention resumes $node_A->wait_for_log( qr/logical replication worker for subscription "tap_sub_a_b" will resume retaining the information for detecting conflicts -- 2.39.5