amcheck: Fix snapshot usage in bt_index_parent_check
author     Álvaro Herrera <alvherre@kurilemu.de>
           Thu, 4 Dec 2025 17:12:08 +0000 (18:12 +0100)
committer  Álvaro Herrera <alvherre@kurilemu.de>
           Thu, 4 Dec 2025 17:12:08 +0000 (18:12 +0100)
We were using SnapshotAny to do some index checks, but that's wrong and
causes spurious errors when used on indexes created by CREATE INDEX
CONCURRENTLY.  Fix it to use an MVCC snapshot, and add a test for it.

This problem came in with commit 5ae2087202af, which introduced the
uniqueness check.  Backpatch to 17.

Author: Mihail Nikalayeu <mihailnikalayeu@gmail.com>
Reviewed-by: Andrey Borodin <x4mmm@yandex-team.ru>
Backpatch-through: 17
Discussion: https://postgr.es/m/CANtu0ojmVd27fEhfpST7RG2KZvwkX=dMyKUqg0KM87FkOSdz8Q@mail.gmail.com
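
For illustration, a minimal SQL sketch of the scenario the new TAP test
exercises (two sessions; the spurious failure needed a transaction that
stays open across the CREATE INDEX CONCURRENTLY):

    -- requires: CREATE EXTENSION amcheck;
    -- session 1: set up the table and keep a transaction open
    CREATE TABLE quebec (i int PRIMARY KEY);
    INSERT INTO quebec SELECT i FROM generate_series(1, 2) s(i);
    BEGIN;
    SELECT pg_current_xact_id();

    -- session 2: delete a row, build the index concurrently, then verify it;
    -- before this fix the heapallindexed pass could report a spurious
    -- missing-index-tuple error because the heap scan used SnapshotAny
    DELETE FROM quebec WHERE i = 1;
    CREATE INDEX CONCURRENTLY oscar ON quebec (i);
    SELECT bt_index_parent_check('oscar', heapallindexed => true);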

contrib/amcheck/t/002_cic.pl
contrib/amcheck/verify_nbtree.c
doc/src/sgml/amcheck.sgml

contrib/amcheck/t/002_cic.pl
index 6a0c4f611258ff9612a19a2a03b242a702662225..f4a24936b2c01bb3add51aef791be98da17284db 100644 (file)
@@ -64,5 +64,28 @@ $node->pgbench(
          )
    });
 
+# Test bt_index_parent_check() with indexes created with
+# CREATE INDEX CONCURRENTLY.
+$node->safe_psql('postgres', q(CREATE TABLE quebec(i int primary key)));
+# Insert two rows into the table
+$node->safe_psql('postgres',
+   q(INSERT INTO quebec SELECT i FROM generate_series(1, 2) s(i);));
+
+# start background transaction
+my $in_progress_h = $node->background_psql('postgres');
+$in_progress_h->query_safe(q(BEGIN; SELECT pg_current_xact_id();));
+
+# delete one row from table, while background transaction is in progress
+$node->safe_psql('postgres', q(DELETE FROM quebec WHERE i = 1;));
+# create index concurrently, which will skip the deleted row
+$node->safe_psql('postgres', q(CREATE INDEX CONCURRENTLY oscar ON quebec(i);));
+
+# check index using bt_index_parent_check
+my $result = $node->psql('postgres',
+   q(SELECT bt_index_parent_check('oscar', heapallindexed => true)));
+is($result, '0', 'bt_index_parent_check for CIC after removed row');
+
+$in_progress_h->quit;
+
 $node->stop;
 done_testing();
contrib/amcheck/verify_nbtree.c
index 75751e2a1e90da762e6a81d852b8873e258405cd..fce8bd9f9f94fbe8c2beb3cc1fad8691a70ce642 100644 (file)
@@ -382,7 +382,6 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
    BTMetaPageData *metad;
    uint32      previouslevel;
    BtreeLevel  current;
-   Snapshot    snapshot = SnapshotAny;
 
    if (!readonly)
        elog(DEBUG1, "verifying consistency of tree structure for index \"%s\"",
@@ -433,54 +432,46 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
        state->heaptuplespresent = 0;
 
        /*
-        * Register our own snapshot in !readonly case, rather than asking
+        * Register our own snapshot for heapallindexed, rather than asking
         * table_index_build_scan() to do this for us later.  This needs to
         * happen before index fingerprinting begins, so we can later be
         * certain that index fingerprinting should have reached all tuples
         * returned by table_index_build_scan().
         */
-       if (!state->readonly)
-       {
-           snapshot = RegisterSnapshot(GetTransactionSnapshot());
+       state->snapshot = RegisterSnapshot(GetTransactionSnapshot());
 
-           /*
-            * GetTransactionSnapshot() always acquires a new MVCC snapshot in
-            * READ COMMITTED mode.  A new snapshot is guaranteed to have all
-            * the entries it requires in the index.
-            *
-            * We must defend against the possibility that an old xact
-            * snapshot was returned at higher isolation levels when that
-            * snapshot is not safe for index scans of the target index.  This
-            * is possible when the snapshot sees tuples that are before the
-            * index's indcheckxmin horizon.  Throwing an error here should be
-            * very rare.  It doesn't seem worth using a secondary snapshot to
-            * avoid this.
-            */
-           if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
-               !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
-                                      snapshot->xmin))
-               ereport(ERROR,
-                       (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                        errmsg("index \"%s\" cannot be verified using transaction snapshot",
-                               RelationGetRelationName(rel))));
-       }
+       /*
+        * GetTransactionSnapshot() always acquires a new MVCC snapshot in
+        * READ COMMITTED mode.  A new snapshot is guaranteed to have all the
+        * entries it requires in the index.
+        *
+        * We must defend against the possibility that an old xact snapshot
+        * was returned at higher isolation levels when that snapshot is not
+        * safe for index scans of the target index.  This is possible when
+        * the snapshot sees tuples that are before the index's indcheckxmin
+        * horizon.  Throwing an error here should be very rare.  It doesn't
+        * seem worth using a secondary snapshot to avoid this.
+        */
+       if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
+           !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
+                                  state->snapshot->xmin))
+           ereport(ERROR,
+                   errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                   errmsg("index \"%s\" cannot be verified using transaction snapshot",
+                          RelationGetRelationName(rel)));
    }
 
    /*
-    * We need a snapshot to check the uniqueness of the index. For better
-    * performance take it once per index check. If snapshot already taken
-    * reuse it.
+    * We need a snapshot to check the uniqueness of the index.  For better
+    * performance, take it once per index check.  If one was already taken
+    * above, use that.
     */
    if (state->checkunique)
    {
        state->indexinfo = BuildIndexInfo(state->rel);
-       if (state->indexinfo->ii_Unique)
-       {
-           if (snapshot != SnapshotAny)
-               state->snapshot = snapshot;
-           else
-               state->snapshot = RegisterSnapshot(GetTransactionSnapshot());
-       }
+
+       if (state->indexinfo->ii_Unique && state->snapshot == InvalidSnapshot)
+           state->snapshot = RegisterSnapshot(GetTransactionSnapshot());
    }
 
    Assert(!state->rootdescend || state->readonly);
@@ -555,13 +546,12 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
        /*
         * Create our own scan for table_index_build_scan(), rather than
         * getting it to do so for us.  This is required so that we can
-        * actually use the MVCC snapshot registered earlier in !readonly
-        * case.
+        * actually use the MVCC snapshot registered earlier.
         *
         * Note that table_index_build_scan() calls heap_endscan() for us.
         */
        scan = table_beginscan_strat(state->heaprel,    /* relation */
-                                    snapshot,  /* snapshot */
+                                    state->snapshot,   /* snapshot */
                                     0, /* number of keys */
                                     NULL,  /* scan key */
                                     true,  /* buffer access strategy OK */
@@ -569,16 +559,15 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
 
        /*
         * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY
-        * behaves in !readonly case.
+        * behaves.
         *
         * It's okay that we don't actually use the same lock strength for the
-        * heap relation as any other ii_Concurrent caller would in !readonly
-        * case.  We have no reason to care about a concurrent VACUUM
-        * operation, since there isn't going to be a second scan of the heap
-        * that needs to be sure that there was no concurrent recycling of
-        * TIDs.
+        * heap relation as any other ii_Concurrent caller would.  We have no
+        * reason to care about a concurrent VACUUM operation, since there
+        * isn't going to be a second scan of the heap that needs to be sure
+        * that there was no concurrent recycling of TIDs.
         */
-       indexinfo->ii_Concurrent = !state->readonly;
+       indexinfo->ii_Concurrent = true;
 
        /*
         * Don't wait for uncommitted tuple xact commit/abort when index is a
@@ -602,14 +591,11 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
                                 state->heaptuplespresent, RelationGetRelationName(heaprel),
                                 100.0 * bloom_prop_bits_set(state->filter))));
 
-       if (snapshot != SnapshotAny)
-           UnregisterSnapshot(snapshot);
-
        bloom_free(state->filter);
    }
 
    /* Be tidy: */
-   if (snapshot == SnapshotAny && state->snapshot != InvalidSnapshot)
+   if (state->snapshot != InvalidSnapshot)
        UnregisterSnapshot(state->snapshot);
    MemoryContextDelete(state->targetcontext);
 }
doc/src/sgml/amcheck.sgml
index 0aff0a6c8c6fc178c1293cfaa82c5b5ffbd8031d..08006856579ad115dd2c6b5a6b2c9b8c2e1452cb 100644 (file)
@@ -382,7 +382,7 @@ SET client_min_messages = DEBUG1;
   verification functions is <literal>true</literal>, an additional
   phase of verification is performed against the table associated with
   the target index relation.  This consists of a <quote>dummy</quote>
-  <command>CREATE INDEX</command> operation, which checks for the
+  <command>CREATE INDEX CONCURRENTLY</command> operation, which checks for the
   presence of all hypothetical new index tuples against a temporary,
   in-memory summarizing structure (this is built when needed during
   the basic first phase of verification).  The summarizing structure
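
As a usage illustration (not part of this patch), the heapallindexed phase
described above is requested by passing the parameter to either verification
function, for example:

    -- lighter check; takes only an AccessShareLock on index and table
    SELECT bt_index_check('oscar', heapallindexed => true);
    -- parent-level check; takes a ShareLock on the index and its table
    SELECT bt_index_parent_check('oscar', heapallindexed => true);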