amcheck: support for GiST
Hello hackers!
In PG18 we made a step toward improving amcheck by adding GIN support.
This thread continues previous work [0].
Please find attached two new steps for amcheck:
1. A function to verify GiST index integrity. This patch is in decent shape; it is simply rebased from last year's version.
2. Support on pg_amcheck's side for this function. This patch has not received as much review attention before, and perhaps should be extended to also cover the existing GIN functions. (A usage sketch follows below.)
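For reference, here is a minimal usage sketch of the new check. The table and index names are made up for illustration; the calls mirror the regression tests in the attached patches and assume amcheck has been updated to the version that ships gist_index_check():

    CREATE EXTENSION amcheck;
    CREATE TABLE gist_demo AS
        SELECT point(random(), s) AS c FROM generate_series(1, 10000) s;
    CREATE INDEX gist_demo_idx ON gist_demo USING gist (c);
    -- structural check only: parent tuples must cover the child keyspace
    SELECT gist_index_check('gist_demo_idx', false);
    -- additionally verify that every heap tuple is represented in the index
    SELECT gist_index_check('gist_demo_idx', true);

With the second patch the same verification becomes reachable from the command line, because pg_amcheck then expands its index patterns (-i/--index) to GiST indexes as well as btrees.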
I'll add this thread to the July commitfest.
Thanks!
Best regards, Andrey Borodin.
[0]: /messages/by-id/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1@yandex-team.ru
Attachments:
v2025-06-30-0002-Add-GiST-support-to-pg_amcheck.patch (application/octet-stream)
From 982778c382d8bf84c8832665e7cffaeb5e1cb23d Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Mon, 30 Jun 2025 14:20:13 +0300
Subject: [PATCH v2025-06-30 2/2] Add GiST support to pg_amcheck
Proof-of-concept patch adding support for GiST index checks
to the pg_amcheck binary.
Author: Andrey Borodin <amborodin@acm.org>
Reviewed-by: Alexander Lakhin <exclusion@gmail.com>
Reviewed-By: Tomas Vondra <tomas.vondra@enterprisedb.com>
---
src/bin/pg_amcheck/pg_amcheck.c | 290 ++++++++++++++++-----------
src/bin/pg_amcheck/t/002_nonesuch.pl | 8 +-
src/bin/pg_amcheck/t/003_check.pl | 65 ++++--
3 files changed, 220 insertions(+), 143 deletions(-)
diff --git a/src/bin/pg_amcheck/pg_amcheck.c b/src/bin/pg_amcheck/pg_amcheck.c
index 2b1fd566c35..e6a0e6d70ab 100644
--- a/src/bin/pg_amcheck/pg_amcheck.c
+++ b/src/bin/pg_amcheck/pg_amcheck.c
@@ -40,8 +40,7 @@ typedef struct PatternInfo
* NULL */
bool heap_only; /* true if rel_regex should only match heap
* tables */
- bool btree_only; /* true if rel_regex should only match btree
- * indexes */
+ bool index_only; /* true if rel_regex should only match indexes */
bool matched; /* true if the pattern matched in any database */
} PatternInfo;
@@ -75,10 +74,9 @@ typedef struct AmcheckOptions
/*
* As an optimization, if any pattern in the exclude list applies to heap
- * tables, or similarly if any such pattern applies to btree indexes, or
- * to schemas, then these will be true, otherwise false. These should
- * always agree with what you'd conclude by grep'ing through the exclude
- * list.
+ * tables, or similarly if any such pattern applies to indexes, or to
+ * schemas, then these will be true, otherwise false. These should always
+ * agree with what you'd conclude by grep'ing through the exclude list.
*/
bool excludetbl;
bool excludeidx;
@@ -99,14 +97,14 @@ typedef struct AmcheckOptions
int64 endblock;
const char *skip;
- /* btree index checking options */
+ /* index checking options */
bool parent_check;
bool rootdescend;
bool heapallindexed;
bool checkunique;
- /* heap and btree hybrid option */
- bool no_btree_expansion;
+ /* heap and indexes hybrid option */
+ bool no_index_expansion;
} AmcheckOptions;
static AmcheckOptions opts = {
@@ -135,7 +133,7 @@ static AmcheckOptions opts = {
.rootdescend = false,
.heapallindexed = false,
.checkunique = false,
- .no_btree_expansion = false
+ .no_index_expansion = false
};
static const char *progname = NULL;
@@ -152,13 +150,15 @@ typedef struct DatabaseInfo
char *datname;
char *amcheck_schema; /* escaped, quoted literal */
bool is_checkunique;
+ bool gist_supported;
} DatabaseInfo;
typedef struct RelationInfo
{
const DatabaseInfo *datinfo; /* shared by other relinfos */
Oid reloid;
- bool is_heap; /* true if heap, false if btree */
+ Oid amoid;
+ bool is_heap; /* true if heap, false if index */
char *nspname;
char *relname;
int relpages;
@@ -179,10 +179,12 @@ static void prepare_heap_command(PQExpBuffer sql, RelationInfo *rel,
PGconn *conn);
static void prepare_btree_command(PQExpBuffer sql, RelationInfo *rel,
PGconn *conn);
+static void prepare_gist_command(PQExpBuffer sql, RelationInfo *rel,
+ PGconn *conn);
static void run_command(ParallelSlot *slot, const char *sql);
static bool verify_heap_slot_handler(PGresult *res, PGconn *conn,
void *context);
-static bool verify_btree_slot_handler(PGresult *res, PGconn *conn, void *context);
+static bool verify_index_slot_handler(PGresult *res, PGconn *conn, void *context);
static void help(const char *progname);
static void progress_report(uint64 relations_total, uint64 relations_checked,
uint64 relpages_total, uint64 relpages_checked,
@@ -196,7 +198,7 @@ static void append_relation_pattern(PatternInfoArray *pia, const char *pattern,
int encoding);
static void append_heap_pattern(PatternInfoArray *pia, const char *pattern,
int encoding);
-static void append_btree_pattern(PatternInfoArray *pia, const char *pattern,
+static void append_index_pattern(PatternInfoArray *pia, const char *pattern,
int encoding);
static void compile_database_list(PGconn *conn, SimplePtrList *databases,
const char *initial_dbname);
@@ -288,6 +290,7 @@ main(int argc, char *argv[])
enum trivalue prompt_password = TRI_DEFAULT;
int encoding = pg_get_encoding_from_locale(NULL, false);
ConnParams cparams;
+ bool gist_warn_printed = false;
pg_logging_init(argv[0]);
progname = get_progname(argv[0]);
@@ -323,11 +326,11 @@ main(int argc, char *argv[])
break;
case 'i':
opts.allrel = false;
- append_btree_pattern(&opts.include, optarg, encoding);
+ append_index_pattern(&opts.include, optarg, encoding);
break;
case 'I':
opts.excludeidx = true;
- append_btree_pattern(&opts.exclude, optarg, encoding);
+ append_index_pattern(&opts.exclude, optarg, encoding);
break;
case 'j':
if (!option_parse_int(optarg, "-j/--jobs", 1, INT_MAX,
@@ -382,7 +385,7 @@ main(int argc, char *argv[])
maintenance_db = pg_strdup(optarg);
break;
case 2:
- opts.no_btree_expansion = true;
+ opts.no_index_expansion = true;
break;
case 3:
opts.no_toast_expansion = true;
@@ -531,6 +534,10 @@ main(int argc, char *argv[])
int ntups;
const char *amcheck_schema = NULL;
DatabaseInfo *dat = (DatabaseInfo *) cell->ptr;
+ int vmaj = 0,
+ vmin = 0,
+ vrev = 0;
+ const char *amcheck_version;
cparams.override_dbname = dat->datname;
if (conn == NULL || strcmp(PQdb(conn), dat->datname) != 0)
@@ -600,36 +607,32 @@ main(int argc, char *argv[])
strlen(amcheck_schema));
/*
- * Check the version of amcheck extension. Skip requested unique
- * constraint check with warning if it is not yet supported by
- * amcheck.
+ * Check the version of amcheck extension.
*/
- if (opts.checkunique == true)
- {
- /*
- * Now amcheck has only major and minor versions in the string but
- * we also support revision just in case. Now it is expected to be
- * zero.
- */
- int vmaj = 0,
- vmin = 0,
- vrev = 0;
- const char *amcheck_version = PQgetvalue(result, 0, 1);
+ amcheck_version = PQgetvalue(result, 0, 1);
- sscanf(amcheck_version, "%d.%d.%d", &vmaj, &vmin, &vrev);
+ /*
+ * Now amcheck has only major and minor versions in the string but we
+ * also support revision just in case. Now it is expected to be zero.
+ */
+ sscanf(amcheck_version, "%d.%d.%d", &vmaj, &vmin, &vrev);
- /*
- * checkunique option is supported in amcheck since version 1.4
- */
- if ((vmaj == 1 && vmin < 4) || vmaj == 0)
- {
- pg_log_warning("option %s is not supported by amcheck version %s",
- "--checkunique", amcheck_version);
- dat->is_checkunique = false;
- }
- else
- dat->is_checkunique = true;
+ /*
+ * checkunique option is supported in amcheck since version 1.4. Skip
+ * requested unique constraint check with warning if it is not yet
+ * supported by amcheck.
+ */
+ if (opts.checkunique && ((vmaj == 1 && vmin < 4) || vmaj == 0))
+ {
+ pg_log_warning("option %s is not supported by amcheck version %s",
+ "--checkunique", amcheck_version);
+ dat->is_checkunique = false;
}
+ else
+ dat->is_checkunique = opts.checkunique;
+
+	/* GiST index checking is supported in amcheck 1.6+ */
+	dat->gist_supported = ((vmaj == 1 && vmin >= 6) || vmaj > 1);
PQclear(result);
@@ -651,8 +654,8 @@ main(int argc, char *argv[])
if (pat->heap_only)
log_no_match("no heap tables to check matching \"%s\"",
pat->pattern);
- else if (pat->btree_only)
- log_no_match("no btree indexes to check matching \"%s\"",
+ else if (pat->index_only)
+ log_no_match("no indexes to check matching \"%s\"",
pat->pattern);
else if (pat->rel_regex == NULL)
log_no_match("no relations to check in schemas matching \"%s\"",
@@ -785,13 +788,29 @@ main(int argc, char *argv[])
if (opts.show_progress && progress_since_last_stderr)
fprintf(stderr, "\n");
- pg_log_info("checking btree index \"%s.%s.%s\"",
+ pg_log_info("checking index \"%s.%s.%s\"",
rel->datinfo->datname, rel->nspname, rel->relname);
progress_since_last_stderr = false;
}
- prepare_btree_command(&sql, rel, free_slot->connection);
+ if (rel->amoid == BTREE_AM_OID)
+ prepare_btree_command(&sql, rel, free_slot->connection);
+ else if (rel->amoid == GIST_AM_OID)
+ {
+ if (rel->datinfo->gist_supported)
+ prepare_gist_command(&sql, rel, free_slot->connection);
+ else
+ {
+ if (!gist_warn_printed)
+ pg_log_warning("GiST verification is not supported by installed amcheck version");
+ gist_warn_printed = true;
+ }
+ }
+ else
+ /* should not happen at this stage */
+				pg_log_info("verification of index type %u is not supported",
+ rel->amoid);
rel->sql = pstrdup(sql.data); /* pg_free'd after command */
- ParallelSlotSetHandler(free_slot, verify_btree_slot_handler, rel);
+ ParallelSlotSetHandler(free_slot, verify_index_slot_handler, rel);
run_command(free_slot, rel->sql);
}
}
@@ -869,7 +888,7 @@ prepare_heap_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
* Creates a SQL command for running amcheck checking on the given btree index
* relation. The command does not select any columns, as btree checking
* functions do not return any, but rather return corruption information by
- * raising errors, which verify_btree_slot_handler expects.
+ * raising errors, which verify_index_slot_handler expects.
*
* The constructed SQL command will silently skip temporary indexes, and
* indexes being reindexed concurrently, as checking them would needlessly draw
@@ -915,6 +934,28 @@ prepare_btree_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
rel->reloid);
}
+/*
+ * prepare_gist_command
+ *		Similar to the btree equivalent; prepares a command to check a GiST index.
+ */
+static void
+prepare_gist_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
+{
+ resetPQExpBuffer(sql);
+
+ appendPQExpBuffer(sql,
+ "SELECT %s.gist_index_check("
+ "index := c.oid, heapallindexed := %s)"
+ "\nFROM pg_catalog.pg_class c, pg_catalog.pg_index i "
+ "WHERE c.oid = %u "
+ "AND c.oid = i.indexrelid "
+ "AND c.relpersistence != 't' "
+ "AND i.indisready AND i.indisvalid AND i.indislive",
+ rel->datinfo->amcheck_schema,
+ (opts.heapallindexed ? "true" : "false"),
+ rel->reloid);
+}
+
/*
* run_command
*
@@ -954,7 +995,7 @@ run_command(ParallelSlot *slot, const char *sql)
* Note: Heap relation corruption is reported by verify_heapam() via the result
* set, rather than an ERROR, but running verify_heapam() on a corrupted heap
* table may still result in an error being returned from the server due to
- * missing relation files, bad checksums, etc. The btree corruption checking
+ * missing relation files, bad checksums, etc. The corruption checking
* functions always use errors to communicate corruption messages. We can't
* just abort processing because we got a mere ERROR.
*
@@ -1104,11 +1145,11 @@ verify_heap_slot_handler(PGresult *res, PGconn *conn, void *context)
}
/*
- * verify_btree_slot_handler
+ * verify_index_slot_handler
*
- * ParallelSlotHandler that receives results from a btree checking command
- * created by prepare_btree_command and outputs them for the user. The results
- * from the btree checking command is assumed to be empty, but when the results
+ * ParallelSlotHandler that receives results from a checking command created by
+ * prepare_[btree,gist]_command and outputs them for the user. The results
+ * from the checking command are assumed to be empty, but when the results
* are an error code, the useful information about the corruption is expected
* in the connection's error message.
*
@@ -1117,7 +1158,7 @@ verify_heap_slot_handler(PGresult *res, PGconn *conn, void *context)
* context: unused
*/
static bool
-verify_btree_slot_handler(PGresult *res, PGconn *conn, void *context)
+verify_index_slot_handler(PGresult *res, PGconn *conn, void *context)
{
RelationInfo *rel = (RelationInfo *) context;
@@ -1128,12 +1169,12 @@ verify_btree_slot_handler(PGresult *res, PGconn *conn, void *context)
if (ntups > 1)
{
/*
- * We expect the btree checking functions to return one void row
- * each, or zero rows if the check was skipped due to the object
- * being in the wrong state to be checked, so we should output
- * some sort of warning if we get anything more, not because it
- * indicates corruption, but because it suggests a mismatch
- * between amcheck and pg_amcheck versions.
+ * We expect the checking functions to return one void row each,
+ * or zero rows if the check was skipped due to the object being
+ * in the wrong state to be checked, so we should output some sort
+ * of warning if we get anything more, not because it indicates
+ * corruption, but because it suggests a mismatch between amcheck
+ * and pg_amcheck versions.
*
* In conjunction with --progress, anything written to stderr at
* this time would present strangely to the user without an extra
@@ -1143,7 +1184,7 @@ verify_btree_slot_handler(PGresult *res, PGconn *conn, void *context)
*/
if (opts.show_progress && progress_since_last_stderr)
fprintf(stderr, "\n");
- pg_log_warning("btree index \"%s.%s.%s\": btree checking function returned unexpected number of rows: %d",
+ pg_log_warning("index \"%s.%s.%s\": checking function returned unexpected number of rows: %d",
rel->datinfo->datname, rel->nspname, rel->relname, ntups);
if (opts.verbose)
pg_log_warning_detail("Query was: %s", rel->sql);
@@ -1157,7 +1198,7 @@ verify_btree_slot_handler(PGresult *res, PGconn *conn, void *context)
char *msg = indent_lines(PQerrorMessage(conn));
all_checks_pass = false;
- printf(_("btree index \"%s.%s.%s\":\n"),
+ printf(_("index \"%s.%s.%s\":\n"),
rel->datinfo->datname, rel->nspname, rel->relname);
printf("%s", msg);
if (opts.verbose)
@@ -1211,6 +1252,8 @@ help(const char *progname)
printf(_(" --heapallindexed check that all heap tuples are found within indexes\n"));
printf(_(" --parent-check check index parent/child relationships\n"));
printf(_(" --rootdescend search from root page to refind tuples\n"));
+ printf(_("\nGiST index checking options:\n"));
+ printf(_(" --heapallindexed check that all heap tuples are found within indexes\n"));
printf(_("\nConnection options:\n"));
printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
printf(_(" -p, --port=PORT database server port\n"));
@@ -1424,11 +1467,11 @@ append_schema_pattern(PatternInfoArray *pia, const char *pattern, int encoding)
* pattern: the relation name pattern
* encoding: client encoding for parsing the pattern
* heap_only: whether the pattern should only be matched against heap tables
- * btree_only: whether the pattern should only be matched against btree indexes
+ * index_only: whether the pattern should only be matched against indexes
*/
static void
append_relation_pattern_helper(PatternInfoArray *pia, const char *pattern,
- int encoding, bool heap_only, bool btree_only)
+ int encoding, bool heap_only, bool index_only)
{
PQExpBufferData dbbuf;
PQExpBufferData nspbuf;
@@ -1463,14 +1506,14 @@ append_relation_pattern_helper(PatternInfoArray *pia, const char *pattern,
termPQExpBuffer(&relbuf);
info->heap_only = heap_only;
- info->btree_only = btree_only;
+ info->index_only = index_only;
}
/*
* append_relation_pattern
*
* Adds the given pattern interpreted as a relation pattern, to be matched
- * against both heap tables and btree indexes.
+ * against both heap tables and indexes.
*
* pia: the pattern info array to be appended
* pattern: the relation name pattern
@@ -1499,17 +1542,17 @@ append_heap_pattern(PatternInfoArray *pia, const char *pattern, int encoding)
}
/*
- * append_btree_pattern
+ * append_index_pattern
*
* Adds the given pattern interpreted as a relation pattern, to be matched only
- * against btree indexes.
+ * against indexes.
*
* pia: the pattern info array to be appended
* pattern: the relation name pattern
* encoding: client encoding for parsing the pattern
*/
static void
-append_btree_pattern(PatternInfoArray *pia, const char *pattern, int encoding)
+append_index_pattern(PatternInfoArray *pia, const char *pattern, int encoding)
{
append_relation_pattern_helper(pia, pattern, encoding, false, true);
}
@@ -1767,7 +1810,7 @@ compile_database_list(PGconn *conn, SimplePtrList *databases,
* rel_regex: the relname regexp parsed from the pattern, or NULL if the
* pattern had no relname part
* heap_only: true if the pattern applies only to heap tables (not indexes)
- * btree_only: true if the pattern applies only to btree indexes (not tables)
+ * index_only: true if the pattern applies only to indexes (not tables)
*
* buf: the buffer to be appended
* patterns: the array of patterns to be inserted into the CTE
@@ -1809,7 +1852,7 @@ append_rel_pattern_raw_cte(PQExpBuffer buf, const PatternInfoArray *pia,
appendPQExpBufferStr(buf, "::TEXT, true::BOOLEAN");
else
appendPQExpBufferStr(buf, "::TEXT, false::BOOLEAN");
- if (info->btree_only)
+ if (info->index_only)
appendPQExpBufferStr(buf, ", true::BOOLEAN");
else
appendPQExpBufferStr(buf, ", false::BOOLEAN");
@@ -1847,8 +1890,8 @@ append_rel_pattern_filtered_cte(PQExpBuffer buf, const char *raw,
const char *filtered, PGconn *conn)
{
appendPQExpBuffer(buf,
- "\n%s (pattern_id, nsp_regex, rel_regex, heap_only, btree_only) AS ("
- "\nSELECT pattern_id, nsp_regex, rel_regex, heap_only, btree_only "
+ "\n%s (pattern_id, nsp_regex, rel_regex, heap_only, index_only) AS ("
+ "\nSELECT pattern_id, nsp_regex, rel_regex, heap_only, index_only "
"FROM %s r"
"\nWHERE (r.db_regex IS NULL "
"OR ",
@@ -1871,7 +1914,7 @@ append_rel_pattern_filtered_cte(PQExpBuffer buf, const char *raw,
* The cells of the constructed list contain all information about the relation
* necessary to connect to the database and check the object, including which
* database to connect to, where contrib/amcheck is installed, and the Oid and
- * type of object (heap table vs. btree index). Rather than duplicating the
+ * type of object (heap table vs. index). Rather than duplicating the
* database details per relation, the relation structs use references to the
* same database object, provided by the caller.
*
@@ -1898,7 +1941,7 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
if (!opts.allrel)
{
appendPQExpBufferStr(&sql,
- " include_raw (pattern_id, db_regex, nsp_regex, rel_regex, heap_only, btree_only) AS (");
+ " include_raw (pattern_id, db_regex, nsp_regex, rel_regex, heap_only, index_only) AS (");
append_rel_pattern_raw_cte(&sql, &opts.include, conn);
appendPQExpBufferStr(&sql, "\n),");
append_rel_pattern_filtered_cte(&sql, "include_raw", "include_pat", conn);
@@ -1908,7 +1951,7 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
if (opts.excludetbl || opts.excludeidx || opts.excludensp)
{
appendPQExpBufferStr(&sql,
- " exclude_raw (pattern_id, db_regex, nsp_regex, rel_regex, heap_only, btree_only) AS (");
+ " exclude_raw (pattern_id, db_regex, nsp_regex, rel_regex, heap_only, index_only) AS (");
append_rel_pattern_raw_cte(&sql, &opts.exclude, conn);
appendPQExpBufferStr(&sql, "\n),");
append_rel_pattern_filtered_cte(&sql, "exclude_raw", "exclude_pat", conn);
@@ -1916,36 +1959,36 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
/* Append the relation CTE. */
appendPQExpBufferStr(&sql,
- " relation (pattern_id, oid, nspname, relname, reltoastrelid, relpages, is_heap, is_btree) AS ("
+ " relation (pattern_id, oid, amoid, nspname, relname, reltoastrelid, relpages, is_heap, is_index) AS ("
"\nSELECT DISTINCT ON (c.oid");
if (!opts.allrel)
appendPQExpBufferStr(&sql, ", ip.pattern_id) ip.pattern_id,");
else
appendPQExpBufferStr(&sql, ") NULL::INTEGER AS pattern_id,");
appendPQExpBuffer(&sql,
- "\nc.oid, n.nspname, c.relname, c.reltoastrelid, c.relpages, "
- "c.relam = %u AS is_heap, "
- "c.relam = %u AS is_btree"
+ "\nc.oid, c.relam as amoid, n.nspname, c.relname, "
+ "c.reltoastrelid, c.relpages, c.relam = %u AS is_heap, "
+ "(c.relam = %u OR c.relam = %u) AS is_index"
"\nFROM pg_catalog.pg_class c "
"INNER JOIN pg_catalog.pg_namespace n "
"ON c.relnamespace = n.oid",
- HEAP_TABLE_AM_OID, BTREE_AM_OID);
+ HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID);
if (!opts.allrel)
appendPQExpBuffer(&sql,
"\nINNER JOIN include_pat ip"
"\nON (n.nspname ~ ip.nsp_regex OR ip.nsp_regex IS NULL)"
"\nAND (c.relname ~ ip.rel_regex OR ip.rel_regex IS NULL)"
"\nAND (c.relam = %u OR NOT ip.heap_only)"
- "\nAND (c.relam = %u OR NOT ip.btree_only)",
- HEAP_TABLE_AM_OID, BTREE_AM_OID);
+ "\nAND ((c.relam = %u OR c.relam = %u) OR NOT ip.index_only)",
+ HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID);
if (opts.excludetbl || opts.excludeidx || opts.excludensp)
appendPQExpBuffer(&sql,
"\nLEFT OUTER JOIN exclude_pat ep"
"\nON (n.nspname ~ ep.nsp_regex OR ep.nsp_regex IS NULL)"
"\nAND (c.relname ~ ep.rel_regex OR ep.rel_regex IS NULL)"
"\nAND (c.relam = %u OR NOT ep.heap_only OR ep.rel_regex IS NULL)"
- "\nAND (c.relam = %u OR NOT ep.btree_only OR ep.rel_regex IS NULL)",
- HEAP_TABLE_AM_OID, BTREE_AM_OID);
+ "\nAND ((c.relam = %u OR c.relam = %u) OR NOT ep.index_only OR ep.rel_regex IS NULL)",
+ HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID);
/*
* Exclude temporary tables and indexes, which must necessarily belong to
@@ -1984,7 +2027,7 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
HEAP_TABLE_AM_OID, PG_TOAST_NAMESPACE);
else
appendPQExpBuffer(&sql,
- " AND c.relam IN (%u, %u)"
+ " AND c.relam IN (%u, %u, %u)"
"AND c.relkind IN ("
CppAsString2(RELKIND_RELATION) ", "
CppAsString2(RELKIND_SEQUENCE) ", "
@@ -1996,10 +2039,10 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
CppAsString2(RELKIND_SEQUENCE) ", "
CppAsString2(RELKIND_MATVIEW) ", "
CppAsString2(RELKIND_TOASTVALUE) ")) OR "
- "(c.relam = %u AND c.relkind = "
+ "((c.relam = %u OR c.relam = %u) AND c.relkind = "
CppAsString2(RELKIND_INDEX) "))",
- HEAP_TABLE_AM_OID, BTREE_AM_OID,
- HEAP_TABLE_AM_OID, BTREE_AM_OID);
+ HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID,
+ HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID);
appendPQExpBufferStr(&sql,
"\nORDER BY c.oid)");
@@ -2028,7 +2071,7 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
appendPQExpBufferStr(&sql,
"\n)");
}
- if (!opts.no_btree_expansion)
+ if (!opts.no_index_expansion)
{
/*
* Include a CTE for btree indexes associated with primary heap tables
@@ -2036,9 +2079,9 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
* btree index names.
*/
appendPQExpBufferStr(&sql,
- ", index (oid, nspname, relname, relpages) AS ("
- "\nSELECT c.oid, r.nspname, c.relname, c.relpages "
- "FROM relation r"
+ ", index (oid, amoid, nspname, relname, relpages) AS ("
+ "\nSELECT c.oid, c.relam as amoid, r.nspname, "
+ "c.relname, c.relpages FROM relation r"
"\nINNER JOIN pg_catalog.pg_index i "
"ON r.oid = i.indrelid "
"INNER JOIN pg_catalog.pg_class c "
@@ -2051,15 +2094,15 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
"\nLEFT OUTER JOIN exclude_pat ep "
"ON (n.nspname ~ ep.nsp_regex OR ep.nsp_regex IS NULL) "
"AND (c.relname ~ ep.rel_regex OR ep.rel_regex IS NULL) "
- "AND ep.btree_only"
+ "AND ep.index_only"
"\nWHERE ep.pattern_id IS NULL");
else
appendPQExpBufferStr(&sql,
"\nWHERE true");
appendPQExpBuffer(&sql,
- " AND c.relam = %u "
+ " AND (c.relam = %u or c.relam = %u) "
"AND c.relkind = " CppAsString2(RELKIND_INDEX),
- BTREE_AM_OID);
+ BTREE_AM_OID, GIST_AM_OID);
if (opts.no_toast_expansion)
appendPQExpBuffer(&sql,
" AND c.relnamespace != %u",
@@ -2067,7 +2110,7 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
appendPQExpBufferStr(&sql, "\n)");
}
- if (!opts.no_toast_expansion && !opts.no_btree_expansion)
+ if (!opts.no_toast_expansion && !opts.no_index_expansion)
{
/*
* Include a CTE for btree indexes associated with toast tables of
@@ -2088,7 +2131,7 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
"\nLEFT OUTER JOIN exclude_pat ep "
"ON ('pg_toast' ~ ep.nsp_regex OR ep.nsp_regex IS NULL) "
"AND (c.relname ~ ep.rel_regex OR ep.rel_regex IS NULL) "
- "AND ep.btree_only "
+ "AND ep.index_only "
"WHERE ep.pattern_id IS NULL");
else
appendPQExpBufferStr(&sql,
@@ -2108,12 +2151,13 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
* list.
*/
appendPQExpBufferStr(&sql,
- "\nSELECT pattern_id, is_heap, is_btree, oid, nspname, relname, relpages "
+ "\nSELECT pattern_id, is_heap, is_index, oid, amoid, nspname, relname, relpages "
"FROM (");
appendPQExpBufferStr(&sql,
/* Inclusion patterns that failed to match */
- "\nSELECT pattern_id, is_heap, is_btree, "
+ "\nSELECT pattern_id, is_heap, is_index, "
"NULL::OID AS oid, "
+ "NULL::OID AS amoid, "
"NULL::TEXT AS nspname, "
"NULL::TEXT AS relname, "
"NULL::INTEGER AS relpages"
@@ -2122,29 +2166,29 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
"UNION"
/* Primary relations */
"\nSELECT NULL::INTEGER AS pattern_id, "
- "is_heap, is_btree, oid, nspname, relname, relpages "
+ "is_heap, is_index, oid, amoid, nspname, relname, relpages "
"FROM relation");
if (!opts.no_toast_expansion)
- appendPQExpBufferStr(&sql,
- " UNION"
+ appendPQExpBuffer(&sql,
+ " UNION"
/* Toast tables for primary relations */
- "\nSELECT NULL::INTEGER AS pattern_id, TRUE AS is_heap, "
- "FALSE AS is_btree, oid, nspname, relname, relpages "
- "FROM toast");
- if (!opts.no_btree_expansion)
+ "\nSELECT NULL::INTEGER AS pattern_id, TRUE AS is_heap, "
+ "FALSE AS is_index, oid, 0 as amoid, nspname, relname, relpages "
+ "FROM toast");
+ if (!opts.no_index_expansion)
appendPQExpBufferStr(&sql,
" UNION"
/* Indexes for primary relations */
"\nSELECT NULL::INTEGER AS pattern_id, FALSE AS is_heap, "
- "TRUE AS is_btree, oid, nspname, relname, relpages "
+ "TRUE AS is_index, oid, amoid, nspname, relname, relpages "
"FROM index");
- if (!opts.no_toast_expansion && !opts.no_btree_expansion)
- appendPQExpBufferStr(&sql,
- " UNION"
+ if (!opts.no_toast_expansion && !opts.no_index_expansion)
+ appendPQExpBuffer(&sql,
+ " UNION"
/* Indexes for toast relations */
- "\nSELECT NULL::INTEGER AS pattern_id, FALSE AS is_heap, "
- "TRUE AS is_btree, oid, nspname, relname, relpages "
- "FROM toast_index");
+ "\nSELECT NULL::INTEGER AS pattern_id, FALSE AS is_heap, "
+ "TRUE AS is_index, oid, %u as amoid, nspname, relname, relpages "
+ "FROM toast_index", BTREE_AM_OID);
appendPQExpBufferStr(&sql,
"\n) AS combined_records "
"ORDER BY relpages DESC NULLS FIRST, oid");
@@ -2164,8 +2208,9 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
{
int pattern_id = -1;
bool is_heap = false;
- bool is_btree PG_USED_FOR_ASSERTS_ONLY = false;
+ bool is_index PG_USED_FOR_ASSERTS_ONLY = false;
Oid oid = InvalidOid;
+ Oid amoid = InvalidOid;
const char *nspname = NULL;
const char *relname = NULL;
int relpages = 0;
@@ -2175,15 +2220,17 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
if (!PQgetisnull(res, i, 1))
is_heap = (PQgetvalue(res, i, 1)[0] == 't');
if (!PQgetisnull(res, i, 2))
- is_btree = (PQgetvalue(res, i, 2)[0] == 't');
+ is_index = (PQgetvalue(res, i, 2)[0] == 't');
if (!PQgetisnull(res, i, 3))
oid = atooid(PQgetvalue(res, i, 3));
if (!PQgetisnull(res, i, 4))
- nspname = PQgetvalue(res, i, 4);
+ amoid = atooid(PQgetvalue(res, i, 4));
if (!PQgetisnull(res, i, 5))
- relname = PQgetvalue(res, i, 5);
+ nspname = PQgetvalue(res, i, 5);
if (!PQgetisnull(res, i, 6))
- relpages = atoi(PQgetvalue(res, i, 6));
+ relname = PQgetvalue(res, i, 6);
+ if (!PQgetisnull(res, i, 7))
+ relpages = atoi(PQgetvalue(res, i, 7));
if (pattern_id >= 0)
{
@@ -2205,10 +2252,11 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
RelationInfo *rel = (RelationInfo *) pg_malloc0(sizeof(RelationInfo));
Assert(OidIsValid(oid));
- Assert((is_heap && !is_btree) || (is_btree && !is_heap));
+ Assert((is_heap && !is_index) || (is_index && !is_heap));
rel->datinfo = dat;
rel->reloid = oid;
+ rel->amoid = amoid;
rel->is_heap = is_heap;
rel->nspname = pstrdup(nspname);
rel->relname = pstrdup(relname);
@@ -2218,7 +2266,7 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
{
/*
* We apply --startblock and --endblock to heap tables, but
- * not btree indexes, and for progress purposes we need to
+			 * not to supported indexes, and for progress purposes we need to
* track how many blocks we expect to check.
*/
if (opts.endblock >= 0 && rel->blocks_to_check > opts.endblock)
diff --git a/src/bin/pg_amcheck/t/002_nonesuch.pl b/src/bin/pg_amcheck/t/002_nonesuch.pl
index f23368abeab..e11cc4e6158 100644
--- a/src/bin/pg_amcheck/t/002_nonesuch.pl
+++ b/src/bin/pg_amcheck/t/002_nonesuch.pl
@@ -285,8 +285,8 @@ $node->command_checks_all(
[
qr/pg_amcheck: warning: no heap tables to check matching "no_such_table"/,
qr/pg_amcheck: warning: no heap tables to check matching "no\*such\*table"/,
- qr/pg_amcheck: warning: no btree indexes to check matching "no_such_index"/,
- qr/pg_amcheck: warning: no btree indexes to check matching "no\*such\*index"/,
+ qr/pg_amcheck: warning: no indexes to check matching "no_such_index"/,
+ qr/pg_amcheck: warning: no indexes to check matching "no\*such\*index"/,
qr/pg_amcheck: warning: no relations to check matching "no_such_relation"/,
qr/pg_amcheck: warning: no relations to check matching "no\*such\*relation"/,
qr/pg_amcheck: warning: no heap tables to check matching "no\*such\*table"/,
@@ -366,8 +366,8 @@ $node->command_checks_all(
qr/pg_amcheck: warning: no heap tables to check matching "template1\.public\.foo"/,
qr/pg_amcheck: warning: no heap tables to check matching "another_db\.public\.foo"/,
qr/pg_amcheck: warning: no connectable databases to check matching "no_such_database\.public\.foo"/,
- qr/pg_amcheck: warning: no btree indexes to check matching "template1\.public\.foo_idx"/,
- qr/pg_amcheck: warning: no btree indexes to check matching "another_db\.public\.foo_idx"/,
+ qr/pg_amcheck: warning: no indexes to check matching "template1\.public\.foo_idx"/,
+ qr/pg_amcheck: warning: no indexes to check matching "another_db\.public\.foo_idx"/,
qr/pg_amcheck: warning: no connectable databases to check matching "no_such_database\.public\.foo_idx"/,
qr/pg_amcheck: error: no relations to check/,
],
diff --git a/src/bin/pg_amcheck/t/003_check.pl b/src/bin/pg_amcheck/t/003_check.pl
index 881854da254..2d6efbf8b05 100644
--- a/src/bin/pg_amcheck/t/003_check.pl
+++ b/src/bin/pg_amcheck/t/003_check.pl
@@ -185,7 +185,7 @@ for my $dbname (qw(db1 db2 db3))
# schemas. The schemas are all identical to start, but
# we will corrupt them differently later.
#
- for my $schema (qw(s1 s2 s3 s4 s5))
+ for my $schema (qw(s1 s2 s3 s4 s5 s6))
{
$node->safe_psql(
$dbname, qq(
@@ -291,22 +291,24 @@ plan_to_corrupt_first_page('db1', 's3.t2_btree');
# Corrupt toast table, partitions, and materialized views in schema "s4"
plan_to_remove_toast_file('db1', 's4.t2');
-# Corrupt all other object types in schema "s5". We don't have amcheck support
+# Corrupt GiST indexes in schema "s5"
+plan_to_remove_relation_file('db1', 's5.t1_gist');
+plan_to_corrupt_first_page('db1', 's5.t2_gist');
+
+# Corrupt all other object types in schema "s6". We don't have amcheck support
# for these types, but we check that their corruption does not trigger any
# errors in pg_amcheck
-plan_to_remove_relation_file('db1', 's5.seq1');
-plan_to_remove_relation_file('db1', 's5.t1_hash');
-plan_to_remove_relation_file('db1', 's5.t1_gist');
-plan_to_remove_relation_file('db1', 's5.t1_gin');
-plan_to_remove_relation_file('db1', 's5.t1_brin');
-plan_to_remove_relation_file('db1', 's5.t1_spgist');
+plan_to_remove_relation_file('db1', 's6.seq1');
+plan_to_remove_relation_file('db1', 's6.t1_hash');
+plan_to_remove_relation_file('db1', 's6.t1_gin');
+plan_to_remove_relation_file('db1', 's6.t1_brin');
+plan_to_remove_relation_file('db1', 's6.t1_spgist');
-plan_to_corrupt_first_page('db1', 's5.seq2');
-plan_to_corrupt_first_page('db1', 's5.t2_hash');
-plan_to_corrupt_first_page('db1', 's5.t2_gist');
-plan_to_corrupt_first_page('db1', 's5.t2_gin');
-plan_to_corrupt_first_page('db1', 's5.t2_brin');
-plan_to_corrupt_first_page('db1', 's5.t2_spgist');
+plan_to_corrupt_first_page('db1', 's6.seq2');
+plan_to_corrupt_first_page('db1', 's6.t2_hash');
+plan_to_corrupt_first_page('db1', 's6.t2_gin');
+plan_to_corrupt_first_page('db1', 's6.t2_brin');
+plan_to_corrupt_first_page('db1', 's6.t2_spgist');
# Database 'db2' corruptions
@@ -461,10 +463,22 @@ $node->command_checks_all(
[$no_output_re],
'pg_amcheck in schema s4 excluding toast reports no corruption');
-# Check that no corruption is reported in schema db1.s5
-$node->command_checks_all([ @cmd, '--schema' => 's5', 'db1' ],
+# In schema db1.s5 we should see GiST corruption messages on stdout, and
+# nothing on stderr.
+#
+$node->command_checks_all(
+ [ @cmd, '-s', 's5', 'db1' ],
+ 2,
+ [
+ $missing_file_re, $line_pointer_corruption_re,
+ ],
+ [$no_output_re],
+ 'pg_amcheck schema s5 reports GiST index errors');
+
+# Check that no corruption is reported in schema db1.s6
+$node->command_checks_all([ @cmd, '-s', 's6', 'db1' ],
0, [$no_output_re], [$no_output_re],
- 'pg_amcheck over schema s5 reports no corruption');
+ 'pg_amcheck over schema s6 reports no corruption');
# In schema db1.s1, only indexes are corrupt. Verify that when we exclude
# the indexes, no corruption is reported about the schema.
@@ -619,7 +633,7 @@ $node->command_checks_all(
'pg_amcheck excluding all corrupt schemas with --checkunique option');
#
-# Smoke test for checkunique option for not supported versions.
+# Smoke test for the checkunique option and GiST checks with amcheck versions that do not support them.
#
$node->safe_psql(
'db3', q(
@@ -635,4 +649,19 @@ $node->command_checks_all(
qr/pg_amcheck: warning: option --checkunique is not supported by amcheck version 1.3/
],
'pg_amcheck smoke test --checkunique');
+
+$node->safe_psql(
+ 'db1', q(
+ DROP EXTENSION amcheck;
+ CREATE EXTENSION amcheck WITH SCHEMA amcheck_schema VERSION '1.3' ;
+));
+
+$node->command_checks_all(
+ [ @cmd, '-s', 's5', 'db1' ],
+ 0,
+ [$no_output_re],
+ [
+ qr/pg_amcheck: warning: GiST verification is not supported by installed amcheck version/
+ ],
+	'pg_amcheck smoke test for GiST verification with unsupported amcheck version');
done_testing();
--
2.39.5 (Apple Git-154)
v2025-06-30-0001-Add-gist_index_check-function-to-verify-.patch (application/octet-stream)
From 8f8a85308ecb24363132cd9a44fac8fcf4267240 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Wed, 18 Jun 2025 20:33:28 +0300
Subject: [PATCH v2025-06-30 1/2] Add gist_index_check() function to verify
GiST index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This function traverses the GiST index with a depth-first search and
checks that all downlink tuples are included in the parent tuple's
keyspace. The traversal holds a lock on only one page at a time until
a discrepancy is found. To re-check a suspicious pair of parent and
child tuples it acquires locks on both parent and child pages in the
same order as a page split does.
Author: Andrey Borodin <amborodin@acm.org>
Author: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-By: José Villanova <jose.arthur@gmail.com>
Reviewed-By: Aleksander Alekseev <aleksander@timescale.com>
Reviewed-By: Nikolay Samokhvalov <samokhvalov@gmail.com>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Tomas Vondra <tomas.vondra@enterprisedb.com>
Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru
---
contrib/amcheck/Makefile | 5 +-
contrib/amcheck/amcheck--1.5--1.6.sql | 14 +
contrib/amcheck/amcheck.control | 2 +-
contrib/amcheck/expected/check_gist.out | 145 +++++
contrib/amcheck/meson.build | 3 +
contrib/amcheck/sql/check_gist.sql | 62 +++
contrib/amcheck/verify_gist.c | 687 ++++++++++++++++++++++++
doc/src/sgml/amcheck.sgml | 19 +
8 files changed, 934 insertions(+), 3 deletions(-)
create mode 100644 contrib/amcheck/amcheck--1.5--1.6.sql
create mode 100644 contrib/amcheck/expected/check_gist.out
create mode 100644 contrib/amcheck/sql/check_gist.sql
create mode 100644 contrib/amcheck/verify_gist.c
diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index 1b7a63cbaa4..1f2fec95de5 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -4,16 +4,17 @@ MODULE_big = amcheck
OBJS = \
$(WIN32RES) \
verify_common.o \
+ verify_gist.o \
verify_gin.o \
verify_heapam.o \
verify_nbtree.o
EXTENSION = amcheck
DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql \
- amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql
+ amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql amcheck--1.5--1.6.sql
PGFILEDESC = "amcheck - function for verifying relation integrity"
-REGRESS = check check_btree check_gin check_heap
+REGRESS = check check_btree check_gin check_gist check_heap
EXTRA_INSTALL = contrib/pg_walinspect
TAP_TESTS = 1
diff --git a/contrib/amcheck/amcheck--1.5--1.6.sql b/contrib/amcheck/amcheck--1.5--1.6.sql
new file mode 100644
index 00000000000..a6a1debff12
--- /dev/null
+++ b/contrib/amcheck/amcheck--1.5--1.6.sql
@@ -0,0 +1,14 @@
+/* contrib/amcheck/amcheck--1.5--1.6.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.6'" to load this file. \quit
+
+
+-- gist_index_check()
+--
+CREATE FUNCTION gist_index_check(index regclass, heapallindexed boolean)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'gist_index_check'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION gist_index_check(regclass,boolean) FROM PUBLIC;
diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control
index c8ba6d7c9bc..2f329ef2cf4 100644
--- a/contrib/amcheck/amcheck.control
+++ b/contrib/amcheck/amcheck.control
@@ -1,5 +1,5 @@
# amcheck extension
comment = 'functions for verifying relation integrity'
-default_version = '1.5'
+default_version = '1.6'
module_pathname = '$libdir/amcheck'
relocatable = true
diff --git a/contrib/amcheck/expected/check_gist.out b/contrib/amcheck/expected/check_gist.out
new file mode 100644
index 00000000000..cbc3e27e679
--- /dev/null
+++ b/contrib/amcheck/expected/check_gist.out
@@ -0,0 +1,145 @@
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gist_check;
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+ attstorage
+------------
+ x
+(1 row)
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
+ gist_index_check
+------------------
+
+(1 row)
+
diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build
index 1f0c347ed54..13b36b495ed 100644
--- a/contrib/amcheck/meson.build
+++ b/contrib/amcheck/meson.build
@@ -5,6 +5,7 @@ amcheck_sources = files(
'verify_gin.c',
'verify_heapam.c',
'verify_nbtree.c',
+ 'verify_gist.c',
)
if host_system == 'windows'
@@ -27,6 +28,7 @@ install_data(
'amcheck--1.2--1.3.sql',
'amcheck--1.3--1.4.sql',
'amcheck--1.4--1.5.sql',
+ 'amcheck--1.5--1.6.sql',
kwargs: contrib_data_args,
)
@@ -39,6 +41,7 @@ tests += {
'check',
'check_btree',
'check_gin',
+ 'check_gist',
'check_heap',
],
},
diff --git a/contrib/amcheck/sql/check_gist.sql b/contrib/amcheck/sql/check_gist.sql
new file mode 100644
index 00000000000..37966423b8b
--- /dev/null
+++ b/contrib/amcheck/sql/check_gist.sql
@@ -0,0 +1,62 @@
+
+SELECT setseed(1);
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+-- cleanup
+DROP TABLE gist_check;
+
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
\ No newline at end of file
diff --git a/contrib/amcheck/verify_gist.c b/contrib/amcheck/verify_gist.c
new file mode 100644
index 00000000000..477150ac802
--- /dev/null
+++ b/contrib/amcheck/verify_gist.c
@@ -0,0 +1,687 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_gist.c
+ * Verifies the integrity of GiST indexes based on invariants.
+ *
+ * Verification checks that all paths in the GiST graph contain
+ * consistent keys: tuples on parent pages consistently include tuples
+ * from child pages. Verification also checks graph invariants: an
+ * internal page must have at least one downlink, and an internal page
+ * can reference either only leaf pages or only internal pages.
+ *
+ *
+ * Copyright (c) 2017-2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/verify_gist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gist_private.h"
+#include "access/tableam.h"
+#include "catalog/index.h"
+#include "catalog/pg_am.h"
+#include "common/pg_prng.h"
+#include "lib/bloomfilter.h"
+#include "verify_common.h"
+#include "utils/memutils.h"
+
+
+/*
+ * GistScanItem represents one item of depth-first scan of GiST index.
+ */
+typedef struct GistScanItem
+{
+ int depth;
+
+ /* Referenced block number to check next */
+ BlockNumber blkno;
+
+ /*
+	 * Correctness of this parent tuple will be checked against the contents
+	 * of the referenced page. This tuple will be NULL for the root block.
+ */
+ IndexTuple parenttup;
+
+ /*
+	 * LSN to handle concurrent scans of the page. It's necessary to avoid
+	 * missing subtrees of a page that was split just before we read it.
+ */
+ XLogRecPtr parentlsn;
+
+ /*
+	 * Reference to the parent page, for re-locking in case a parent-child
+	 * tuple discrepancy is found.
+ */
+ BlockNumber parentblk;
+
+	/* Pointer to the next stack item. */
+ struct GistScanItem *next;
+} GistScanItem;
+
+typedef struct GistCheckState
+{
+ /* GiST state */
+ GISTSTATE *state;
+ /* Bloom filter fingerprints index tuples */
+ bloom_filter *filter;
+
+ Snapshot snapshot;
+ Relation rel;
+ Relation heaprel;
+
+ /* Debug counter for reporting percentage of work already done */
+ int64 heaptuplespresent;
+
+ /* progress reporting stuff */
+ BlockNumber totalblocks;
+ BlockNumber reportedblocks;
+ BlockNumber scannedblocks;
+ BlockNumber deltablocks;
+
+ int leafdepth;
+} GistCheckState;
+
+PG_FUNCTION_INFO_V1(gist_index_check);
+
+static void giststate_init_heapallindexed(Relation rel, GistCheckState * result);
+static void gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly);
+static void gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed,
+ BufferAccessStrategy strategy);
+static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
+static IndexTuple gist_refind_parent(Relation rel, BlockNumber parentblkno,
+ BlockNumber childblkno,
+ BufferAccessStrategy strategy);
+static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block,
+ Page page, OffsetNumber offset);
+static void gist_tuple_present_callback(Relation index, ItemPointer tid,
+ Datum *values, bool *isnull,
+ bool tupleIsAlive, void *checkstate);
+static IndexTuple gistFormNormalizedTuple(GISTSTATE *giststate, Relation r,
+ Datum *attdata, bool *isnull, ItemPointerData tid);
+
+/*
+ * gist_index_check(index regclass, heapallindexed boolean)
+ *
+ * Verify integrity of GiST index.
+ *
+ * Acquires AccessShareLock on heap & index relations.
+ */
+Datum
+gist_index_check(PG_FUNCTION_ARGS)
+{
+ Oid indrelid = PG_GETARG_OID(0);
+ bool heapallindexed = PG_GETARG_BOOL(1);
+
+ amcheck_lock_relation_and_check(indrelid,
+ GIST_AM_OID,
+ gist_check_parent_keys_consistency,
+ AccessShareLock,
+ &heapallindexed);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Initialize the GistCheckState fields needed for the heapallindexed check.
+ * This initializes the bloom filter and the snapshot.
+ */
+static void
+giststate_init_heapallindexed(Relation rel, GistCheckState * result)
+{
+ int64 total_pages;
+ int64 total_elems;
+ uint64 seed;
+
+ /*
+	 * Size the Bloom filter based on the estimated number of tuples in the
+	 * index. This logic is similar to B-tree's; see verify_nbtree.c.
+ */
+ total_pages = result->totalblocks;
+ total_elems = Max(total_pages * (MaxOffsetNumber / 5),
+ (int64) rel->rd_rel->reltuples);
+ seed = pg_prng_uint64(&pg_global_prng_state);
+ result->filter = bloom_create(total_elems, maintenance_work_mem, seed);
+
+ result->snapshot = RegisterSnapshot(GetTransactionSnapshot());
+
+
+ /*
+ * GetTransactionSnapshot() always acquires a new MVCC snapshot in READ
+ * COMMITTED mode. A new snapshot is guaranteed to have all the entries
+ * it requires in the index.
+ *
+ * We must defend against the possibility that an old xact snapshot was
+ * returned at higher isolation levels when that snapshot is not safe for
+ * index scans of the target index. This is possible when the snapshot
+ * sees tuples that are before the index's indcheckxmin horizon. Throwing
+ * an error here should be very rare. It doesn't seem worth using a
+ * secondary snapshot to avoid this.
+ */
+ if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
+ !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
+ result->snapshot->xmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("index \"%s\" cannot be verified using transaction snapshot",
+ RelationGetRelationName(rel))));
+}
+
+/*
+ * Main entry point for GiST check.
+ *
+ * This function verifies that the tuples of internal pages cover all
+ * the key space of each tuple on a leaf page. To do this we invoke
+ * gist_check_page() for every page.
+ *
+ * This check allocates a memory context and scans through the GiST
+ * graph. The scan is a depth-first search using a stack of
+ * GistScanItems. Initially this stack contains only the root block
+ * number. On each iteration the top block number is replaced by the
+ * referenced block numbers.
+ *
+ * gist_check_page() in its turn takes every tuple and tries to adjust
+ * it by the tuples on the referenced child page. A parent GiST tuple
+ * should never require any adjustments.
+ */
+static void
+gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly)
+{
+ BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+ GistScanItem *stack;
+ MemoryContext mctx;
+ MemoryContext oldcontext;
+ GISTSTATE *state;
+ bool heapallindexed = *((bool *) callback_state);
+ GistCheckState *check_state = palloc0(sizeof(GistCheckState));
+
+ mctx = AllocSetContextCreate(CurrentMemoryContext,
+ "amcheck context",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(mctx);
+
+ state = initGISTstate(rel);
+
+ check_state->state = state;
+ check_state->rel = rel;
+ check_state->heaprel = heaprel;
+
+ /*
+ * We don't know the height of the tree yet, but as soon as we encounter a
+ * leaf page, we will set 'leafdepth' to its depth.
+ */
+ check_state->leafdepth = -1;
+
+ check_state->totalblocks = RelationGetNumberOfBlocks(rel);
+ /* report every 100 blocks or 5%, whichever is bigger */
+ check_state->deltablocks = Max(check_state->totalblocks / 20, 100);
+
+ if (heapallindexed)
+ giststate_init_heapallindexed(rel, check_state);
+
+ /* Start the scan at the root page */
+ stack = (GistScanItem *) palloc0(sizeof(GistScanItem));
+ stack->depth = 0;
+ stack->parenttup = NULL;
+ stack->parentblk = InvalidBlockNumber;
+ stack->parentlsn = InvalidXLogRecPtr;
+ stack->blkno = GIST_ROOT_BLKNO;
+
+ /*
+ * This GiST scan is effectively "old" VACUUM version before commit
+	 * This GiST scan is effectively the "old" VACUUM scan order used before
+	 * commit fe280694d, which introduced physical-order scanning.
+
+ while (stack)
+ {
+ GistScanItem *stack_next;
+ Buffer buffer;
+ Page page;
+ XLogRecPtr lsn;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Report progress */
+ if (check_state->scannedblocks > check_state->reportedblocks +
+ check_state->deltablocks)
+ {
+			elog(DEBUG1, "verified %u blocks of approximately %u total",
+ check_state->scannedblocks, check_state->totalblocks);
+ check_state->reportedblocks = check_state->scannedblocks;
+ }
+ check_state->scannedblocks++;
+
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+ RBM_NORMAL, strategy);
+ LockBuffer(buffer, GIST_SHARE);
+ page = (Page) BufferGetPage(buffer);
+ lsn = BufferGetLSNAtomic(buffer);
+
+ /* Do basic sanity checks on the page headers */
+ check_index_page(rel, buffer, stack->blkno);
+
+ /*
+ * It's possible that the page was split since we looked at the
+		 * parent, so that we missed the downlink of the right sibling
+ * when we scanned the parent. If so, add the right sibling to the
+ * stack now.
+ */
+ if (GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page))
+ {
+ /* split page detected, install right link to the stack */
+ GistScanItem *ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+
+ ptr->depth = stack->depth;
+ ptr->parenttup = CopyIndexTuple(stack->parenttup);
+ ptr->parentblk = stack->parentblk;
+ ptr->parentlsn = stack->parentlsn;
+ ptr->blkno = GistPageGetOpaque(page)->rightlink;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+
+ gist_check_page(check_state, stack, page, heapallindexed, strategy);
+
+ if (!GistPageIsLeaf(page))
+ {
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ /* Internal page, so recurse to the child */
+ GistScanItem *ptr;
+ ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+ ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+ ptr->depth = stack->depth + 1;
+ ptr->parenttup = CopyIndexTuple(idxtuple);
+ ptr->parentblk = stack->blkno;
+ ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+ ptr->parentlsn = lsn;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+ }
+
+ LockBuffer(buffer, GIST_UNLOCK);
+ ReleaseBuffer(buffer);
+
+ /* Step to next item in the queue */
+ stack_next = stack->next;
+ if (stack->parenttup)
+ pfree(stack->parenttup);
+ pfree(stack);
+ stack = stack_next;
+ }
+
+ if (heapallindexed)
+ {
+ IndexInfo *indexinfo = BuildIndexInfo(rel);
+ TableScanDesc scan;
+
+ scan = table_beginscan_strat(heaprel, /* relation */
+ check_state->snapshot, /* snapshot */
+ 0, /* number of keys */
+ NULL, /* scan key */
+ true, /* buffer access strategy OK */
+ true); /* syncscan OK? */
+
+ /*
+ * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY.
+ */
+ indexinfo->ii_Concurrent = true;
+
+ indexinfo->ii_Unique = false;
+ indexinfo->ii_ExclusionOps = NULL;
+ indexinfo->ii_ExclusionProcs = NULL;
+ indexinfo->ii_ExclusionStrats = NULL;
+
+ elog(DEBUG1, "verifying that tuples from index \"%s\" are present in \"%s\"",
+ RelationGetRelationName(rel),
+ RelationGetRelationName(heaprel));
+
+ table_index_build_scan(heaprel, rel, indexinfo, true, false,
+ gist_tuple_present_callback, (void *) check_state, scan);
+
+ ereport(DEBUG1,
+ (errmsg_internal("finished verifying presence of " INT64_FORMAT " tuples from table \"%s\" with bitset %.2f%% set",
+ check_state->heaptuplespresent,
+ RelationGetRelationName(heaprel),
+ 100.0 * bloom_prop_bits_set(check_state->filter))));
+
+ UnregisterSnapshot(check_state->snapshot);
+ bloom_free(check_state->filter);
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(mctx);
+ pfree(check_state);
+}
+
+static void
+gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed, BufferAccessStrategy strategy)
+{
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ /* Check that the tree has the same height in all branches */
+ if (GistPageIsLeaf(page))
+ {
+ if (check_state->leafdepth == -1)
+ check_state->leafdepth = stack->depth;
+ else if (stack->depth != check_state->leafdepth)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+ RelationGetRelationName(check_state->rel), stack->blkno)));
+ }
+
+ /*
+ * Check that each tuple looks valid, and is consistent with the downlink
+ * we followed when we stepped on this page.
+ */
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ ItemId iid = PageGetItemIdCareful(check_state->rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+ IndexTuple tmpTuple = NULL;
+
+ /*
+ * Check that it's not a leftover invalid tuple from pre-9.1. See also
+ * gistdoinsert() and gistbulkdelete() handling of such tuples. We do
+ * consider it an error here.
+ */
+ if (GistTupleIsInvalid(idxtuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("index \"%s\" contains an inner tuple marked as invalid, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i),
+ errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
+ errhint("Please REINDEX it.")));
+
+ if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+
+ /*
+ * Check if this tuple is consistent with the downlink in the parent.
+ */
+ if (stack->parenttup)
+ tmpTuple = gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state);
+
+ if (tmpTuple)
+ {
+ /*
+ * There was a discrepancy between parent and child tuples. We
+ * need to verify it is not a result of a concurrent call to
+ * gistplacetopage(). So, lock the parent and try to find the downlink
+ * for the current page. It may be missing due to a concurrent page
+ * split; this is OK.
+ *
+ * Note that when we acquire the parent tuple now, we hold locks on
+ * both parent and child buffers. Thus the parent tuple must include
+ * the keyspace of the child.
+ */
+
+ pfree(tmpTuple);
+ pfree(stack->parenttup);
+ stack->parenttup = gist_refind_parent(check_state->rel, stack->parentblk,
+ stack->blkno, strategy);
+
+ /* If the parent tuple was re-found, make a final check before failing */
+ if (!stack->parenttup)
+ elog(NOTICE, "unable to find parent tuple for block %u on block %u due to concurrent split",
+ stack->blkno, stack->parentblk);
+ else if (gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+ else
+ {
+ /*
+ * But now it is properly adjusted - nothing to do here.
+ */
+ }
+ }
+
+ if (GistPageIsLeaf(page))
+ {
+ if (heapallindexed)
+ bloom_add_element(check_state->filter,
+ (unsigned char *) idxtuple,
+ IndexTupleSize(idxtuple));
+ }
+ else
+ {
+ OffsetNumber off = ItemPointerGetOffsetNumber(&(idxtuple->t_tid));
+
+ if (off != 0xffff)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has on page %u offset %u has item id not pointing to 0xffff, but %hu",
+ RelationGetRelationName(check_state->rel), stack->blkno, i, off)));
+ }
+ }
+}
+
+/*
+ * gistFormNormalizedTuple - analogue to gistFormTuple, but performs deTOASTing
+ * of all included data (for covering indexes). While we do not expect
+ * toasted attributes in a normal index, this can happen as a result of
+ * manual intervention in the system catalog. Detoasting of key attributes
+ * is expected to be done by opclass decompression methods, if the indexed
+ * type might be toasted.
+ */
+static IndexTuple
+gistFormNormalizedTuple(GISTSTATE *giststate, Relation r,
+ Datum *attdata, bool *isnull, ItemPointerData tid)
+{
+ Datum compatt[INDEX_MAX_KEYS];
+ IndexTuple res;
+
+ gistCompressValues(giststate, r, attdata, isnull, true, compatt);
+
+ for (int i = 0; i < r->rd_att->natts; i++)
+ {
+ Form_pg_attribute att;
+
+ att = TupleDescAttr(giststate->leafTupdesc, i);
+ if (att->attbyval || att->attlen != -1 || isnull[i])
+ continue;
+
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(compatt[i])))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
+ ItemPointerGetBlockNumber(&tid),
+ ItemPointerGetOffsetNumber(&tid),
+ RelationGetRelationName(r))));
+ if (VARATT_IS_COMPRESSED(DatumGetPointer(compatt[i])))
+ {
+ /* Datum old = compatt[i]; */
+ /* Key attributes must never be compressed */
+ if (i < IndexRelationGetNumberOfKeyAttributes(r))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("compressed varlena datum in tuple key that references heap row (%u,%u) in index \"%s\"",
+ ItemPointerGetBlockNumber(&tid),
+ ItemPointerGetOffsetNumber(&tid),
+ RelationGetRelationName(r))));
+
+ compatt[i] = PointerGetDatum(PG_DETOAST_DATUM(compatt[i]));
+ /* pfree(DatumGetPointer(old)); // TODO: this fails. Why? */
+ }
+ }
+
+ res = index_form_tuple(giststate->leafTupdesc, compatt, isnull);
+
+ /*
+ * The offset number on tuples on internal pages is unused. For historical
+ * reasons, it is set to 0xffff.
+ */
+ ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
+ return res;
+}
+
+static void
+gist_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
+ bool *isnull, bool tupleIsAlive, void *checkstate)
+{
+ GistCheckState *state = (GistCheckState *) checkstate;
+ IndexTuple itup = gistFormNormalizedTuple(state->state, index, values, isnull, *tid);
+
+ itup->t_tid = *tid;
+ /* Probe Bloom filter -- tuple should be present */
+ if (bloom_lacks_element(state->filter, (unsigned char *) itup,
+ IndexTupleSize(itup)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
+ ItemPointerGetBlockNumber(&(itup->t_tid)),
+ ItemPointerGetOffsetNumber(&(itup->t_tid)),
+ RelationGetRelationName(state->heaprel),
+ RelationGetRelationName(state->rel))));
+
+ state->heaptuplespresent++;
+
+ pfree(itup);
+}
+
+/*
+ * check_index_page - verification of basic invariants about GiST page data
+ * This function does not do any tuple analysis.
+ */
+static void
+check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
+{
+ Page page = BufferGetPage(buffer);
+
+ gistcheckpage(rel, buffer);
+
+ if (GistPageGetOpaque(page)->gist_page_id != GIST_PAGE_ID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has corrupted page %d",
+ RelationGetRelationName(rel), blockNo)));
+
+ if (GistPageIsDeleted(page))
+ {
+ if (!GistPageIsLeaf(page))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted internal page %d",
+ RelationGetRelationName(rel), blockNo)));
+ if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted page %d with tuples",
+ RelationGetRelationName(rel), blockNo)));
+ }
+ else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has page %d with exceeding count of tuples",
+ RelationGetRelationName(rel), blockNo)));
+}
+
+/*
+ * Try to re-find downlink pointing to 'blkno', in 'parentblkno'.
+ *
+ * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
+ * returns NULL.
+ */
+static IndexTuple
+gist_refind_parent(Relation rel,
+ BlockNumber parentblkno, BlockNumber childblkno,
+ BufferAccessStrategy strategy)
+{
+ Buffer parentbuf;
+ Page parentpage;
+ OffsetNumber parent_maxoff;
+ IndexTuple result = NULL;
+
+ parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
+ strategy);
+
+ LockBuffer(parentbuf, GIST_SHARE);
+ parentpage = BufferGetPage(parentbuf);
+
+ if (GistPageIsLeaf(parentpage))
+ {
+ /*
+ * Currently GiST never deletes internal pages, thus they can never
+ * become leaf pages.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" internal page %d became leaf",
+ RelationGetRelationName(rel), parentblkno)));
+ }
+
+ parent_maxoff = PageGetMaxOffsetNumber(parentpage);
+ for (OffsetNumber o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o))
+ {
+ ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o);
+ IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid);
+
+ if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
+ {
+ /*
+ * Found it! Make a copy and return it while both parent and child
+ * pages are locked. This guarantees that at this particular
+ * moment the tuples are coherent with each other.
+ */
+ result = CopyIndexTuple(itup);
+ break;
+ }
+ }
+
+ UnlockReleaseBuffer(parentbuf);
+
+ return result;
+}
+
+static ItemId
+PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
+ OffsetNumber offset)
+{
+ ItemId itemid = PageGetItemId(page, offset);
+
+ if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
+ BLCKSZ - MAXALIGN(sizeof(GISTPageOpaqueData)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("line pointer points past end of tuple space in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ /*
+ * Verify that line pointer isn't LP_REDIRECT or LP_UNUSED, since gist
+ * never uses either. Verify that line pointer has storage, too, since
+ * even LP_DEAD items should.
+ */
+ if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
+ ItemIdGetLength(itemid) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("invalid line pointer storage in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ return itemid;
+}
diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml
index 211a0ae1945..e7bce17f1ec 100644
--- a/doc/src/sgml/amcheck.sgml
+++ b/doc/src/sgml/amcheck.sgml
@@ -208,6 +208,25 @@ ORDER BY c.relpages DESC LIMIT 10;
</listitem>
</varlistentry>
+ <varlistentry>
+ <term>
+ <function>gist_index_check(index regclass, heapallindexed boolean) returns void</function>
+ <indexterm>
+ <primary>gist_index_check</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ <function>gist_index_check</function> tests that its target GiST
+ index has consistent parent-child tuple relations (no parent tuple
+ requires adjustment) and that the page graph respects balanced-tree
+ invariants (internal pages reference either only leaf pages or only
+ internal pages).
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<tip>
<para>
--
2.39.5 (Apple Git-154)
On 30 Jun 2025, at 16:34, Andrey Borodin <x4mmm@yandex-team.ru> wrote:
Please find attached two new steps for amcheck:
1. A function to verify GiST integrity. This patch is in decent shape, simply rebased from previous year.
2. Support on pg_amcheck's side for this function. This patch did not receive such review attention before. And, perhaps, should be extended to support existing GIN functions.
Here's a version that adds GIN functions to pg_amcheck.
IDK, maybe we should split the pg_amcheck part into another thread and add BRIN there too...
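For reference, here is roughly how the new SQL-level check is meant to be invoked once the extension is updated to 1.6. This is just a minimal sketch mirroring the regression test; gist_check and gist_check_idx1 are throwaway objects created only for the example:

CREATE EXTENSION IF NOT EXISTS amcheck;  -- or: ALTER EXTENSION amcheck UPDATE TO '1.6'
CREATE TABLE gist_check AS
  SELECT point(random(), s) AS c FROM generate_series(1, 10000) s;
CREATE INDEX gist_check_idx1 ON gist_check USING gist (c);
SELECT gist_index_check('gist_check_idx1', false);  -- structural checks only
SELECT gist_index_check('gist_check_idx1', true);   -- also verify every heap tuple is indexed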
Thanks!
Best regards, Andrey Borodin.
Attachments:
v2025-07-11-0003-GIN-in-pg_amcheck.patchapplication/octet-stream; name=v2025-07-11-0003-GIN-in-pg_amcheck.patch; x-unix-mode=0644Download
From 89660718827121c302ca35543babcafc7c92daf4 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Wed, 2 Jul 2025 21:21:26 +0300
Subject: [PATCH v2025-07-11 3/3] GIN in pg_amcheck
---
src/bin/pg_amcheck/pg_amcheck.c | 63 +++++++++++++++++++++++++------
src/bin/pg_amcheck/t/003_check.pl | 56 ++++++++++++++++++---------
2 files changed, 90 insertions(+), 29 deletions(-)
diff --git a/src/bin/pg_amcheck/pg_amcheck.c b/src/bin/pg_amcheck/pg_amcheck.c
index 2575178cd1a..272d0fde708 100644
--- a/src/bin/pg_amcheck/pg_amcheck.c
+++ b/src/bin/pg_amcheck/pg_amcheck.c
@@ -151,6 +151,7 @@ typedef struct DatabaseInfo
char *amcheck_schema; /* escaped, quoted literal */
bool is_checkunique;
bool gist_supported;
+ bool gin_supported;
} DatabaseInfo;
typedef struct RelationInfo
@@ -181,6 +182,8 @@ static void prepare_btree_command(PQExpBuffer sql, RelationInfo *rel,
PGconn *conn);
static void prepare_gist_command(PQExpBuffer sql, RelationInfo *rel,
PGconn *conn);
+static void prepare_gin_command(PQExpBuffer sql, RelationInfo *rel,
+ PGconn *conn);
static void run_command(ParallelSlot *slot, const char *sql);
static bool verify_heap_slot_handler(PGresult *res, PGconn *conn,
void *context);
@@ -291,6 +294,7 @@ main(int argc, char *argv[])
int encoding = pg_get_encoding_from_locale(NULL, false);
ConnParams cparams;
bool gist_warn_printed = false;
+ bool gin_warn_printed = false;
pg_logging_init(argv[0]);
progname = get_progname(argv[0]);
@@ -634,6 +638,9 @@ main(int argc, char *argv[])
/* GiST indexes are supported in 1.6+ */
dat->gist_supported = ((vmaj == 1 && vmin >= 6) || vmaj > 1);
+ /* GIN indexes are supported in 1.5+ */
+ dat->gin_supported = ((vmaj == 1 && vmin >= 5) || vmaj > 1);
+
PQclear(result);
compile_relation_list_one_db(conn, &relations, dat, &pagestotal);
@@ -805,6 +812,17 @@ main(int argc, char *argv[])
gist_warn_printed = true;
}
}
+ else if (rel->amoid == GIN_AM_OID)
+ {
+ if (rel->datinfo->gin_supported)
+ prepare_gin_command(&sql, rel, free_slot->connection);
+ else
+ {
+ if (!gin_warn_printed)
+ pg_log_warning("GIN verification is not supported by installed amcheck version");
+ gin_warn_printed = true;
+ }
+ }
else
/* should not happen at this stage */
pg_log_info("Verification of index type %u not supported",
@@ -956,6 +974,27 @@ prepare_gist_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
rel->reloid);
}
+/*
+ * prepare_gin_command
+ * Similar to the btree equivalent; prepares a command to check a GIN index.
+ */
+static void
+prepare_gin_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
+{
+ resetPQExpBuffer(sql);
+
+ appendPQExpBuffer(sql,
+ "SELECT %s.gin_index_check("
+ "index := c.oid)"
+ "\nFROM pg_catalog.pg_class c, pg_catalog.pg_index i "
+ "WHERE c.oid = %u "
+ "AND c.oid = i.indexrelid "
+ "AND c.relpersistence != 't' "
+ "AND i.indisready AND i.indisvalid AND i.indislive",
+ rel->datinfo->amcheck_schema,
+ rel->reloid);
+}
+
/*
* run_command
*
@@ -1968,27 +2007,27 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
appendPQExpBuffer(&sql,
"\nc.oid, c.relam as amoid, n.nspname, c.relname, "
"c.reltoastrelid, c.relpages, c.relam = %u AS is_heap, "
- "(c.relam = %u OR c.relam = %u) AS is_index"
+ "(c.relam = %u OR c.relam = %u OR c.relam = %u) AS is_index"
"\nFROM pg_catalog.pg_class c "
"INNER JOIN pg_catalog.pg_namespace n "
"ON c.relnamespace = n.oid",
- HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID);
+ HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID, GIN_AM_OID);
if (!opts.allrel)
appendPQExpBuffer(&sql,
"\nINNER JOIN include_pat ip"
"\nON (n.nspname ~ ip.nsp_regex OR ip.nsp_regex IS NULL)"
"\nAND (c.relname ~ ip.rel_regex OR ip.rel_regex IS NULL)"
"\nAND (c.relam = %u OR NOT ip.heap_only)"
- "\nAND ((c.relam = %u OR c.relam = %u) OR NOT ip.index_only)",
- HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID);
+ "\nAND ((c.relam = %u OR c.relam = %u OR c.relam = %u) OR NOT ip.index_only)",
+ HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID, GIN_AM_OID);
if (opts.excludetbl || opts.excludeidx || opts.excludensp)
appendPQExpBuffer(&sql,
"\nLEFT OUTER JOIN exclude_pat ep"
"\nON (n.nspname ~ ep.nsp_regex OR ep.nsp_regex IS NULL)"
"\nAND (c.relname ~ ep.rel_regex OR ep.rel_regex IS NULL)"
"\nAND (c.relam = %u OR NOT ep.heap_only OR ep.rel_regex IS NULL)"
- "\nAND ((c.relam = %u OR c.relam = %u) OR NOT ep.index_only OR ep.rel_regex IS NULL)",
- HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID);
+ "\nAND ((c.relam = %u OR c.relam = %u OR c.relam = %u) OR NOT ep.index_only OR ep.rel_regex IS NULL)",
+ HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID, GIN_AM_OID);
/*
* Exclude temporary tables and indexes, which must necessarily belong to
@@ -2027,7 +2066,7 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
HEAP_TABLE_AM_OID, PG_TOAST_NAMESPACE);
else
appendPQExpBuffer(&sql,
- " AND c.relam IN (%u, %u, %u)"
+ " AND c.relam IN (%u, %u, %u, %u)"
"AND c.relkind IN ("
CppAsString2(RELKIND_RELATION) ", "
CppAsString2(RELKIND_SEQUENCE) ", "
@@ -2039,10 +2078,10 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
CppAsString2(RELKIND_SEQUENCE) ", "
CppAsString2(RELKIND_MATVIEW) ", "
CppAsString2(RELKIND_TOASTVALUE) ")) OR "
- "((c.relam = %u OR c.relam = %u) AND c.relkind = "
+ "((c.relam = %u OR c.relam = %u OR c.relam = %u) AND c.relkind = "
CppAsString2(RELKIND_INDEX) "))",
- HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID,
- HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID);
+ HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID, GIN_AM_OID,
+ HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID, GIN_AM_OID);
appendPQExpBufferStr(&sql,
"\nORDER BY c.oid)");
@@ -2100,9 +2139,9 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
appendPQExpBufferStr(&sql,
"\nWHERE true");
appendPQExpBuffer(&sql,
- " AND (c.relam = %u or c.relam = %u) "
+ " AND (c.relam = %u or c.relam = %u or c.relam = %u) "
"AND c.relkind = " CppAsString2(RELKIND_INDEX),
- BTREE_AM_OID, GIST_AM_OID);
+ BTREE_AM_OID, GIST_AM_OID, GIN_AM_OID);
if (opts.no_toast_expansion)
appendPQExpBuffer(&sql,
" AND c.relnamespace != %u",
diff --git a/src/bin/pg_amcheck/t/003_check.pl b/src/bin/pg_amcheck/t/003_check.pl
index 2d6efbf8b05..f22146d1466 100644
--- a/src/bin/pg_amcheck/t/003_check.pl
+++ b/src/bin/pg_amcheck/t/003_check.pl
@@ -1,4 +1,3 @@
-
# Copyright (c) 2021-2025, PostgreSQL Global Development Group
use strict;
@@ -185,7 +184,7 @@ for my $dbname (qw(db1 db2 db3))
# schemas. The schemas are all identical to start, but
# we will corrupt them differently later.
#
- for my $schema (qw(s1 s2 s3 s4 s5 s6))
+ for my $schema (qw(s1 s2 s3 s4 s5 s6 s7))
{
$node->safe_psql(
$dbname, qq(
@@ -295,20 +294,22 @@ plan_to_remove_toast_file('db1', 's4.t2');
plan_to_remove_relation_file('db1', 's5.t1_gist');
plan_to_corrupt_first_page('db1', 's5.t2_gist');
-# Corrupt all other object types in schema "s6". We don't have amcheck support
+# Corrupt GIN index in schema "s6"
+plan_to_remove_relation_file('db1', 's6.t1_gin');
+plan_to_corrupt_first_page('db1', 's6.t2_gin');
+
+# Corrupt all other object types in schema "s7". We don't have amcheck support
# for these types, but we check that their corruption does not trigger any
# errors in pg_amcheck
-plan_to_remove_relation_file('db1', 's6.seq1');
-plan_to_remove_relation_file('db1', 's6.t1_hash');
-plan_to_remove_relation_file('db1', 's6.t1_gin');
-plan_to_remove_relation_file('db1', 's6.t1_brin');
-plan_to_remove_relation_file('db1', 's6.t1_spgist');
+plan_to_remove_relation_file('db1', 's7.seq1');
+plan_to_remove_relation_file('db1', 's7.t1_hash');
+plan_to_remove_relation_file('db1', 's7.t1_brin');
+plan_to_remove_relation_file('db1', 's7.t1_spgist');
-plan_to_corrupt_first_page('db1', 's6.seq2');
-plan_to_corrupt_first_page('db1', 's6.t2_hash');
-plan_to_corrupt_first_page('db1', 's6.t2_gin');
-plan_to_corrupt_first_page('db1', 's6.t2_brin');
-plan_to_corrupt_first_page('db1', 's6.t2_spgist');
+plan_to_corrupt_first_page('db1', 's7.seq2');
+plan_to_corrupt_first_page('db1', 's7.t2_hash');
+plan_to_corrupt_first_page('db1', 's7.t2_brin');
+plan_to_corrupt_first_page('db1', 's7.t2_spgist');
# Database 'db2' corruptions
@@ -475,10 +476,22 @@ $node->command_checks_all(
[$no_output_re],
'pg_amcheck schema s5 reports GiST index errors');
-# Check that no corruption is reported in schema db1.s6
-$node->command_checks_all([ @cmd, '-s', 's6', 'db1' ],
+# In schema db1.s6 we should see GIN corruption messages on stdout, and
+# nothing on stderr.
+#
+$node->command_checks_all(
+ [ @cmd, '-s', 's6', 'db1' ],
+ 2,
+ [
+ $missing_file_re,
+ ],
+ [$no_output_re],
+ 'pg_amcheck schema s6 reports GIN index errors');
+
+# Check that no corruption is reported in schema db1.s7
+$node->command_checks_all([ @cmd, '-s', 's7', 'db1' ],
0, [$no_output_re], [$no_output_re],
- 'pg_amcheck over schema s6 reports no corruption');
+ 'pg_amcheck over schema s7 reports no corruption');
# In schema db1.s1, only indexes are corrupt. Verify that when we exclude
# the indexes, no corruption is reported about the schema.
@@ -663,5 +676,14 @@ $node->command_checks_all(
[
qr/pg_amcheck: warning: GiST verification is not supported by installed amcheck version/
],
- 'pg_amcheck smoke test --checkunique');
+ 'pg_amcheck smoke test GiST version warning');
+
+$node->command_checks_all(
+ [ @cmd, '-s', 's6', 'db1' ],
+ 0,
+ [$no_output_re],
+ [
+ qr/pg_amcheck: warning: GIN verification is not supported by installed amcheck version/
+ ],
+ 'pg_amcheck smoke test GIN version warning');
done_testing();
--
2.39.5 (Apple Git-154)
v2025-07-11-0001-Add-gist_index_check-function-to-verify-.patchapplication/octet-stream; name=v2025-07-11-0001-Add-gist_index_check-function-to-verify-.patch; x-unix-mode=0644Download
From 8f8a85308ecb24363132cd9a44fac8fcf4267240 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Wed, 18 Jun 2025 20:33:28 +0300
Subject: [PATCH v2025-07-11 1/3] Add gist_index_check() function to verify
GiST index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This function traverses a GiST index with a depth-first search and checks
that all downlink tuples are included in the parent tuple's keyspace.
The traversal holds a lock on only one page at a time until some
discrepancy is found. To re-check a suspicious pair of parent and child
tuples it acquires locks on both parent and child pages in the same order
as a page split does.
Author: Andrey Borodin <amborodin@acm.org>
Author: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-By: José Villanova <jose.arthur@gmail.com>
Reviewed-By: Aleksander Alekseev <aleksander@timescale.com>
Reviewed-By: Nikolay Samokhvalov <samokhvalov@gmail.com>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Tomas Vondra <tomas.vondra@enterprisedb.com>
Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru
---
contrib/amcheck/Makefile | 5 +-
contrib/amcheck/amcheck--1.5--1.6.sql | 14 +
contrib/amcheck/amcheck.control | 2 +-
contrib/amcheck/expected/check_gist.out | 145 +++++
contrib/amcheck/meson.build | 3 +
contrib/amcheck/sql/check_gist.sql | 62 +++
contrib/amcheck/verify_gist.c | 687 ++++++++++++++++++++++++
doc/src/sgml/amcheck.sgml | 19 +
8 files changed, 934 insertions(+), 3 deletions(-)
create mode 100644 contrib/amcheck/amcheck--1.5--1.6.sql
create mode 100644 contrib/amcheck/expected/check_gist.out
create mode 100644 contrib/amcheck/sql/check_gist.sql
create mode 100644 contrib/amcheck/verify_gist.c
diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index 1b7a63cbaa4..1f2fec95de5 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -4,16 +4,17 @@ MODULE_big = amcheck
OBJS = \
$(WIN32RES) \
verify_common.o \
+ verify_gist.o \
verify_gin.o \
verify_heapam.o \
verify_nbtree.o
EXTENSION = amcheck
DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql \
- amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql
+ amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql amcheck--1.5--1.6.sql
PGFILEDESC = "amcheck - function for verifying relation integrity"
-REGRESS = check check_btree check_gin check_heap
+REGRESS = check check_btree check_gin check_gist check_heap
EXTRA_INSTALL = contrib/pg_walinspect
TAP_TESTS = 1
diff --git a/contrib/amcheck/amcheck--1.5--1.6.sql b/contrib/amcheck/amcheck--1.5--1.6.sql
new file mode 100644
index 00000000000..a6a1debff12
--- /dev/null
+++ b/contrib/amcheck/amcheck--1.5--1.6.sql
@@ -0,0 +1,14 @@
+/* contrib/amcheck/amcheck--1.5--1.6.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.6'" to load this file. \quit
+
+
+-- gist_index_check()
+--
+CREATE FUNCTION gist_index_check(index regclass, heapallindexed boolean)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'gist_index_check'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION gist_index_check(regclass,boolean) FROM PUBLIC;
diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control
index c8ba6d7c9bc..2f329ef2cf4 100644
--- a/contrib/amcheck/amcheck.control
+++ b/contrib/amcheck/amcheck.control
@@ -1,5 +1,5 @@
# amcheck extension
comment = 'functions for verifying relation integrity'
-default_version = '1.5'
+default_version = '1.6'
module_pathname = '$libdir/amcheck'
relocatable = true
diff --git a/contrib/amcheck/expected/check_gist.out b/contrib/amcheck/expected/check_gist.out
new file mode 100644
index 00000000000..cbc3e27e679
--- /dev/null
+++ b/contrib/amcheck/expected/check_gist.out
@@ -0,0 +1,145 @@
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+-- We need two passes through the index and one global vacuum to actually
+-- reuse pages
+VACUUM gist_check;
+VACUUM;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gist_check;
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+ attstorage
+------------
+ x
+(1 row)
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
+ gist_index_check
+------------------
+
+(1 row)
+
diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build
index 1f0c347ed54..13b36b495ed 100644
--- a/contrib/amcheck/meson.build
+++ b/contrib/amcheck/meson.build
@@ -5,6 +5,7 @@ amcheck_sources = files(
'verify_gin.c',
'verify_heapam.c',
'verify_nbtree.c',
+ 'verify_gist.c',
)
if host_system == 'windows'
@@ -27,6 +28,7 @@ install_data(
'amcheck--1.2--1.3.sql',
'amcheck--1.3--1.4.sql',
'amcheck--1.4--1.5.sql',
+ 'amcheck--1.5--1.6.sql',
kwargs: contrib_data_args,
)
@@ -39,6 +41,7 @@ tests += {
'check',
'check_btree',
'check_gin',
+ 'check_gist',
'check_heap',
],
},
diff --git a/contrib/amcheck/sql/check_gist.sql b/contrib/amcheck/sql/check_gist.sql
new file mode 100644
index 00000000000..37966423b8b
--- /dev/null
+++ b/contrib/amcheck/sql/check_gist.sql
@@ -0,0 +1,62 @@
+
+SELECT setseed(1);
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+
+-- We need two passes through the index and one global vacuum to actually
+-- reuse pages
+VACUUM gist_check;
+VACUUM;
+
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+-- cleanup
+DROP TABLE gist_check;
+
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
\ No newline at end of file
diff --git a/contrib/amcheck/verify_gist.c b/contrib/amcheck/verify_gist.c
new file mode 100644
index 00000000000..477150ac802
--- /dev/null
+++ b/contrib/amcheck/verify_gist.c
@@ -0,0 +1,687 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_gist.c
+ * Verifies the integrity of GiST indexes based on invariants.
+ *
+ * Verification checks that all paths in the GiST graph contain
+ * consistent keys: tuples on parent pages consistently include tuples
+ * from child pages. Also, verification checks graph invariants: an
+ * internal page must have at least one downlink, and an internal page
+ * can reference either only leaf pages or only internal pages.
+ *
+ *
+ * Copyright (c) 2017-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/verify_gist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gist_private.h"
+#include "access/tableam.h"
+#include "catalog/index.h"
+#include "catalog/pg_am.h"
+#include "common/pg_prng.h"
+#include "lib/bloomfilter.h"
+#include "verify_common.h"
+#include "utils/memutils.h"
+
+
+/*
+ * GistScanItem represents one item of depth-first scan of GiST index.
+ */
+typedef struct GistScanItem
+{
+ int depth;
+
+ /* Referenced block number to check next */
+ BlockNumber blkno;
+
+ /*
+ * Correctness of this parent tuple will be checked against contents
+ * of the referenced page. This tuple will be NULL for the root block.
+ */
+ IndexTuple parenttup;
+
+ /*
+ * LSN to handle concurrent scans of the page. It's necessary to avoid
+ * missing some subtrees from a page that was split just before we read it.
+ */
+ XLogRecPtr parentlsn;
+
+ /*
+ * Reference to the parent page for re-locking in case a parent-child
+ * tuple discrepancy is found.
+ */
+ BlockNumber parentblk;
+
+ /* Pointer to a next stack item. */
+ struct GistScanItem *next;
+} GistScanItem;
+
+typedef struct GistCheckState
+{
+ /* GiST state */
+ GISTSTATE *state;
+ /* Bloom filter fingerprints index tuples */
+ bloom_filter *filter;
+
+ Snapshot snapshot;
+ Relation rel;
+ Relation heaprel;
+
+ /* Debug counter for reporting percentage of work already done */
+ int64 heaptuplespresent;
+
+ /* progress reporting stuff */
+ BlockNumber totalblocks;
+ BlockNumber reportedblocks;
+ BlockNumber scannedblocks;
+ BlockNumber deltablocks;
+
+ int leafdepth;
+} GistCheckState;
+
+PG_FUNCTION_INFO_V1(gist_index_check);
+
+static void giststate_init_heapallindexed(Relation rel, GistCheckState * result);
+static void gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly);
+static void gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed,
+ BufferAccessStrategy strategy);
+static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
+static IndexTuple gist_refind_parent(Relation rel, BlockNumber parentblkno,
+ BlockNumber childblkno,
+ BufferAccessStrategy strategy);
+static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block,
+ Page page, OffsetNumber offset);
+static void gist_tuple_present_callback(Relation index, ItemPointer tid,
+ Datum *values, bool *isnull,
+ bool tupleIsAlive, void *checkstate);
+static IndexTuple gistFormNormalizedTuple(GISTSTATE *giststate, Relation r,
+ Datum *attdata, bool *isnull, ItemPointerData tid);
+
+/*
+ * gist_index_check(index regclass, heapallindexed boolean)
+ *
+ * Verify integrity of GiST index.
+ *
+ * Acquires AccessShareLock on heap & index relations.
+ */
+Datum
+gist_index_check(PG_FUNCTION_ARGS)
+{
+ Oid indrelid = PG_GETARG_OID(0);
+ bool heapallindexed = PG_GETARG_BOOL(1);
+
+ amcheck_lock_relation_and_check(indrelid,
+ GIST_AM_OID,
+ gist_check_parent_keys_consistency,
+ AccessShareLock,
+ &heapallindexed);
+
+ PG_RETURN_VOID();
+}
+
+/*
+* Initialize the GiST check state fields needed for heapallindexed checks.
+* This initializes the bloom filter and the snapshot.
+*/
+static void
+giststate_init_heapallindexed(Relation rel, GistCheckState * result)
+{
+ int64 total_pages;
+ int64 total_elems;
+ uint64 seed;
+
+ /*
+ * Size the Bloom filter based on the estimated number of tuples in the
+ * index. This logic is similar to B-tree's; see verify_nbtree.c.
+ */
+ total_pages = result->totalblocks;
+ total_elems = Max(total_pages * (MaxOffsetNumber / 5),
+ (int64) rel->rd_rel->reltuples);
+ seed = pg_prng_uint64(&pg_global_prng_state);
+ result->filter = bloom_create(total_elems, maintenance_work_mem, seed);
+
+ result->snapshot = RegisterSnapshot(GetTransactionSnapshot());
+
+
+ /*
+ * GetTransactionSnapshot() always acquires a new MVCC snapshot in READ
+ * COMMITTED mode. A new snapshot is guaranteed to have all the entries
+ * it requires in the index.
+ *
+ * We must defend against the possibility that an old xact snapshot was
+ * returned at higher isolation levels when that snapshot is not safe for
+ * index scans of the target index. This is possible when the snapshot
+ * sees tuples that are before the index's indcheckxmin horizon. Throwing
+ * an error here should be very rare. It doesn't seem worth using a
+ * secondary snapshot to avoid this.
+ */
+ if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
+ !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
+ result->snapshot->xmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("index \"%s\" cannot be verified using transaction snapshot",
+ RelationGetRelationName(rel))));
+}
+
+/*
+ * Main entry point for GiST check.
+ *
+ * This function verifies that tuples of internal pages cover all
+ * the key space of each tuple on a leaf page. To do this we invoke
+ * gist_check_page() for every page.
+ *
+ * This check allocates a memory context and scans through the
+ * GiST graph. The scan is performed as a depth-first search using a stack of
+ * GistScanItems. Initially this stack contains only the root block number.
+ * On each iteration the top block number is replaced by the referenced
+ * block numbers.
+ *
+ * gist_check_page() in its turn takes every tuple and tries to
+ * adjust it by the tuples on the referenced child page. A parent GiST
+ * tuple should never require any adjustment.
+ */
+static void
+gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly)
+{
+ BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+ GistScanItem *stack;
+ MemoryContext mctx;
+ MemoryContext oldcontext;
+ GISTSTATE *state;
+ bool heapallindexed = *((bool *) callback_state);
+ GistCheckState *check_state = palloc0(sizeof(GistCheckState));
+
+ mctx = AllocSetContextCreate(CurrentMemoryContext,
+ "amcheck context",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(mctx);
+
+ state = initGISTstate(rel);
+
+ check_state->state = state;
+ check_state->rel = rel;
+ check_state->heaprel = heaprel;
+
+ /*
+ * We don't know the height of the tree yet, but as soon as we encounter a
+ * leaf page, we will set 'leafdepth' to its depth.
+ */
+ check_state->leafdepth = -1;
+
+ check_state->totalblocks = RelationGetNumberOfBlocks(rel);
+ /* report every 100 blocks or 5%, whichever is bigger */
+ check_state->deltablocks = Max(check_state->totalblocks / 20, 100);
+
+ if (heapallindexed)
+ giststate_init_heapallindexed(rel, check_state);
+
+ /* Start the scan at the root page */
+ stack = (GistScanItem *) palloc0(sizeof(GistScanItem));
+ stack->depth = 0;
+ stack->parenttup = NULL;
+ stack->parentblk = InvalidBlockNumber;
+ stack->parentlsn = InvalidXLogRecPtr;
+ stack->blkno = GIST_ROOT_BLKNO;
+
+ /*
+ * This GiST scan is effectively the "old" VACUUM scan from before commit
+ * fe280694d, which introduced physical-order scanning.
+ */
+
+ while (stack)
+ {
+ GistScanItem *stack_next;
+ Buffer buffer;
+ Page page;
+ XLogRecPtr lsn;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Report progress */
+ if (check_state->scannedblocks > check_state->reportedblocks +
+ check_state->deltablocks)
+ {
+ elog(DEBUG1, "verified level %u blocks of approximately %u total",
+ check_state->scannedblocks, check_state->totalblocks);
+ check_state->reportedblocks = check_state->scannedblocks;
+ }
+ check_state->scannedblocks++;
+
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+ RBM_NORMAL, strategy);
+ LockBuffer(buffer, GIST_SHARE);
+ page = (Page) BufferGetPage(buffer);
+ lsn = BufferGetLSNAtomic(buffer);
+
+ /* Do basic sanity checks on the page headers */
+ check_index_page(rel, buffer, stack->blkno);
+
+ /*
+ * It's possible that the page was split since we looked at the
+ * parent, so that we missed the downlink of the right sibling
+ * when we scanned the parent. If so, add the right sibling to the
+ * stack now.
+ */
+ if (GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page))
+ {
+ /* split page detected, install right link to the stack */
+ GistScanItem *ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+
+ ptr->depth = stack->depth;
+ ptr->parenttup = CopyIndexTuple(stack->parenttup);
+ ptr->parentblk = stack->parentblk;
+ ptr->parentlsn = stack->parentlsn;
+ ptr->blkno = GistPageGetOpaque(page)->rightlink;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+
+ gist_check_page(check_state, stack, page, heapallindexed, strategy);
+
+ if (!GistPageIsLeaf(page))
+ {
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ /* Internal page, so recurse to the child */
+ GistScanItem *ptr;
+ ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+ ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+ ptr->depth = stack->depth + 1;
+ ptr->parenttup = CopyIndexTuple(idxtuple);
+ ptr->parentblk = stack->blkno;
+ ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+ ptr->parentlsn = lsn;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+ }
+
+ LockBuffer(buffer, GIST_UNLOCK);
+ ReleaseBuffer(buffer);
+
+ /* Step to next item in the queue */
+ stack_next = stack->next;
+ if (stack->parenttup)
+ pfree(stack->parenttup);
+ pfree(stack);
+ stack = stack_next;
+ }
+
+ if (heapallindexed)
+ {
+ IndexInfo *indexinfo = BuildIndexInfo(rel);
+ TableScanDesc scan;
+
+ scan = table_beginscan_strat(heaprel, /* relation */
+ check_state->snapshot, /* snapshot */
+ 0, /* number of keys */
+ NULL, /* scan key */
+ true, /* buffer access strategy OK */
+ true); /* syncscan OK? */
+
+ /*
+ * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY.
+ */
+ indexinfo->ii_Concurrent = true;
+
+ indexinfo->ii_Unique = false;
+ indexinfo->ii_ExclusionOps = NULL;
+ indexinfo->ii_ExclusionProcs = NULL;
+ indexinfo->ii_ExclusionStrats = NULL;
+
+ elog(DEBUG1, "verifying that tuples from index \"%s\" are present in \"%s\"",
+ RelationGetRelationName(rel),
+ RelationGetRelationName(heaprel));
+
+ table_index_build_scan(heaprel, rel, indexinfo, true, false,
+ gist_tuple_present_callback, (void *) check_state, scan);
+
+ ereport(DEBUG1,
+ (errmsg_internal("finished verifying presence of " INT64_FORMAT " tuples from table \"%s\" with bitset %.2f%% set",
+ check_state->heaptuplespresent,
+ RelationGetRelationName(heaprel),
+ 100.0 * bloom_prop_bits_set(check_state->filter))));
+
+ UnregisterSnapshot(check_state->snapshot);
+ bloom_free(check_state->filter);
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(mctx);
+ pfree(check_state);
+}
+
+static void
+gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed, BufferAccessStrategy strategy)
+{
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ /* Check that the tree has the same height in all branches */
+ if (GistPageIsLeaf(page))
+ {
+ if (check_state->leafdepth == -1)
+ check_state->leafdepth = stack->depth;
+ else if (stack->depth != check_state->leafdepth)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+ RelationGetRelationName(check_state->rel), stack->blkno)));
+ }
+
+ /*
+ * Check that each tuple looks valid, and is consistent with the downlink
+ * we followed when we stepped on this page.
+ */
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ ItemId iid = PageGetItemIdCareful(check_state->rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+ IndexTuple tmpTuple = NULL;
+
+ /*
+ * Check that it's not a leftover invalid tuple from pre-9.1. See also
+ * gistdoinsert() and gistbulkdelete() handling of such tuples. We do
+ * consider it an error here.
+ */
+ if (GistTupleIsInvalid(idxtuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("index \"%s\" contains an inner tuple marked as invalid, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i),
+ errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
+ errhint("Please REINDEX it.")));
+
+ if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+
+ /*
+ * Check if this tuple is consistent with the downlink in the parent.
+ */
+ if (stack->parenttup)
+ tmpTuple = gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state);
+
+ if (tmpTuple)
+ {
+ /*
+ * There was a discrepancy between parent and child tuples. We
+ * need to verify it is not a result of a concurrent call to
+ * gistplacetopage(). So, lock the parent and try to find the downlink
+ * for the current page. It may be missing due to a concurrent page
+ * split; this is OK.
+ *
+ * Note that when we acquire the parent tuple now, we hold locks on
+ * both parent and child buffers. Thus the parent tuple must include
+ * the keyspace of the child.
+ */
+
+ pfree(tmpTuple);
+ pfree(stack->parenttup);
+ stack->parenttup = gist_refind_parent(check_state->rel, stack->parentblk,
+ stack->blkno, strategy);
+
+ /* If the parent tuple was re-found, make a final check before failing */
+ if (!stack->parenttup)
+ elog(NOTICE, "unable to find parent tuple for block %u on block %u due to concurrent split",
+ stack->blkno, stack->parentblk);
+ else if (gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+ else
+ {
+ /*
+ * But now it is properly adjusted - nothing to do here.
+ */
+ }
+ }
+
+ if (GistPageIsLeaf(page))
+ {
+ if (heapallindexed)
+ bloom_add_element(check_state->filter,
+ (unsigned char *) idxtuple,
+ IndexTupleSize(idxtuple));
+ }
+ else
+ {
+ OffsetNumber off = ItemPointerGetOffsetNumber(&(idxtuple->t_tid));
+
+ if (off != 0xffff)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has on page %u offset %u has item id not pointing to 0xffff, but %hu",
+ RelationGetRelationName(check_state->rel), stack->blkno, i, off)));
+ }
+ }
+}
+
+/*
+ * gistFormNormalizedTuple - analogue to gistFormTuple, but performs deTOASTing
+ * of all included data (for covering indexes). While we do not expect
+ * toasted attributes in a normal index, this can happen as a result of
+ * manual intervention in the system catalog. Detoasting of key attributes
+ * is expected to be done by opclass decompression methods, if the indexed
+ * type might be toasted.
+ */
+static IndexTuple
+gistFormNormalizedTuple(GISTSTATE *giststate, Relation r,
+ Datum *attdata, bool *isnull, ItemPointerData tid)
+{
+ Datum compatt[INDEX_MAX_KEYS];
+ IndexTuple res;
+
+ gistCompressValues(giststate, r, attdata, isnull, true, compatt);
+
+ for (int i = 0; i < r->rd_att->natts; i++)
+ {
+ Form_pg_attribute att;
+
+ att = TupleDescAttr(giststate->leafTupdesc, i);
+ if (att->attbyval || att->attlen != -1 || isnull[i])
+ continue;
+
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(compatt[i])))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
+ ItemPointerGetBlockNumber(&tid),
+ ItemPointerGetOffsetNumber(&tid),
+ RelationGetRelationName(r))));
+ if (VARATT_IS_COMPRESSED(DatumGetPointer(compatt[i])))
+ {
+ /* Datum old = compatt[i]; */
+ /* Key attributes must never be compressed */
+ if (i < IndexRelationGetNumberOfKeyAttributes(r))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("compressed varlena datum in tuple key that references heap row (%u,%u) in index \"%s\"",
+ ItemPointerGetBlockNumber(&tid),
+ ItemPointerGetOffsetNumber(&tid),
+ RelationGetRelationName(r))));
+
+ compatt[i] = PointerGetDatum(PG_DETOAST_DATUM(compatt[i]));
+ /* pfree(DatumGetPointer(old)); // TODO: this fails. Why? */
+ }
+ }
+
+ res = index_form_tuple(giststate->leafTupdesc, compatt, isnull);
+
+ /*
+ * The offset number on tuples on internal pages is unused. For historical
+ * reasons, it is set to 0xffff.
+ */
+ ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
+ return res;
+}
+
+static void
+gist_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
+ bool *isnull, bool tupleIsAlive, void *checkstate)
+{
+ GistCheckState *state = (GistCheckState *) checkstate;
+ IndexTuple itup = gistFormNormalizedTuple(state->state, index, values, isnull, *tid);
+
+ itup->t_tid = *tid;
+ /* Probe Bloom filter -- tuple should be present */
+ if (bloom_lacks_element(state->filter, (unsigned char *) itup,
+ IndexTupleSize(itup)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
+ ItemPointerGetBlockNumber(&(itup->t_tid)),
+ ItemPointerGetOffsetNumber(&(itup->t_tid)),
+ RelationGetRelationName(state->heaprel),
+ RelationGetRelationName(state->rel))));
+
+ state->heaptuplespresent++;
+
+ pfree(itup);
+}
+
+/*
+ * check_index_page - verification of basic invariants about GiST page data
+ * This function does not do any tuple analysis.
+ */
+static void
+check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
+{
+ Page page = BufferGetPage(buffer);
+
+ gistcheckpage(rel, buffer);
+
+ if (GistPageGetOpaque(page)->gist_page_id != GIST_PAGE_ID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has corrupted page %d",
+ RelationGetRelationName(rel), blockNo)));
+
+ if (GistPageIsDeleted(page))
+ {
+ if (!GistPageIsLeaf(page))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted internal page %d",
+ RelationGetRelationName(rel), blockNo)));
+ if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted page %d with tuples",
+ RelationGetRelationName(rel), blockNo)));
+ }
+ else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has page %d with exceeding count of tuples",
+ RelationGetRelationName(rel), blockNo)));
+}
+
+/*
+ * Try to re-find downlink pointing to 'blkno', in 'parentblkno'.
+ *
+ * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
+ * returns NULL.
+ */
+static IndexTuple
+gist_refind_parent(Relation rel,
+ BlockNumber parentblkno, BlockNumber childblkno,
+ BufferAccessStrategy strategy)
+{
+ Buffer parentbuf;
+ Page parentpage;
+ OffsetNumber parent_maxoff;
+ IndexTuple result = NULL;
+
+ parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
+ strategy);
+
+ LockBuffer(parentbuf, GIST_SHARE);
+ parentpage = BufferGetPage(parentbuf);
+
+ if (GistPageIsLeaf(parentpage))
+ {
+ /*
+ * Currently GiST never deletes internal pages, thus they can never
+ * become leaf pages.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" internal page %d became leaf",
+ RelationGetRelationName(rel), parentblkno)));
+ }
+
+ parent_maxoff = PageGetMaxOffsetNumber(parentpage);
+ for (OffsetNumber o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o))
+ {
+ ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o);
+ IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid);
+
+ if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
+ {
+ /*
+ * Found it! Make a copy and return it while both parent and child
+ * pages are locked. This guarantees that at this particular
+ * moment the tuples are coherent with each other.
+ */
+ result = CopyIndexTuple(itup);
+ break;
+ }
+ }
+
+ UnlockReleaseBuffer(parentbuf);
+
+ return result;
+}
+
+static ItemId
+PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
+ OffsetNumber offset)
+{
+ ItemId itemid = PageGetItemId(page, offset);
+
+ if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
+ BLCKSZ - MAXALIGN(sizeof(GISTPageOpaqueData)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("line pointer points past end of tuple space in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ /*
+ * Verify that line pointer isn't LP_REDIRECT or LP_UNUSED, since gist
+ * never uses either. Verify that line pointer has storage, too, since
+ * even LP_DEAD items should.
+ */
+ if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
+ ItemIdGetLength(itemid) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("invalid line pointer storage in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ return itemid;
+}
diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml
index 211a0ae1945..e7bce17f1ec 100644
--- a/doc/src/sgml/amcheck.sgml
+++ b/doc/src/sgml/amcheck.sgml
@@ -208,6 +208,25 @@ ORDER BY c.relpages DESC LIMIT 10;
</listitem>
</varlistentry>
+ <varlistentry>
+ <term>
+ <function>gist_index_check(index regclass, heapallindexed boolean) returns void</function>
+ <indexterm>
+ <primary>gist_index_check</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ <function>gist_index_check</function> tests that its target GiST
+ index has consistent parent-child tuple relations (no parent tuple
+ requires adjustment) and that the page graph respects balanced-tree
+ invariants (internal pages reference either only leaf pages or only
+ internal pages).
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<tip>
<para>
--
2.39.5 (Apple Git-154)
v2025-07-11-0002-Add-GiST-support-to-pg_amcheck.patchapplication/octet-stream; name=v2025-07-11-0002-Add-GiST-support-to-pg_amcheck.patch; x-unix-mode=0644Download
From 9010619e032511dc9d1cecc5e8dcbf6fa6d72987 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Mon, 30 Jun 2025 14:20:13 +0300
Subject: [PATCH v2025-07-11 2/3] Add GiST support to pg_amcheck
Proof of concept patch for pg_amcheck binary support
for GIST index checks.
Author: Andrey Borodin <amborodin@acm.org>
Reviewed-by: Alexander Lakhin <exclusion@gmail.com>
Reviewed-By: Tomas Vondra <tomas.vondra@enterprisedb.com>
---
src/bin/pg_amcheck/pg_amcheck.c | 290 ++++++++++++++++-----------
src/bin/pg_amcheck/t/002_nonesuch.pl | 8 +-
src/bin/pg_amcheck/t/003_check.pl | 65 ++++--
3 files changed, 220 insertions(+), 143 deletions(-)
diff --git a/src/bin/pg_amcheck/pg_amcheck.c b/src/bin/pg_amcheck/pg_amcheck.c
index 2b1fd566c35..2575178cd1a 100644
--- a/src/bin/pg_amcheck/pg_amcheck.c
+++ b/src/bin/pg_amcheck/pg_amcheck.c
@@ -40,8 +40,7 @@ typedef struct PatternInfo
* NULL */
bool heap_only; /* true if rel_regex should only match heap
* tables */
- bool btree_only; /* true if rel_regex should only match btree
- * indexes */
+ bool index_only; /* true if rel_regex should only match indexes */
bool matched; /* true if the pattern matched in any database */
} PatternInfo;
@@ -75,10 +74,9 @@ typedef struct AmcheckOptions
/*
* As an optimization, if any pattern in the exclude list applies to heap
- * tables, or similarly if any such pattern applies to btree indexes, or
- * to schemas, then these will be true, otherwise false. These should
- * always agree with what you'd conclude by grep'ing through the exclude
- * list.
+ * tables, or similarly if any such pattern applies to indexes, or to
+ * schemas, then these will be true, otherwise false. These should always
+ * agree with what you'd conclude by grep'ing through the exclude list.
*/
bool excludetbl;
bool excludeidx;
@@ -99,14 +97,14 @@ typedef struct AmcheckOptions
int64 endblock;
const char *skip;
- /* btree index checking options */
+ /* index checking options */
bool parent_check;
bool rootdescend;
bool heapallindexed;
bool checkunique;
- /* heap and btree hybrid option */
- bool no_btree_expansion;
+ /* heap and indexes hybrid option */
+ bool no_index_expansion;
} AmcheckOptions;
static AmcheckOptions opts = {
@@ -135,7 +133,7 @@ static AmcheckOptions opts = {
.rootdescend = false,
.heapallindexed = false,
.checkunique = false,
- .no_btree_expansion = false
+ .no_index_expansion = false
};
static const char *progname = NULL;
@@ -152,13 +150,15 @@ typedef struct DatabaseInfo
char *datname;
char *amcheck_schema; /* escaped, quoted literal */
bool is_checkunique;
+ bool gist_supported;
} DatabaseInfo;
typedef struct RelationInfo
{
const DatabaseInfo *datinfo; /* shared by other relinfos */
Oid reloid;
- bool is_heap; /* true if heap, false if btree */
+ Oid amoid;
+ bool is_heap; /* true if heap, false if index */
char *nspname;
char *relname;
int relpages;
@@ -179,10 +179,12 @@ static void prepare_heap_command(PQExpBuffer sql, RelationInfo *rel,
PGconn *conn);
static void prepare_btree_command(PQExpBuffer sql, RelationInfo *rel,
PGconn *conn);
+static void prepare_gist_command(PQExpBuffer sql, RelationInfo *rel,
+ PGconn *conn);
static void run_command(ParallelSlot *slot, const char *sql);
static bool verify_heap_slot_handler(PGresult *res, PGconn *conn,
void *context);
-static bool verify_btree_slot_handler(PGresult *res, PGconn *conn, void *context);
+static bool verify_index_slot_handler(PGresult *res, PGconn *conn, void *context);
static void help(const char *progname);
static void progress_report(uint64 relations_total, uint64 relations_checked,
uint64 relpages_total, uint64 relpages_checked,
@@ -196,7 +198,7 @@ static void append_relation_pattern(PatternInfoArray *pia, const char *pattern,
int encoding);
static void append_heap_pattern(PatternInfoArray *pia, const char *pattern,
int encoding);
-static void append_btree_pattern(PatternInfoArray *pia, const char *pattern,
+static void append_index_pattern(PatternInfoArray *pia, const char *pattern,
int encoding);
static void compile_database_list(PGconn *conn, SimplePtrList *databases,
const char *initial_dbname);
@@ -288,6 +290,7 @@ main(int argc, char *argv[])
enum trivalue prompt_password = TRI_DEFAULT;
int encoding = pg_get_encoding_from_locale(NULL, false);
ConnParams cparams;
+ bool gist_warn_printed = false;
pg_logging_init(argv[0]);
progname = get_progname(argv[0]);
@@ -323,11 +326,11 @@ main(int argc, char *argv[])
break;
case 'i':
opts.allrel = false;
- append_btree_pattern(&opts.include, optarg, encoding);
+ append_index_pattern(&opts.include, optarg, encoding);
break;
case 'I':
opts.excludeidx = true;
- append_btree_pattern(&opts.exclude, optarg, encoding);
+ append_index_pattern(&opts.exclude, optarg, encoding);
break;
case 'j':
if (!option_parse_int(optarg, "-j/--jobs", 1, INT_MAX,
@@ -382,7 +385,7 @@ main(int argc, char *argv[])
maintenance_db = pg_strdup(optarg);
break;
case 2:
- opts.no_btree_expansion = true;
+ opts.no_index_expansion = true;
break;
case 3:
opts.no_toast_expansion = true;
@@ -531,6 +534,10 @@ main(int argc, char *argv[])
int ntups;
const char *amcheck_schema = NULL;
DatabaseInfo *dat = (DatabaseInfo *) cell->ptr;
+ int vmaj = 0,
+ vmin = 0,
+ vrev = 0;
+ const char *amcheck_version;
cparams.override_dbname = dat->datname;
if (conn == NULL || strcmp(PQdb(conn), dat->datname) != 0)
@@ -600,36 +607,32 @@ main(int argc, char *argv[])
strlen(amcheck_schema));
/*
- * Check the version of amcheck extension. Skip requested unique
- * constraint check with warning if it is not yet supported by
- * amcheck.
+ * Check the version of amcheck extension.
*/
- if (opts.checkunique == true)
- {
- /*
- * Now amcheck has only major and minor versions in the string but
- * we also support revision just in case. Now it is expected to be
- * zero.
- */
- int vmaj = 0,
- vmin = 0,
- vrev = 0;
- const char *amcheck_version = PQgetvalue(result, 0, 1);
+ amcheck_version = PQgetvalue(result, 0, 1);
- sscanf(amcheck_version, "%d.%d.%d", &vmaj, &vmin, &vrev);
+ /*
+ * Now amcheck has only major and minor versions in the string but we
+ * also support revision just in case. Now it is expected to be zero.
+ */
+ sscanf(amcheck_version, "%d.%d.%d", &vmaj, &vmin, &vrev);
- /*
- * checkunique option is supported in amcheck since version 1.4
- */
- if ((vmaj == 1 && vmin < 4) || vmaj == 0)
- {
- pg_log_warning("option %s is not supported by amcheck version %s",
- "--checkunique", amcheck_version);
- dat->is_checkunique = false;
- }
- else
- dat->is_checkunique = true;
+ /*
+ * checkunique option is supported in amcheck since version 1.4. Skip
+ * requested unique constraint check with warning if it is not yet
+ * supported by amcheck.
+ */
+ if (opts.checkunique && ((vmaj == 1 && vmin < 4) || vmaj == 0))
+ {
+ pg_log_warning("option %s is not supported by amcheck version %s",
+ "--checkunique", amcheck_version);
+ dat->is_checkunique = false;
}
+ else
+ dat->is_checkunique = opts.checkunique;
+
+ /* GiST indexes are supported in 1.6+ */
+ dat->gist_supported = ((vmaj == 1 && vmin >= 6) || vmaj > 1);
PQclear(result);
@@ -651,8 +654,8 @@ main(int argc, char *argv[])
if (pat->heap_only)
log_no_match("no heap tables to check matching \"%s\"",
pat->pattern);
- else if (pat->btree_only)
- log_no_match("no btree indexes to check matching \"%s\"",
+ else if (pat->index_only)
+ log_no_match("no indexes to check matching \"%s\"",
pat->pattern);
else if (pat->rel_regex == NULL)
log_no_match("no relations to check in schemas matching \"%s\"",
@@ -785,13 +788,29 @@ main(int argc, char *argv[])
if (opts.show_progress && progress_since_last_stderr)
fprintf(stderr, "\n");
- pg_log_info("checking btree index \"%s.%s.%s\"",
+ pg_log_info("checking index \"%s.%s.%s\"",
rel->datinfo->datname, rel->nspname, rel->relname);
progress_since_last_stderr = false;
}
- prepare_btree_command(&sql, rel, free_slot->connection);
+ if (rel->amoid == BTREE_AM_OID)
+ prepare_btree_command(&sql, rel, free_slot->connection);
+ else if (rel->amoid == GIST_AM_OID)
+ {
+ if (rel->datinfo->gist_supported)
+ prepare_gist_command(&sql, rel, free_slot->connection);
+ else
+ {
+ if (!gist_warn_printed)
+ pg_log_warning("GiST verification is not supported by installed amcheck version");
+ gist_warn_printed = true;
+ }
+ }
+ else
+ /* should not happen at this stage */
+ pg_log_info("Verification of index type %u not supported",
+ rel->amoid);
rel->sql = pstrdup(sql.data); /* pg_free'd after command */
- ParallelSlotSetHandler(free_slot, verify_btree_slot_handler, rel);
+ ParallelSlotSetHandler(free_slot, verify_index_slot_handler, rel);
run_command(free_slot, rel->sql);
}
}
@@ -869,7 +888,7 @@ prepare_heap_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
* Creates a SQL command for running amcheck checking on the given btree index
* relation. The command does not select any columns, as btree checking
* functions do not return any, but rather return corruption information by
- * raising errors, which verify_btree_slot_handler expects.
+ * raising errors, which verify_index_slot_handler expects.
*
* The constructed SQL command will silently skip temporary indexes, and
* indexes being reindexed concurrently, as checking them would needlessly draw
@@ -915,6 +934,28 @@ prepare_btree_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
rel->reloid);
}
+/*
+ * prepare_gist_command
+ *		Similar to the btree equivalent; prepares a command to check a GiST index.
+ */
+static void
+prepare_gist_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
+{
+ resetPQExpBuffer(sql);
+
+ appendPQExpBuffer(sql,
+ "SELECT %s.gist_index_check("
+ "index := c.oid, heapallindexed := %s)"
+ "\nFROM pg_catalog.pg_class c, pg_catalog.pg_index i "
+ "WHERE c.oid = %u "
+ "AND c.oid = i.indexrelid "
+ "AND c.relpersistence != 't' "
+ "AND i.indisready AND i.indisvalid AND i.indislive",
+ rel->datinfo->amcheck_schema,
+ (opts.heapallindexed ? "true" : "false"),
+ rel->reloid);
+}
+
/*
* run_command
*
@@ -954,7 +995,7 @@ run_command(ParallelSlot *slot, const char *sql)
* Note: Heap relation corruption is reported by verify_heapam() via the result
* set, rather than an ERROR, but running verify_heapam() on a corrupted heap
* table may still result in an error being returned from the server due to
- * missing relation files, bad checksums, etc. The btree corruption checking
+ * missing relation files, bad checksums, etc. The corruption checking
* functions always use errors to communicate corruption messages. We can't
* just abort processing because we got a mere ERROR.
*
@@ -1104,11 +1145,11 @@ verify_heap_slot_handler(PGresult *res, PGconn *conn, void *context)
}
/*
- * verify_btree_slot_handler
+ * verify_index_slot_handler
*
- * ParallelSlotHandler that receives results from a btree checking command
- * created by prepare_btree_command and outputs them for the user. The results
- * from the btree checking command is assumed to be empty, but when the results
+ * ParallelSlotHandler that receives results from a checking command created by
+ * prepare_[btree,gist]_command and outputs them for the user. The results
+ * from the checking command are assumed to be empty, but when the results
* are an error code, the useful information about the corruption is expected
* in the connection's error message.
*
@@ -1117,7 +1158,7 @@ verify_heap_slot_handler(PGresult *res, PGconn *conn, void *context)
* context: unused
*/
static bool
-verify_btree_slot_handler(PGresult *res, PGconn *conn, void *context)
+verify_index_slot_handler(PGresult *res, PGconn *conn, void *context)
{
RelationInfo *rel = (RelationInfo *) context;
@@ -1128,12 +1169,12 @@ verify_btree_slot_handler(PGresult *res, PGconn *conn, void *context)
if (ntups > 1)
{
/*
- * We expect the btree checking functions to return one void row
- * each, or zero rows if the check was skipped due to the object
- * being in the wrong state to be checked, so we should output
- * some sort of warning if we get anything more, not because it
- * indicates corruption, but because it suggests a mismatch
- * between amcheck and pg_amcheck versions.
+ * We expect the checking functions to return one void row each,
+ * or zero rows if the check was skipped due to the object being
+ * in the wrong state to be checked, so we should output some sort
+ * of warning if we get anything more, not because it indicates
+ * corruption, but because it suggests a mismatch between amcheck
+ * and pg_amcheck versions.
*
* In conjunction with --progress, anything written to stderr at
* this time would present strangely to the user without an extra
@@ -1143,7 +1184,7 @@ verify_btree_slot_handler(PGresult *res, PGconn *conn, void *context)
*/
if (opts.show_progress && progress_since_last_stderr)
fprintf(stderr, "\n");
- pg_log_warning("btree index \"%s.%s.%s\": btree checking function returned unexpected number of rows: %d",
+ pg_log_warning("index \"%s.%s.%s\": checking function returned unexpected number of rows: %d",
rel->datinfo->datname, rel->nspname, rel->relname, ntups);
if (opts.verbose)
pg_log_warning_detail("Query was: %s", rel->sql);
@@ -1157,7 +1198,7 @@ verify_btree_slot_handler(PGresult *res, PGconn *conn, void *context)
char *msg = indent_lines(PQerrorMessage(conn));
all_checks_pass = false;
- printf(_("btree index \"%s.%s.%s\":\n"),
+ printf(_("index \"%s.%s.%s\":\n"),
rel->datinfo->datname, rel->nspname, rel->relname);
printf("%s", msg);
if (opts.verbose)
@@ -1211,6 +1252,8 @@ help(const char *progname)
printf(_(" --heapallindexed check that all heap tuples are found within indexes\n"));
printf(_(" --parent-check check index parent/child relationships\n"));
printf(_(" --rootdescend search from root page to refind tuples\n"));
+ printf(_("\nGiST index checking options:\n"));
+ printf(_(" --heapallindexed check that all heap tuples are found within indexes\n"));
printf(_("\nConnection options:\n"));
printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
printf(_(" -p, --port=PORT database server port\n"));
@@ -1424,11 +1467,11 @@ append_schema_pattern(PatternInfoArray *pia, const char *pattern, int encoding)
* pattern: the relation name pattern
* encoding: client encoding for parsing the pattern
* heap_only: whether the pattern should only be matched against heap tables
- * btree_only: whether the pattern should only be matched against btree indexes
+ * index_only: whether the pattern should only be matched against indexes
*/
static void
append_relation_pattern_helper(PatternInfoArray *pia, const char *pattern,
- int encoding, bool heap_only, bool btree_only)
+ int encoding, bool heap_only, bool index_only)
{
PQExpBufferData dbbuf;
PQExpBufferData nspbuf;
@@ -1463,14 +1506,14 @@ append_relation_pattern_helper(PatternInfoArray *pia, const char *pattern,
termPQExpBuffer(&relbuf);
info->heap_only = heap_only;
- info->btree_only = btree_only;
+ info->index_only = index_only;
}
/*
* append_relation_pattern
*
* Adds the given pattern interpreted as a relation pattern, to be matched
- * against both heap tables and btree indexes.
+ * against both heap tables and indexes.
*
* pia: the pattern info array to be appended
* pattern: the relation name pattern
@@ -1499,17 +1542,17 @@ append_heap_pattern(PatternInfoArray *pia, const char *pattern, int encoding)
}
/*
- * append_btree_pattern
+ * append_index_pattern
*
* Adds the given pattern interpreted as a relation pattern, to be matched only
- * against btree indexes.
+ * against indexes.
*
* pia: the pattern info array to be appended
* pattern: the relation name pattern
* encoding: client encoding for parsing the pattern
*/
static void
-append_btree_pattern(PatternInfoArray *pia, const char *pattern, int encoding)
+append_index_pattern(PatternInfoArray *pia, const char *pattern, int encoding)
{
append_relation_pattern_helper(pia, pattern, encoding, false, true);
}
@@ -1767,7 +1810,7 @@ compile_database_list(PGconn *conn, SimplePtrList *databases,
* rel_regex: the relname regexp parsed from the pattern, or NULL if the
* pattern had no relname part
* heap_only: true if the pattern applies only to heap tables (not indexes)
- * btree_only: true if the pattern applies only to btree indexes (not tables)
+ * index_only: true if the pattern applies only to indexes (not tables)
*
* buf: the buffer to be appended
* patterns: the array of patterns to be inserted into the CTE
@@ -1809,7 +1852,7 @@ append_rel_pattern_raw_cte(PQExpBuffer buf, const PatternInfoArray *pia,
appendPQExpBufferStr(buf, "::TEXT, true::BOOLEAN");
else
appendPQExpBufferStr(buf, "::TEXT, false::BOOLEAN");
- if (info->btree_only)
+ if (info->index_only)
appendPQExpBufferStr(buf, ", true::BOOLEAN");
else
appendPQExpBufferStr(buf, ", false::BOOLEAN");
@@ -1847,8 +1890,8 @@ append_rel_pattern_filtered_cte(PQExpBuffer buf, const char *raw,
const char *filtered, PGconn *conn)
{
appendPQExpBuffer(buf,
- "\n%s (pattern_id, nsp_regex, rel_regex, heap_only, btree_only) AS ("
- "\nSELECT pattern_id, nsp_regex, rel_regex, heap_only, btree_only "
+ "\n%s (pattern_id, nsp_regex, rel_regex, heap_only, index_only) AS ("
+ "\nSELECT pattern_id, nsp_regex, rel_regex, heap_only, index_only "
"FROM %s r"
"\nWHERE (r.db_regex IS NULL "
"OR ",
@@ -1871,7 +1914,7 @@ append_rel_pattern_filtered_cte(PQExpBuffer buf, const char *raw,
* The cells of the constructed list contain all information about the relation
* necessary to connect to the database and check the object, including which
* database to connect to, where contrib/amcheck is installed, and the Oid and
- * type of object (heap table vs. btree index). Rather than duplicating the
+ * type of object (heap table vs. index). Rather than duplicating the
* database details per relation, the relation structs use references to the
* same database object, provided by the caller.
*
@@ -1898,7 +1941,7 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
if (!opts.allrel)
{
appendPQExpBufferStr(&sql,
- " include_raw (pattern_id, db_regex, nsp_regex, rel_regex, heap_only, btree_only) AS (");
+ " include_raw (pattern_id, db_regex, nsp_regex, rel_regex, heap_only, index_only) AS (");
append_rel_pattern_raw_cte(&sql, &opts.include, conn);
appendPQExpBufferStr(&sql, "\n),");
append_rel_pattern_filtered_cte(&sql, "include_raw", "include_pat", conn);
@@ -1908,7 +1951,7 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
if (opts.excludetbl || opts.excludeidx || opts.excludensp)
{
appendPQExpBufferStr(&sql,
- " exclude_raw (pattern_id, db_regex, nsp_regex, rel_regex, heap_only, btree_only) AS (");
+ " exclude_raw (pattern_id, db_regex, nsp_regex, rel_regex, heap_only, index_only) AS (");
append_rel_pattern_raw_cte(&sql, &opts.exclude, conn);
appendPQExpBufferStr(&sql, "\n),");
append_rel_pattern_filtered_cte(&sql, "exclude_raw", "exclude_pat", conn);
@@ -1916,36 +1959,36 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
/* Append the relation CTE. */
appendPQExpBufferStr(&sql,
- " relation (pattern_id, oid, nspname, relname, reltoastrelid, relpages, is_heap, is_btree) AS ("
+ " relation (pattern_id, oid, amoid, nspname, relname, reltoastrelid, relpages, is_heap, is_index) AS ("
"\nSELECT DISTINCT ON (c.oid");
if (!opts.allrel)
appendPQExpBufferStr(&sql, ", ip.pattern_id) ip.pattern_id,");
else
appendPQExpBufferStr(&sql, ") NULL::INTEGER AS pattern_id,");
appendPQExpBuffer(&sql,
- "\nc.oid, n.nspname, c.relname, c.reltoastrelid, c.relpages, "
- "c.relam = %u AS is_heap, "
- "c.relam = %u AS is_btree"
+ "\nc.oid, c.relam as amoid, n.nspname, c.relname, "
+ "c.reltoastrelid, c.relpages, c.relam = %u AS is_heap, "
+ "(c.relam = %u OR c.relam = %u) AS is_index"
"\nFROM pg_catalog.pg_class c "
"INNER JOIN pg_catalog.pg_namespace n "
"ON c.relnamespace = n.oid",
- HEAP_TABLE_AM_OID, BTREE_AM_OID);
+ HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID);
if (!opts.allrel)
appendPQExpBuffer(&sql,
"\nINNER JOIN include_pat ip"
"\nON (n.nspname ~ ip.nsp_regex OR ip.nsp_regex IS NULL)"
"\nAND (c.relname ~ ip.rel_regex OR ip.rel_regex IS NULL)"
"\nAND (c.relam = %u OR NOT ip.heap_only)"
- "\nAND (c.relam = %u OR NOT ip.btree_only)",
- HEAP_TABLE_AM_OID, BTREE_AM_OID);
+ "\nAND ((c.relam = %u OR c.relam = %u) OR NOT ip.index_only)",
+ HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID);
if (opts.excludetbl || opts.excludeidx || opts.excludensp)
appendPQExpBuffer(&sql,
"\nLEFT OUTER JOIN exclude_pat ep"
"\nON (n.nspname ~ ep.nsp_regex OR ep.nsp_regex IS NULL)"
"\nAND (c.relname ~ ep.rel_regex OR ep.rel_regex IS NULL)"
"\nAND (c.relam = %u OR NOT ep.heap_only OR ep.rel_regex IS NULL)"
- "\nAND (c.relam = %u OR NOT ep.btree_only OR ep.rel_regex IS NULL)",
- HEAP_TABLE_AM_OID, BTREE_AM_OID);
+ "\nAND ((c.relam = %u OR c.relam = %u) OR NOT ep.index_only OR ep.rel_regex IS NULL)",
+ HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID);
/*
* Exclude temporary tables and indexes, which must necessarily belong to
@@ -1984,7 +2027,7 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
HEAP_TABLE_AM_OID, PG_TOAST_NAMESPACE);
else
appendPQExpBuffer(&sql,
- " AND c.relam IN (%u, %u)"
+ " AND c.relam IN (%u, %u, %u)"
"AND c.relkind IN ("
CppAsString2(RELKIND_RELATION) ", "
CppAsString2(RELKIND_SEQUENCE) ", "
@@ -1996,10 +2039,10 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
CppAsString2(RELKIND_SEQUENCE) ", "
CppAsString2(RELKIND_MATVIEW) ", "
CppAsString2(RELKIND_TOASTVALUE) ")) OR "
- "(c.relam = %u AND c.relkind = "
+ "((c.relam = %u OR c.relam = %u) AND c.relkind = "
CppAsString2(RELKIND_INDEX) "))",
- HEAP_TABLE_AM_OID, BTREE_AM_OID,
- HEAP_TABLE_AM_OID, BTREE_AM_OID);
+ HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID,
+ HEAP_TABLE_AM_OID, BTREE_AM_OID, GIST_AM_OID);
appendPQExpBufferStr(&sql,
"\nORDER BY c.oid)");
@@ -2028,7 +2071,7 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
appendPQExpBufferStr(&sql,
"\n)");
}
- if (!opts.no_btree_expansion)
+ if (!opts.no_index_expansion)
{
/*
* Include a CTE for btree indexes associated with primary heap tables
@@ -2036,9 +2079,9 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
* btree index names.
*/
appendPQExpBufferStr(&sql,
- ", index (oid, nspname, relname, relpages) AS ("
- "\nSELECT c.oid, r.nspname, c.relname, c.relpages "
- "FROM relation r"
+ ", index (oid, amoid, nspname, relname, relpages) AS ("
+ "\nSELECT c.oid, c.relam as amoid, r.nspname, "
+ "c.relname, c.relpages FROM relation r"
"\nINNER JOIN pg_catalog.pg_index i "
"ON r.oid = i.indrelid "
"INNER JOIN pg_catalog.pg_class c "
@@ -2051,15 +2094,15 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
"\nLEFT OUTER JOIN exclude_pat ep "
"ON (n.nspname ~ ep.nsp_regex OR ep.nsp_regex IS NULL) "
"AND (c.relname ~ ep.rel_regex OR ep.rel_regex IS NULL) "
- "AND ep.btree_only"
+ "AND ep.index_only"
"\nWHERE ep.pattern_id IS NULL");
else
appendPQExpBufferStr(&sql,
"\nWHERE true");
appendPQExpBuffer(&sql,
- " AND c.relam = %u "
+ " AND (c.relam = %u or c.relam = %u) "
"AND c.relkind = " CppAsString2(RELKIND_INDEX),
- BTREE_AM_OID);
+ BTREE_AM_OID, GIST_AM_OID);
if (opts.no_toast_expansion)
appendPQExpBuffer(&sql,
" AND c.relnamespace != %u",
@@ -2067,7 +2110,7 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
appendPQExpBufferStr(&sql, "\n)");
}
- if (!opts.no_toast_expansion && !opts.no_btree_expansion)
+ if (!opts.no_toast_expansion && !opts.no_index_expansion)
{
/*
* Include a CTE for btree indexes associated with toast tables of
@@ -2088,7 +2131,7 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
"\nLEFT OUTER JOIN exclude_pat ep "
"ON ('pg_toast' ~ ep.nsp_regex OR ep.nsp_regex IS NULL) "
"AND (c.relname ~ ep.rel_regex OR ep.rel_regex IS NULL) "
- "AND ep.btree_only "
+ "AND ep.index_only "
"WHERE ep.pattern_id IS NULL");
else
appendPQExpBufferStr(&sql,
@@ -2108,12 +2151,13 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
* list.
*/
appendPQExpBufferStr(&sql,
- "\nSELECT pattern_id, is_heap, is_btree, oid, nspname, relname, relpages "
+ "\nSELECT pattern_id, is_heap, is_index, oid, amoid, nspname, relname, relpages "
"FROM (");
appendPQExpBufferStr(&sql,
/* Inclusion patterns that failed to match */
- "\nSELECT pattern_id, is_heap, is_btree, "
+ "\nSELECT pattern_id, is_heap, is_index, "
"NULL::OID AS oid, "
+ "NULL::OID AS amoid, "
"NULL::TEXT AS nspname, "
"NULL::TEXT AS relname, "
"NULL::INTEGER AS relpages"
@@ -2122,29 +2166,29 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
"UNION"
/* Primary relations */
"\nSELECT NULL::INTEGER AS pattern_id, "
- "is_heap, is_btree, oid, nspname, relname, relpages "
+ "is_heap, is_index, oid, amoid, nspname, relname, relpages "
"FROM relation");
if (!opts.no_toast_expansion)
- appendPQExpBufferStr(&sql,
- " UNION"
+ appendPQExpBuffer(&sql,
+ " UNION"
/* Toast tables for primary relations */
- "\nSELECT NULL::INTEGER AS pattern_id, TRUE AS is_heap, "
- "FALSE AS is_btree, oid, nspname, relname, relpages "
- "FROM toast");
- if (!opts.no_btree_expansion)
+ "\nSELECT NULL::INTEGER AS pattern_id, TRUE AS is_heap, "
+ "FALSE AS is_index, oid, 0 as amoid, nspname, relname, relpages "
+ "FROM toast");
+ if (!opts.no_index_expansion)
appendPQExpBufferStr(&sql,
" UNION"
/* Indexes for primary relations */
"\nSELECT NULL::INTEGER AS pattern_id, FALSE AS is_heap, "
- "TRUE AS is_btree, oid, nspname, relname, relpages "
+ "TRUE AS is_index, oid, amoid, nspname, relname, relpages "
"FROM index");
- if (!opts.no_toast_expansion && !opts.no_btree_expansion)
- appendPQExpBufferStr(&sql,
- " UNION"
+ if (!opts.no_toast_expansion && !opts.no_index_expansion)
+ appendPQExpBuffer(&sql,
+ " UNION"
/* Indexes for toast relations */
- "\nSELECT NULL::INTEGER AS pattern_id, FALSE AS is_heap, "
- "TRUE AS is_btree, oid, nspname, relname, relpages "
- "FROM toast_index");
+ "\nSELECT NULL::INTEGER AS pattern_id, FALSE AS is_heap, "
+ "TRUE AS is_index, oid, %u as amoid, nspname, relname, relpages "
+ "FROM toast_index", BTREE_AM_OID);
appendPQExpBufferStr(&sql,
"\n) AS combined_records "
"ORDER BY relpages DESC NULLS FIRST, oid");
@@ -2164,8 +2208,9 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
{
int pattern_id = -1;
bool is_heap = false;
- bool is_btree PG_USED_FOR_ASSERTS_ONLY = false;
+ bool is_index PG_USED_FOR_ASSERTS_ONLY = false;
Oid oid = InvalidOid;
+ Oid amoid = InvalidOid;
const char *nspname = NULL;
const char *relname = NULL;
int relpages = 0;
@@ -2175,15 +2220,17 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
if (!PQgetisnull(res, i, 1))
is_heap = (PQgetvalue(res, i, 1)[0] == 't');
if (!PQgetisnull(res, i, 2))
- is_btree = (PQgetvalue(res, i, 2)[0] == 't');
+ is_index = (PQgetvalue(res, i, 2)[0] == 't');
if (!PQgetisnull(res, i, 3))
oid = atooid(PQgetvalue(res, i, 3));
if (!PQgetisnull(res, i, 4))
- nspname = PQgetvalue(res, i, 4);
+ amoid = atooid(PQgetvalue(res, i, 4));
if (!PQgetisnull(res, i, 5))
- relname = PQgetvalue(res, i, 5);
+ nspname = PQgetvalue(res, i, 5);
if (!PQgetisnull(res, i, 6))
- relpages = atoi(PQgetvalue(res, i, 6));
+ relname = PQgetvalue(res, i, 6);
+ if (!PQgetisnull(res, i, 7))
+ relpages = atoi(PQgetvalue(res, i, 7));
if (pattern_id >= 0)
{
@@ -2205,10 +2252,11 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
RelationInfo *rel = (RelationInfo *) pg_malloc0(sizeof(RelationInfo));
Assert(OidIsValid(oid));
- Assert((is_heap && !is_btree) || (is_btree && !is_heap));
+ Assert((is_heap && !is_index) || (is_index && !is_heap));
rel->datinfo = dat;
rel->reloid = oid;
+ rel->amoid = amoid;
rel->is_heap = is_heap;
rel->nspname = pstrdup(nspname);
rel->relname = pstrdup(relname);
@@ -2218,7 +2266,7 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
{
/*
* We apply --startblock and --endblock to heap tables, but
- * not btree indexes, and for progress purposes we need to
+ * not supported indexes, and for progress purposes we need to
* track how many blocks we expect to check.
*/
if (opts.endblock >= 0 && rel->blocks_to_check > opts.endblock)
diff --git a/src/bin/pg_amcheck/t/002_nonesuch.pl b/src/bin/pg_amcheck/t/002_nonesuch.pl
index f23368abeab..e11cc4e6158 100644
--- a/src/bin/pg_amcheck/t/002_nonesuch.pl
+++ b/src/bin/pg_amcheck/t/002_nonesuch.pl
@@ -285,8 +285,8 @@ $node->command_checks_all(
[
qr/pg_amcheck: warning: no heap tables to check matching "no_such_table"/,
qr/pg_amcheck: warning: no heap tables to check matching "no\*such\*table"/,
- qr/pg_amcheck: warning: no btree indexes to check matching "no_such_index"/,
- qr/pg_amcheck: warning: no btree indexes to check matching "no\*such\*index"/,
+ qr/pg_amcheck: warning: no indexes to check matching "no_such_index"/,
+ qr/pg_amcheck: warning: no indexes to check matching "no\*such\*index"/,
qr/pg_amcheck: warning: no relations to check matching "no_such_relation"/,
qr/pg_amcheck: warning: no relations to check matching "no\*such\*relation"/,
qr/pg_amcheck: warning: no heap tables to check matching "no\*such\*table"/,
@@ -366,8 +366,8 @@ $node->command_checks_all(
qr/pg_amcheck: warning: no heap tables to check matching "template1\.public\.foo"/,
qr/pg_amcheck: warning: no heap tables to check matching "another_db\.public\.foo"/,
qr/pg_amcheck: warning: no connectable databases to check matching "no_such_database\.public\.foo"/,
- qr/pg_amcheck: warning: no btree indexes to check matching "template1\.public\.foo_idx"/,
- qr/pg_amcheck: warning: no btree indexes to check matching "another_db\.public\.foo_idx"/,
+ qr/pg_amcheck: warning: no indexes to check matching "template1\.public\.foo_idx"/,
+ qr/pg_amcheck: warning: no indexes to check matching "another_db\.public\.foo_idx"/,
qr/pg_amcheck: warning: no connectable databases to check matching "no_such_database\.public\.foo_idx"/,
qr/pg_amcheck: error: no relations to check/,
],
diff --git a/src/bin/pg_amcheck/t/003_check.pl b/src/bin/pg_amcheck/t/003_check.pl
index 881854da254..2d6efbf8b05 100644
--- a/src/bin/pg_amcheck/t/003_check.pl
+++ b/src/bin/pg_amcheck/t/003_check.pl
@@ -185,7 +185,7 @@ for my $dbname (qw(db1 db2 db3))
# schemas. The schemas are all identical to start, but
# we will corrupt them differently later.
#
- for my $schema (qw(s1 s2 s3 s4 s5))
+ for my $schema (qw(s1 s2 s3 s4 s5 s6))
{
$node->safe_psql(
$dbname, qq(
@@ -291,22 +291,24 @@ plan_to_corrupt_first_page('db1', 's3.t2_btree');
# Corrupt toast table, partitions, and materialized views in schema "s4"
plan_to_remove_toast_file('db1', 's4.t2');
-# Corrupt all other object types in schema "s5". We don't have amcheck support
+# Corrupt GiST index in schema "s5"
+plan_to_remove_relation_file('db1', 's5.t1_gist');
+plan_to_corrupt_first_page('db1', 's5.t2_gist');
+
+# Corrupt all other object types in schema "s6". We don't have amcheck support
# for these types, but we check that their corruption does not trigger any
# errors in pg_amcheck
-plan_to_remove_relation_file('db1', 's5.seq1');
-plan_to_remove_relation_file('db1', 's5.t1_hash');
-plan_to_remove_relation_file('db1', 's5.t1_gist');
-plan_to_remove_relation_file('db1', 's5.t1_gin');
-plan_to_remove_relation_file('db1', 's5.t1_brin');
-plan_to_remove_relation_file('db1', 's5.t1_spgist');
+plan_to_remove_relation_file('db1', 's6.seq1');
+plan_to_remove_relation_file('db1', 's6.t1_hash');
+plan_to_remove_relation_file('db1', 's6.t1_gin');
+plan_to_remove_relation_file('db1', 's6.t1_brin');
+plan_to_remove_relation_file('db1', 's6.t1_spgist');
-plan_to_corrupt_first_page('db1', 's5.seq2');
-plan_to_corrupt_first_page('db1', 's5.t2_hash');
-plan_to_corrupt_first_page('db1', 's5.t2_gist');
-plan_to_corrupt_first_page('db1', 's5.t2_gin');
-plan_to_corrupt_first_page('db1', 's5.t2_brin');
-plan_to_corrupt_first_page('db1', 's5.t2_spgist');
+plan_to_corrupt_first_page('db1', 's6.seq2');
+plan_to_corrupt_first_page('db1', 's6.t2_hash');
+plan_to_corrupt_first_page('db1', 's6.t2_gin');
+plan_to_corrupt_first_page('db1', 's6.t2_brin');
+plan_to_corrupt_first_page('db1', 's6.t2_spgist');
# Database 'db2' corruptions
@@ -461,10 +463,22 @@ $node->command_checks_all(
[$no_output_re],
'pg_amcheck in schema s4 excluding toast reports no corruption');
-# Check that no corruption is reported in schema db1.s5
-$node->command_checks_all([ @cmd, '--schema' => 's5', 'db1' ],
+# In schema db1.s5 we should see GiST corruption messages on stdout, and
+# nothing on stderr.
+#
+$node->command_checks_all(
+ [ @cmd, '-s', 's5', 'db1' ],
+ 2,
+ [
+ $missing_file_re, $line_pointer_corruption_re,
+ ],
+ [$no_output_re],
+ 'pg_amcheck schema s5 reports GiST index errors');
+
+# Check that no corruption is reported in schema db1.s6
+$node->command_checks_all([ @cmd, '-s', 's6', 'db1' ],
0, [$no_output_re], [$no_output_re],
- 'pg_amcheck over schema s5 reports no corruption');
+ 'pg_amcheck over schema s6 reports no corruption');
# In schema db1.s1, only indexes are corrupt. Verify that when we exclude
# the indexes, no corruption is reported about the schema.
@@ -619,7 +633,7 @@ $node->command_checks_all(
'pg_amcheck excluding all corrupt schemas with --checkunique option');
#
-# Smoke test for checkunique option for not supported versions.
+# Smoke test for checkunique option and GiST indexes for not supported versions.
#
$node->safe_psql(
'db3', q(
@@ -635,4 +649,19 @@ $node->command_checks_all(
qr/pg_amcheck: warning: option --checkunique is not supported by amcheck version 1.3/
],
'pg_amcheck smoke test --checkunique');
+
+$node->safe_psql(
+ 'db1', q(
+ DROP EXTENSION amcheck;
+ CREATE EXTENSION amcheck WITH SCHEMA amcheck_schema VERSION '1.3' ;
+));
+
+$node->command_checks_all(
+ [ @cmd, '-s', 's5', 'db1' ],
+ 0,
+ [$no_output_re],
+ [
+ qr/pg_amcheck: warning: GiST verification is not supported by installed amcheck version/
+ ],
+	'pg_amcheck smoke test for GiST verification with unsupported amcheck version');
done_testing();
--
2.39.5 (Apple Git-154)
Hi, Andrey!
Thank you for working on this! There is a long history of the patch, I
hope it will be committed soon!)
On Fri, Jul 11, 2025 at 3:39 PM Andrey Borodin <x4mmm@yandex-team.ru> wrote:
On 30 Jun 2025, at 16:34, Andrey Borodin <x4mmm@yandex-team.ru> wrote:
Please find attached two new steps for amcheck:
1. A function to verify GiST integrity. This patch is in decent shape, simply rebased from previous year.
2. Support on pg_amcheck's side for this function. This patch did not receive such review attention before. And, perhaps, should be extended to support existing GIN functions.
Here's a version that adds GIN functions to pg_amcheck.
IDK, maybe we should split pg_amcheck part into another thread and add there BRIN too...
Speaking of BRIN pg_amcheck, I probably wouldn't merge it with the
gist/gin pg_amcheck patchset, because that would create a dependency on
the amcheck BRIN support patch, and it's not clear when that will be
ready.
There are some points about the patch:
1) There are several typos in verify_gist.c:
downlinks -> downlink (header comment)
discrepencies -> discrepancies
Correctess -> Correctness
hande -> handle
Initaliaze -> Initialize
numbmer -> number
replcaed -> replaced
aquire -> acquire
2) Copyright year is 2023 in the patch. Time flies:)
3) There is the same check in btree and while reviewing the patch I
realised it should be added to the BRIN amcheck as well. Probably it
will be needed for GIN someday. What do you think about moving it to
verify_common?
if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
!TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
result->snapshot->xmin))
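If we did move it, the shared helper in verify_common.c could look roughly like
this (the function name is a placeholder I made up; the error is modeled on what
verify_nbtree reports today):

static void
amcheck_check_index_snapshot(Relation rel, Snapshot snapshot)
{
	if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
		!TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
							   snapshot->xmin))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("index \"%s\" cannot be verified using transaction snapshot",
						RelationGetRelationName(rel))));
}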
4) Should we check blknum of the new entry before pushing to the
stack? Probably we can check if it's a valid blknum and it's not
outside of the index. This way we can give a more detailed error
message in case we meet the wrong blknum.
in the split detection code:
ptr->blkno = GistPageGetOpaque(page)->rightlink;
and when we add children of an inner page:
ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
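Something along these lines is what I have in mind (illustrative only; 'rel' and
'ptr' stand for the corresponding variables in verify_gist.c, and the upper bound
needs some care because the index can be extended while we scan it):

	BlockNumber	nblocks = RelationGetNumberOfBlocks(rel);

	/* sanity-check the link before pushing it onto the stack */
	if (ptr->blkno == InvalidBlockNumber || ptr->blkno >= nblocks)
		ereport(ERROR,
				(errcode(ERRCODE_INDEX_CORRUPTED),
				 errmsg("index \"%s\" has link to block %u, but index has only %u blocks",
						RelationGetRelationName(rel),
						ptr->blkno, nblocks)));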
5) There is a macro for 0xffff - 'TUPLE_IS_VALID'. Maybe we can use
it to make the code more readable? Also the error message contains one
extra 'has'.
if (off != 0xffff)
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
errmsg("index \"%s\" has on page %u offset %u has
item id not pointing to 0xffff, but %hu",
RelationGetRelationName(check_state->rel),
stack->blkno, i, off)));
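For example, something like this (just a sketch of the wording I'd expect, with
the duplicated 'has' dropped):

	if (off != TUPLE_IS_VALID)
		ereport(ERROR,
				(errcode(ERRCODE_INDEX_CORRUPTED),
				 errmsg("index \"%s\" page %u offset %u has item id not pointing to 0xffff, but %hu",
						RelationGetRelationName(check_state->rel),
						stack->blkno, i, off)));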
6) Several points about 'gistFormNormalizedTuple'. I read the previous
thread [1] and it seems there is an unfinished discussion about
normalizing during heapallindexed check. I think normalization needs
some more work here.
6a) There is a TODO
/* pfree(DatumGetPointer(old)); // TODO: this fails. Why? */
6b) AFAICS 'compress' is an optional support function. If opclass
doesn't have a 'compress' function, then 'gistCompressValues' leaves
such attributes as they are. Here we get attdata from the heap scan, and
it could be toasted. That means that these checks can result in false
positives:
gistCompressValues(giststate, r, attdata, isnull, true, compatt);
...
for (int i = 0; i < r->rd_att->natts; i++)
{
if (VARATT_IS_EXTERNAL(DatumGetPointer(compatt[i])))
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
....
if (VARATT_IS_COMPRESSED(DatumGetPointer(compatt[i])))
{
if (i < IndexRelationGetNumberOfKeyAttributes(r))
ereport(ERROR,
Also, the 'VARATT_IS_EXTERNAL' check will always result in a false
positive for toasted INCLUDE attributes here. Reproducer:
DROP TABLE IF EXISTS tbl;
CREATE TABLE tbl(a point, t text);
-- disable compression for 't', but let it to be external
ALTER TABLE tbl ALTER COLUMN t SET STORAGE external ;
INSERT INTO tbl values (point(random(), random()), repeat('a',3000 ));
CREATE INDEX tbl_idx ON tbl using gist (a) include (t);
SELECT gist_index_check('tbl_idx', true);
So I think we need to remove these checks completely.
6c) The current code doesn't normalize existing index tuples when adding
them to the bloom filter, which can result in false positives.
Reproducer: here we use plain storage during the index build, then switch
to extended storage before the check, which yields a different binary
representation of the same data and therefore a false positive.
DROP TABLE IF EXISTS tbl;
CREATE TABLE tbl(a tsvector);
CREATE INDEX tbl_idx ON tbl using gist (a) ;
ALTER TABLE tbl ALTER COLUMN a SET STORAGE plain;
INSERT INTO tbl values ('a' ::tsvector);
ALTER TABLE tbl ALTER COLUMN a SET STORAGE extended ;
SELECT gist_index_check('tbl_idx', true);
6d) At the end of 'gistFormNormalizedTuple' we have
ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
I guess this follows the gistFormTuple function, but here we use
gistFormNormalizedTuple only for leaf tuples and we override the
offset number right after the 'gistFormNormalizedTuple' call, so it
looks like we can drop it.
In general I think the normalization here can follow the same logic as
in verify_nbtree. We could even reuse 'bt_normalize_tuple' as the
normalization function. It handles all the corner cases we can also have
in GiST, such as short varlenas and differences in compression. It
contains just a few btree-specific lines and everything else is valid for
GiST, so we would only need to modify it a bit. I think we can move it to
verify_common. Then we need to normalize every existing leaf index
tuple before adding it to the bloom filter. During the probing phase I
think we can just use 'gistFormTuple' to build an index tuple and then
normalize it before probing. What do you think?
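To be a bit more concrete, on the fingerprinting side I imagine something
roughly like this (just a sketch; the helper name assumes bt_normalize_tuple
ends up in verify_common, and 'state' stands for whatever check state
verify_gist keeps around):

	/* while walking a leaf page: fingerprint the normalized form of each tuple */
	IndexTuple	norm = amcheck_normalize_tuple(state->rel, itup);

	bloom_add_element(state->filter, (unsigned char *) norm,
					  IndexTupleSize(norm));
	if (norm != itup)
		pfree(norm);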
Thank you!
[1]: /messages/by-id/CAAhFRxiHCWe_6AmqGWZqYEkgN_uQG3Jgw0WgPw+0zO3_D-q4DA@mail.gmail.com
Best regards,
Arseniy Mukhin
Hi! Thank you for your review.
I'm posting a new version of patch 0001 of the series.
On Tue, 22 Jul 2025 at 15:47, Arseniy Mukhin
<arseniy.mukhin.dev@gmail.com> wrote:
Hi, Andrey!
Thank you for working on this! There is a long history of the patch, I
hope it will be committed soon!)
On Fri, Jul 11, 2025 at 3:39 PM Andrey Borodin <x4mmm@yandex-team.ru> wrote:
On 30 Jun 2025, at 16:34, Andrey Borodin <x4mmm@yandex-team.ru> wrote:
Please find attached two new steps for amcheck:
1. A function to verify GiST integrity. This patch is in decent shape, simply rebased from previous year.
2. Support on pg_amcheck's side for this function. This patch did not receive such review attention before. And, perhaps, should be extended to support existing GIN functions.
Here's a version that adds GIN functions to pg_amcheck.
IDK, maybe we should split pg_amcheck part into another thread and add there BRIN too...
Speaking of BRIN pg_amcheck, I probably wouldn't merge it with the
gist/gin pg_amcheck patchset, because that would create a dependency on
the amcheck BRIN support patch, and it's not clear when that will be
ready.
There are some points about the patch:
1) There are several typos in verify_gist.c:
downlinks -> downlink (header comment)
discrepencies -> discrepancies
Correctess -> Correctness
hande -> handle
Initaliaze -> Initialize
numbmer -> number
replcaed -> replaced
aquire -> acquire
2) Copyright year is 2023 in the patch. Time flies:)
These two are (trivially) fixed.
3) There is the same check in btree and while reviewing the patch I
realised it should be added to the BRIN amcheck as well. Probably it
will be needed for GIN someday. What do you think about moving it to
verify_common?
if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
!TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
result->snapshot->xmin))
I think this is a good idea. I'm not sure if we should bother with
refactoring in this series though...
4) Should we check blknum of the new entry before pushing to the
stack? Probably we can check if it's a valid blknum and it's not
outside of the index. This way we can give a more detailed error
message in case we meet the wrong blknum.
in the split detection code:
ptr->blkno = GistPageGetOpaque(page)->rightlink;
and when we add children of an inner page:
ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
We do indeed need to recheck all bytes in possibly-corrupted indexes,
including downlinks.
But amcheck can be run concurrently with index inserts, which will
change the current index size, so such a check is not trivial.
And given that it is not done in the already-committed nbtree & GIN
amcheck modules, I suggest not bothering with it in this thread.
This can be a separate patch to verify_nbtree.
5) There is a macro for 0xffff - 'TUPLE_IS_VALID'. Maybe we can use
it to make the code more readable? Also the error message contains one
extra 'has'.
if (off != 0xffff)
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
errmsg("index \"%s\" has on page %u offset %u has
item id not pointing to 0xffff, but %hu",
RelationGetRelationName(check_state->rel),
stack->blkno, i, off)));
Sure, I replaced all usages with TUPLE_IS_VALID.
6) Several points about 'gistFormNormalizedTuple'. I read the previous
thread [1] and it seems there is an unfinished discussion about
normalizing during heapallindexed check. I think normalization needs
some more work here.
6a) There is a TODO
/* pfree(DatumGetPointer(old)); // TODO: this fails. Why? */
6b) AFAICS 'compress' is an optional support function. If opclass
doesn't have a 'compress' function, then 'gistCompressValues' leaves
such attributes as it is. Here we get attdata from the heap scan, and
it could be toasted. That means that these checks can result in false
positives:
gistCompressValues(giststate, r, attdata, isnull, true, compatt);
...
for (int i = 0; i < r->rd_att->natts; i++)
{
if (VARATT_IS_EXTERNAL(DatumGetPointer(compatt[i])))
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
....
if (VARATT_IS_COMPRESSED(DatumGetPointer(compatt[i])))
{
if (i < IndexRelationGetNumberOfKeyAttributes(r))
ereport(ERROR,
Also, the 'VARATT_IS_EXTERNAL' check will always result in a false
positive for toasted INCLUDE attributes here. Reproducer:
DROP TABLE IF EXISTS tbl;
CREATE TABLE tbl(a point, t text);
-- disable compression for 't', but let it to be external
ALTER TABLE tbl ALTER COLUMN t SET STORAGE external ;
INSERT INTO tbl values (point(random(), random()), repeat('a',3000 ));
CREATE INDEX tbl_idx ON tbl using gist (a) include (t);
SELECT gist_index_check('tbl_idx', true);
So I think we need to remove these checks completely.
6c) Current code doesn't apply normalization for existing index tuples
during adding to bloom filter, which can result in false positive,
reproducer:
Here we use plain storage during index build, then during check we
have extended storage, which results in different binary
representation of the same data and we have false positive here.
DROP TABLE IF EXISTS tbl;
CREATE TABLE tbl(a tsvector);
CREATE INDEX tbl_idx ON tbl using gist (a) ;
ALTER TABLE tbl ALTER COLUMN a SET STORAGE plain;
INSERT INTO tbl values ('a' ::tsvector);
ALTER TABLE tbl ALTER COLUMN a SET STORAGE extended ;
SELECT gist_index_check('tbl_idx', true);
6d) In the end of 'gistFormNormalizedTuple' we have
ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
I guess it follows gistFormTuple function, but here we use
gistFormNormalizedTuple only for leaf tuples and we override
offsetnumber right after 'gistFormNormalizedTuple' function call, so
looks like we can drop it.
In general I think normalization here can follow the same logic as
for verify_nbtree. We can even reuse 'bt_normalize_tuple' as a
normalization function. It handles all corner cases like short varatt,
differences in compressions etc, that we can have in gist as well. It
contains just a few lines about btree and everything else valid for
gist, so we need to modify it a bit. I think we can move it to
verify_common. Then we need to normalize every existing leaf index
tuple before adding it to the bloom filter. During the probing phase I
think we just can use 'gistFormTuple' to build an index tuple and then
normalize it before probing. What do you think?
Thank you!
I did a small refactor in patch one, so we can reuse
bt_normalize_tuple. With these changes, your reproducers no longer
complain.
I guess `gistFormNormalizedTuple` is now unneeded and its comment is no
longer true. index_form_tuple in gist_tuple_present_callback ensures
all attributes are detoasted, and then we call `amcheck_normalize_tuple`.
I will remove the gistFormNormalizedTuple function in the next iteration
if this approach is OK.
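To make the intended flow concrete, the heapallindexed callback now does roughly
the following (a simplified sketch, not the literal patch code; the check-state
field names and the error wording here are illustrative):

	/* inside the IndexBuildCallback used for the heapallindexed scan */
	itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
	itup->t_tid = *tid;

	/* same binary normalization nbtree uses before fingerprinting/probing */
	norm = amcheck_normalize_tuple(rel, itup);

	if (bloom_lacks_element(state->filter, (unsigned char *) norm,
							IndexTupleSize(norm)))
		ereport(ERROR,
				(errcode(ERRCODE_INDEX_CORRUPTED),
				 errmsg("heap tuple (%u,%u) lacks matching index tuple within index \"%s\"",
						ItemPointerGetBlockNumber(tid),
						ItemPointerGetOffsetNumber(tid),
						RelationGetRelationName(rel))));

	if (norm != itup)
		pfree(norm);
	pfree(itup);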
[1] /messages/by-id/CAAhFRxiHCWe_6AmqGWZqYEkgN_uQG3Jgw0WgPw+0zO3_D-q4DA@mail.gmail.com
Best regards,
Arseniy Mukhin
--
Best regards,
Kirill Reshke
Attachments:
v20251022-0001-Move-normalize-tuple-logic-from-nbtcheck-t.patchapplication/octet-stream; name=v20251022-0001-Move-normalize-tuple-logic-from-nbtcheck-t.patchDownload
From 8c164d2e64cfc8ccfea41ab99021a277d0c8b8be Mon Sep 17 00:00:00 2001
From: reshke <reshke@double.cloud>
Date: Wed, 22 Oct 2025 15:29:26 +0000
Subject: [PATCH v20251022 1/2] Move `normalize tuple` logic from nbtcheck to
verify_common
Preparatory patch to reuse the index tuple normalization logic
in GiST amcheck.
---
contrib/amcheck/verify_common.c | 112 ++++++++++++++++++++++++++++++++
contrib/amcheck/verify_common.h | 2 +
contrib/amcheck/verify_nbtree.c | 107 +-----------------------------
3 files changed, 115 insertions(+), 106 deletions(-)
diff --git a/contrib/amcheck/verify_common.c b/contrib/amcheck/verify_common.c
index a31ce06ed99..e9b4887f65e 100644
--- a/contrib/amcheck/verify_common.c
+++ b/contrib/amcheck/verify_common.c
@@ -13,6 +13,7 @@
#include "postgres.h"
#include "access/genam.h"
+#include "access/heaptoast.h"
#include "access/table.h"
#include "access/tableam.h"
#include "verify_common.h"
@@ -189,3 +190,114 @@ index_checkable(Relation rel, Oid am_id)
return amcheck_index_mainfork_expected(rel);
}
+
+IndexTuple
+amcheck_normalize_tuple(Relation irel, IndexTuple itup)
+{
+ TupleDesc tupleDescriptor = RelationGetDescr(irel);
+ Datum normalized[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ bool need_free[INDEX_MAX_KEYS];
+ bool formnewtup = false;
+ IndexTuple reformed;
+ int i;
+
+ /* Easy case: It's immediately clear that tuple has no varlena datums */
+ if (!IndexTupleHasVarwidths(itup))
+ return itup;
+
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ {
+ Form_pg_attribute att;
+
+ att = TupleDescAttr(tupleDescriptor, i);
+
+ /* Assume untoasted/already normalized datum initially */
+ need_free[i] = false;
+ normalized[i] = index_getattr(itup, att->attnum,
+ tupleDescriptor,
+ &isnull[i]);
+ if (att->attbyval || att->attlen != -1 || isnull[i])
+ continue;
+
+ /*
+ * Callers always pass a tuple that could safely be inserted into the
+ * index without further processing, so an external varlena header
+ * should never be encountered here
+ */
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
+ ItemPointerGetBlockNumber(&(itup->t_tid)),
+ ItemPointerGetOffsetNumber(&(itup->t_tid)),
+ RelationGetRelationName(irel))));
+ else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
+ VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
+ (att->attstorage == TYPSTORAGE_EXTENDED ||
+ att->attstorage == TYPSTORAGE_MAIN))
+ {
+ /*
+ * This value will be compressed by index_form_tuple() with the
+ * current storage settings. We may be here because this tuple
+ * was formed with different storage settings. So, force forming.
+ */
+ formnewtup = true;
+ }
+ else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
+ {
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
+ need_free[i] = true;
+ }
+
+ /*
+ * Short tuples may have 1B or 4B header. Convert 4B header of short
+ * tuples to 1B
+ */
+ else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
+ {
+ /* convert to short varlena */
+ Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
+ char *data = palloc(len);
+
+ SET_VARSIZE_SHORT(data, len);
+ memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
+
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(data);
+ need_free[i] = true;
+ }
+ }
+
+ /*
+ * Easier case: Tuple has varlena datums, none of which are compressed or
+ * short with 4B header
+ */
+ if (!formnewtup)
+ return itup;
+
+ /*
+ * Hard case: Tuple had compressed varlena datums that necessitate
+ * creating normalized version of the tuple from uncompressed input datums
+ * (normalized input datums). This is rather naive, but shouldn't be
+ * necessary too often.
+ *
+ * In the heap, tuples may contain short varlena datums with both 1B
+ * header and 4B headers. But the corresponding index tuple should always
+ * have such varlena's with 1B headers. So, if there is a short varlena
+ * with 4B header, we need to convert it for fingerprinting.
+ *
+ * Note that we rely on deterministic index_form_tuple() TOAST compression
+ * of normalized input.
+ */
+ reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
+ reformed->t_tid = itup->t_tid;
+
+ /* Cannot leak memory here */
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ if (need_free[i])
+ pfree(DatumGetPointer(normalized[i]));
+
+ return reformed;
+}
\ No newline at end of file
diff --git a/contrib/amcheck/verify_common.h b/contrib/amcheck/verify_common.h
index 3fa63d2121a..ffe0d30beb3 100644
--- a/contrib/amcheck/verify_common.h
+++ b/contrib/amcheck/verify_common.h
@@ -26,3 +26,5 @@ extern void amcheck_lock_relation_and_check(Oid indrelid,
Oid am_id,
IndexDoCheckCallback check,
LOCKMODE lockmode, void *state);
+
+extern IndexTuple amcheck_normalize_tuple(Relation irel, IndexTuple itup);
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index 0949c88983a..678528f2fd5 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -2859,115 +2859,10 @@ bt_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
static IndexTuple
bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
{
- TupleDesc tupleDescriptor = RelationGetDescr(state->rel);
- Datum normalized[INDEX_MAX_KEYS];
- bool isnull[INDEX_MAX_KEYS];
- bool need_free[INDEX_MAX_KEYS];
- bool formnewtup = false;
- IndexTuple reformed;
- int i;
-
/* Caller should only pass "logical" non-pivot tuples here */
Assert(!BTreeTupleIsPosting(itup) && !BTreeTupleIsPivot(itup));
- /* Easy case: It's immediately clear that tuple has no varlena datums */
- if (!IndexTupleHasVarwidths(itup))
- return itup;
-
- for (i = 0; i < tupleDescriptor->natts; i++)
- {
- Form_pg_attribute att;
-
- att = TupleDescAttr(tupleDescriptor, i);
-
- /* Assume untoasted/already normalized datum initially */
- need_free[i] = false;
- normalized[i] = index_getattr(itup, att->attnum,
- tupleDescriptor,
- &isnull[i]);
- if (att->attbyval || att->attlen != -1 || isnull[i])
- continue;
-
- /*
- * Callers always pass a tuple that could safely be inserted into the
- * index without further processing, so an external varlena header
- * should never be encountered here
- */
- if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
- ItemPointerGetBlockNumber(&(itup->t_tid)),
- ItemPointerGetOffsetNumber(&(itup->t_tid)),
- RelationGetRelationName(state->rel))));
- else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
- VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
- (att->attstorage == TYPSTORAGE_EXTENDED ||
- att->attstorage == TYPSTORAGE_MAIN))
- {
- /*
- * This value will be compressed by index_form_tuple() with the
- * current storage settings. We may be here because this tuple
- * was formed with different storage settings. So, force forming.
- */
- formnewtup = true;
- }
- else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
- {
- formnewtup = true;
- normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
- need_free[i] = true;
- }
-
- /*
- * Short tuples may have 1B or 4B header. Convert 4B header of short
- * tuples to 1B
- */
- else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
- {
- /* convert to short varlena */
- Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
- char *data = palloc(len);
-
- SET_VARSIZE_SHORT(data, len);
- memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
-
- formnewtup = true;
- normalized[i] = PointerGetDatum(data);
- need_free[i] = true;
- }
- }
-
- /*
- * Easier case: Tuple has varlena datums, none of which are compressed or
- * short with 4B header
- */
- if (!formnewtup)
- return itup;
-
- /*
- * Hard case: Tuple had compressed varlena datums that necessitate
- * creating normalized version of the tuple from uncompressed input datums
- * (normalized input datums). This is rather naive, but shouldn't be
- * necessary too often.
- *
- * In the heap, tuples may contain short varlena datums with both 1B
- * header and 4B headers. But the corresponding index tuple should always
- * have such varlena's with 1B headers. So, if there is a short varlena
- * with 4B header, we need to convert it for fingerprinting.
- *
- * Note that we rely on deterministic index_form_tuple() TOAST compression
- * of normalized input.
- */
- reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
- reformed->t_tid = itup->t_tid;
-
- /* Cannot leak memory here */
- for (i = 0; i < tupleDescriptor->natts; i++)
- if (need_free[i])
- pfree(DatumGetPointer(normalized[i]));
-
- return reformed;
+ return amcheck_normalize_tuple(state->rel, itup);
}
/*
--
2.43.0
v20251022-0002-Add-gist_index_check-function-to-verify-Gi.patchapplication/octet-stream; name=v20251022-0002-Add-gist_index_check-function-to-verify-Gi.patchDownload
From afe0c970933bdb0838cdb75fb651a2740b74be07 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Wed, 18 Jun 2025 20:33:28 +0300
Subject: [PATCH v20251022 2/2] Add gist_index_check() function to verify GiST
index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This function traverses the GiST with a depth-first search and checks
that all downlink tuples are included in the parent tuple's keyspace.
The traversal holds a lock on only one page at a time until some
discrepancy is found. To re-check a suspicious pair of parent and child
tuples it acquires locks on both parent and child pages in the same
order as a page split does.
Author: Andrey Borodin <amborodin@acm.org>
Author: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-By: José Villanova <jose.arthur@gmail.com>
Reviewed-By: Aleksander Alekseev <aleksander@timescale.com>
Reviewed-By: Nikolay Samokhvalov <samokhvalov@gmail.com>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Tomas Vondra <tomas.vondra@enterprisedb.com>
Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru
---
contrib/amcheck/Makefile | 5 +-
contrib/amcheck/amcheck--1.5--1.6.sql | 14 +
contrib/amcheck/amcheck.control | 2 +-
contrib/amcheck/expected/check_gist.out | 145 ++++++
contrib/amcheck/meson.build | 3 +
contrib/amcheck/sql/check_gist.sql | 62 +++
contrib/amcheck/verify_gist.c | 664 ++++++++++++++++++++++++
doc/src/sgml/amcheck.sgml | 19 +
8 files changed, 911 insertions(+), 3 deletions(-)
create mode 100644 contrib/amcheck/amcheck--1.5--1.6.sql
create mode 100644 contrib/amcheck/expected/check_gist.out
create mode 100644 contrib/amcheck/sql/check_gist.sql
create mode 100644 contrib/amcheck/verify_gist.c
diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index 1b7a63cbaa4..1f2fec95de5 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -4,16 +4,17 @@ MODULE_big = amcheck
OBJS = \
$(WIN32RES) \
verify_common.o \
+ verify_gist.o \
verify_gin.o \
verify_heapam.o \
verify_nbtree.o
EXTENSION = amcheck
DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql \
- amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql
+ amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql amcheck--1.5--1.6.sql
PGFILEDESC = "amcheck - function for verifying relation integrity"
-REGRESS = check check_btree check_gin check_heap
+REGRESS = check check_btree check_gin check_gist check_heap
EXTRA_INSTALL = contrib/pg_walinspect
TAP_TESTS = 1
diff --git a/contrib/amcheck/amcheck--1.5--1.6.sql b/contrib/amcheck/amcheck--1.5--1.6.sql
new file mode 100644
index 00000000000..a6a1debff12
--- /dev/null
+++ b/contrib/amcheck/amcheck--1.5--1.6.sql
@@ -0,0 +1,14 @@
+/* contrib/amcheck/amcheck--1.5--1.6.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.6'" to load this file. \quit
+
+
+-- gist_index_check()
+--
+CREATE FUNCTION gist_index_check(index regclass, heapallindexed boolean)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'gist_index_check'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION gist_index_check(regclass,boolean) FROM PUBLIC;
diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control
index c8ba6d7c9bc..2f329ef2cf4 100644
--- a/contrib/amcheck/amcheck.control
+++ b/contrib/amcheck/amcheck.control
@@ -1,5 +1,5 @@
# amcheck extension
comment = 'functions for verifying relation integrity'
-default_version = '1.5'
+default_version = '1.6'
module_pathname = '$libdir/amcheck'
relocatable = true
diff --git a/contrib/amcheck/expected/check_gist.out b/contrib/amcheck/expected/check_gist.out
new file mode 100644
index 00000000000..cbc3e27e679
--- /dev/null
+++ b/contrib/amcheck/expected/check_gist.out
@@ -0,0 +1,145 @@
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gist_check;
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+ attstorage
+------------
+ x
+(1 row)
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
+ gist_index_check
+------------------
+
+(1 row)
+
diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build
index 1f0c347ed54..13b36b495ed 100644
--- a/contrib/amcheck/meson.build
+++ b/contrib/amcheck/meson.build
@@ -5,6 +5,7 @@ amcheck_sources = files(
'verify_gin.c',
'verify_heapam.c',
'verify_nbtree.c',
+ 'verify_gist.c',
)
if host_system == 'windows'
@@ -27,6 +28,7 @@ install_data(
'amcheck--1.2--1.3.sql',
'amcheck--1.3--1.4.sql',
'amcheck--1.4--1.5.sql',
+ 'amcheck--1.5--1.6.sql',
kwargs: contrib_data_args,
)
@@ -39,6 +41,7 @@ tests += {
'check',
'check_btree',
'check_gin',
+ 'check_gist',
'check_heap',
],
},
diff --git a/contrib/amcheck/sql/check_gist.sql b/contrib/amcheck/sql/check_gist.sql
new file mode 100644
index 00000000000..37966423b8b
--- /dev/null
+++ b/contrib/amcheck/sql/check_gist.sql
@@ -0,0 +1,62 @@
+
+SELECT setseed(1);
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+-- cleanup
+DROP TABLE gist_check;
+
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
\ No newline at end of file
diff --git a/contrib/amcheck/verify_gist.c b/contrib/amcheck/verify_gist.c
new file mode 100644
index 00000000000..c15cd6ab556
--- /dev/null
+++ b/contrib/amcheck/verify_gist.c
@@ -0,0 +1,664 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_gist.c
+ * Verifies the integrity of GiST indexes based on invariants.
+ *
+ * Verification checks that all paths in the GiST graph contain
+ * consistent keys: tuples on parent pages consistently include tuples
+ * from child pages. Verification also checks graph invariants: an
+ * internal page must have at least one downlink, and an internal page
+ * can reference either only leaf pages or only internal pages.
+ *
+ *
+ * Copyright (c) 2017-2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/verify_gist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gist_private.h"
+#include "access/heaptoast.h"
+#include "access/tableam.h"
+#include "catalog/index.h"
+#include "catalog/pg_am.h"
+#include "common/pg_prng.h"
+#include "lib/bloomfilter.h"
+#include "verify_common.h"
+#include "utils/memutils.h"
+
+
+/*
+ * GistScanItem represents one item of depth-first scan of GiST index.
+ */
+typedef struct GistScanItem
+{
+ int depth;
+
+ /* Referenced block number to check next */
+ BlockNumber blkno;
+
+ /*
+ * Correctness of this parent tuple will be checked against contents of
+ * referenced page. This tuple will be NULL for the root block.
+ */
+ IndexTuple parenttup;
+
+ /*
+ * LSN to handle concurrent scans of the page. It's necessary to avoid
+ * missing some subtrees from the page that was split just before we read it.
+ */
+ XLogRecPtr parentlsn;
+
+ /*
+ * Reference to parent page for re-locking in case of found parent-child
+ * tuple discrepancies.
+ */
+ BlockNumber parentblk;
+
+ /* Pointer to a next stack item. */
+ struct GistScanItem *next;
+} GistScanItem;
+
+typedef struct GistCheckState
+{
+ /* GiST state */
+ GISTSTATE *state;
+ /* Bloom filter fingerprints index tuples */
+ bloom_filter *filter;
+
+ Snapshot snapshot;
+ Relation rel;
+ Relation heaprel;
+
+ /* Debug counter for reporting percentage of work already done */
+ int64 heaptuplespresent;
+
+ /* progress reporting stuff */
+ BlockNumber totalblocks;
+ BlockNumber reportedblocks;
+ BlockNumber scannedblocks;
+ BlockNumber deltablocks;
+
+ int leafdepth;
+} GistCheckState;
+
+PG_FUNCTION_INFO_V1(gist_index_check);
+
+static void giststate_init_heapallindexed(Relation rel, GistCheckState * result);
+static void gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly);
+static void gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed,
+ BufferAccessStrategy strategy);
+static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
+static IndexTuple gist_refind_parent(Relation rel, BlockNumber parentblkno,
+ BlockNumber childblkno,
+ BufferAccessStrategy strategy);
+static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block,
+ Page page, OffsetNumber offset);
+static void gist_tuple_present_callback(Relation index, ItemPointer tid,
+ Datum *values, bool *isnull,
+ bool tupleIsAlive, void *checkstate);
+static IndexTuple gistFormNormalizedTuple(GistCheckState *giststate,
+ IndexTuple itup);
+
+/*
+ * gist_index_check(index regclass)
+ *
+ * Verify integrity of GiST index.
+ *
+ * Acquires AccessShareLock on heap & index relations.
+ */
+Datum
+gist_index_check(PG_FUNCTION_ARGS)
+{
+ Oid indrelid = PG_GETARG_OID(0);
+ bool heapallindexed = PG_GETARG_BOOL(1);
+
+ amcheck_lock_relation_and_check(indrelid,
+ GIST_AM_OID,
+ gist_check_parent_keys_consistency,
+ AccessShareLock,
+ &heapallindexed);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Initialize the parts of GistCheckState needed for the heapallindexed
+ * check: the bloom filter and the snapshot.
+ */
+static void
+giststate_init_heapallindexed(Relation rel, GistCheckState * result)
+{
+ int64 total_pages;
+ int64 total_elems;
+ uint64 seed;
+
+ /*
+ * Size Bloom filter based on estimated number of tuples in index. This
+ * logic is similar to B-tree, see verify_nbtree.c.
+ */
+ total_pages = result->totalblocks;
+ total_elems = Max(total_pages * (MaxOffsetNumber / 5),
+ (int64) rel->rd_rel->reltuples);
+ seed = pg_prng_uint64(&pg_global_prng_state);
+ result->filter = bloom_create(total_elems, maintenance_work_mem, seed);
+
+ result->snapshot = RegisterSnapshot(GetTransactionSnapshot());
+
+
+ /*
+ * GetTransactionSnapshot() always acquires a new MVCC snapshot in READ
+ * COMMITTED mode. A new snapshot is guaranteed to have all the entries
+ * it requires in the index.
+ *
+ * We must defend against the possibility that an old xact snapshot was
+ * returned at higher isolation levels when that snapshot is not safe for
+ * index scans of the target index. This is possible when the snapshot
+ * sees tuples that are before the index's indcheckxmin horizon. Throwing
+ * an error here should be very rare. It doesn't seem worth using a
+ * secondary snapshot to avoid this.
+ */
+ if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
+ !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
+ result->snapshot->xmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("index \"%s\" cannot be verified using transaction snapshot",
+ RelationGetRelationName(rel))));
+}
+
+/*
+ * Main entry point for GiST check.
+ *
+ * This function verifies that tuples of internal pages cover all
+ * the key space of each tuple on the leaf pages. To do this we invoke
+ * gist_check_page() for every page.
+ *
+ * This check allocates memory context and scans through
+ * GiST graph. This scan is performed in a depth-first search using a stack of
+ * GistScanItem-s. Initially this stack contains only the root block number. On
+ * each iteration the top block number is replaced by referenced block numbers.
+ *
+ *
+ * gist_check_page() in its turn takes every tuple on a page and tries to
+ * adjust the parent's downlink tuple by it. The parent GiST tuple should
+ * never require any adjustment.
+ */
+static void
+gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly)
+{
+ BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+ GistScanItem *stack;
+ MemoryContext mctx;
+ MemoryContext oldcontext;
+ GISTSTATE *state;
+ bool heapallindexed = *((bool *) callback_state);
+ GistCheckState *check_state = palloc0(sizeof(GistCheckState));
+
+ mctx = AllocSetContextCreate(CurrentMemoryContext,
+ "amcheck context",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(mctx);
+
+ state = initGISTstate(rel);
+
+ check_state->state = state;
+ check_state->rel = rel;
+ check_state->heaprel = heaprel;
+
+ /*
+ * We don't know the height of the tree yet, but as soon as we encounter a
+ * leaf page, we will set 'leafdepth' to its depth.
+ */
+ check_state->leafdepth = -1;
+
+ check_state->totalblocks = RelationGetNumberOfBlocks(rel);
+ /* report every 100 blocks or 5%, whichever is bigger */
+ check_state->deltablocks = Max(check_state->totalblocks / 20, 100);
+
+ if (heapallindexed)
+ giststate_init_heapallindexed(rel, check_state);
+
+ /* Start the scan at the root page */
+ stack = (GistScanItem *) palloc0(sizeof(GistScanItem));
+ stack->depth = 0;
+ stack->parenttup = NULL;
+ stack->parentblk = InvalidBlockNumber;
+ stack->parentlsn = InvalidXLogRecPtr;
+ stack->blkno = GIST_ROOT_BLKNO;
+
+ /*
+ * This GiST scan is effectively an "old" VACUUM version before commit
+ * fe280694d which introduced physical order scanning.
+ */
+
+ while (stack)
+ {
+ GistScanItem *stack_next;
+ Buffer buffer;
+ Page page;
+ XLogRecPtr lsn;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Report progress */
+ if (check_state->scannedblocks > check_state->reportedblocks +
+ check_state->deltablocks)
+ {
+ elog(DEBUG1, "verified level %u blocks of approximately %u total",
+ check_state->scannedblocks, check_state->totalblocks);
+ check_state->reportedblocks = check_state->scannedblocks;
+ }
+ check_state->scannedblocks++;
+
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+ RBM_NORMAL, strategy);
+ LockBuffer(buffer, GIST_SHARE);
+ page = (Page) BufferGetPage(buffer);
+ lsn = BufferGetLSNAtomic(buffer);
+
+ /* Do basic sanity checks on the page headers */
+ check_index_page(rel, buffer, stack->blkno);
+
+ /*
+ * It's possible that the page was split since we looked at the
+ * parent, in which case we missed the downlink of the right sibling
+ * when we scanned the parent. If so, add the right sibling to the
+ * stack now.
+ */
+ if (GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page))
+ {
+ /* split page detected, install right link to the stack */
+ GistScanItem *ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+
+ ptr->depth = stack->depth;
+ ptr->parenttup = CopyIndexTuple(stack->parenttup);
+ ptr->parentblk = stack->parentblk;
+ ptr->parentlsn = stack->parentlsn;
+ ptr->blkno = GistPageGetOpaque(page)->rightlink;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+
+ gist_check_page(check_state, stack, page, heapallindexed, strategy);
+
+ if (!GistPageIsLeaf(page))
+ {
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ /* Internal page, so recurse to the child */
+ GistScanItem *ptr;
+ ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+ ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+ ptr->depth = stack->depth + 1;
+ ptr->parenttup = CopyIndexTuple(idxtuple);
+ ptr->parentblk = stack->blkno;
+ ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+ ptr->parentlsn = lsn;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+ }
+
+ LockBuffer(buffer, GIST_UNLOCK);
+ ReleaseBuffer(buffer);
+
+ /* Step to next item in the queue */
+ stack_next = stack->next;
+ if (stack->parenttup)
+ pfree(stack->parenttup);
+ pfree(stack);
+ stack = stack_next;
+ }
+
+ if (heapallindexed)
+ {
+ IndexInfo *indexinfo = BuildIndexInfo(rel);
+ TableScanDesc scan;
+
+ scan = table_beginscan_strat(heaprel, /* relation */
+ check_state->snapshot, /* snapshot */
+ 0, /* number of keys */
+ NULL, /* scan key */
+ true, /* buffer access strategy OK */
+ true); /* syncscan OK? */
+
+ /*
+ * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY.
+ */
+ indexinfo->ii_Concurrent = true;
+
+ indexinfo->ii_Unique = false;
+ indexinfo->ii_ExclusionOps = NULL;
+ indexinfo->ii_ExclusionProcs = NULL;
+ indexinfo->ii_ExclusionStrats = NULL;
+
+ elog(DEBUG1, "verifying that tuples from index \"%s\" are present in \"%s\"",
+ RelationGetRelationName(rel),
+ RelationGetRelationName(heaprel));
+
+ table_index_build_scan(heaprel, rel, indexinfo, true, false,
+ gist_tuple_present_callback, (void *) check_state, scan);
+
+ ereport(DEBUG1,
+ (errmsg_internal("finished verifying presence of " INT64_FORMAT " tuples from table \"%s\" with bitset %.2f%% set",
+ check_state->heaptuplespresent,
+ RelationGetRelationName(heaprel),
+ 100.0 * bloom_prop_bits_set(check_state->filter))));
+
+ UnregisterSnapshot(check_state->snapshot);
+ bloom_free(check_state->filter);
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(mctx);
+ pfree(check_state);
+}
+
+static void
+gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed, BufferAccessStrategy strategy)
+{
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ /* Check that the tree has the same height in all branches */
+ if (GistPageIsLeaf(page))
+ {
+ if (check_state->leafdepth == -1)
+ check_state->leafdepth = stack->depth;
+ else if (stack->depth != check_state->leafdepth)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+ RelationGetRelationName(check_state->rel), stack->blkno)));
+ }
+
+ /*
+ * Check that each tuple looks valid, and is consistent with the downlink
+ * we followed when we stepped on this page.
+ */
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ ItemId iid = PageGetItemIdCareful(check_state->rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+ IndexTuple tmpTuple = NULL;
+
+ /*
+ * Check that it's not a leftover invalid tuple from pre-9.1. See also
+ * gistdoinsert() and gistbulkdelete() handling of such tuples. We do
+ * consider it an error here.
+ */
+ if (GistTupleIsInvalid(idxtuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("index \"%s\" contains an inner tuple marked as invalid, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i),
+ errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
+ errhint("Please REINDEX it.")));
+
+ if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+
+ /*
+ * Check if this tuple is consistent with the downlink in the parent.
+ */
+ if (stack->parenttup)
+ tmpTuple = gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state);
+
+ if (tmpTuple)
+ {
+ /*
+ * There was a discrepancy between parent and child tuples. We
+ * need to verify that it is not a result of a concurrent call of
+ * gistplacetopage(). So, lock the parent and try to find the
+ * downlink for the current page. It may be missing due to a
+ * concurrent page split; this is OK.
+ *
+ * Note that when we re-acquire the parent tuple now, we hold locks
+ * on both parent and child buffers. Thus the parent tuple must
+ * include the keyspace of the child.
+ */
+
+ pfree(tmpTuple);
+ pfree(stack->parenttup);
+ stack->parenttup = gist_refind_parent(check_state->rel, stack->parentblk,
+ stack->blkno, strategy);
+
+ /* If we re-found the parent tuple, make a final check before failing */
+ if (!stack->parenttup)
+ elog(NOTICE, "unable to find parent tuple for block %u on block %u due to concurrent split",
+ stack->blkno, stack->parentblk);
+ else if (gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+ else
+ {
+ /*
+ * But now it is properly adjusted - nothing to do here.
+ */
+ }
+ }
+
+ if (GistPageIsLeaf(page))
+ {
+ if (heapallindexed)
+ {
+ IndexTuple norm;
+
+ norm = gistFormNormalizedTuple(check_state, idxtuple);
+
+ bloom_add_element(check_state->filter,
+ (unsigned char *) norm,
+ IndexTupleSize(norm));
+
+ /* Be tidy */
+ if (norm != idxtuple)
+ pfree(norm);
+ }
+ }
+ else
+ {
+ OffsetNumber off = ItemPointerGetOffsetNumber(&(idxtuple->t_tid));
+
+ if (off != TUPLE_IS_VALID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has on page %u offset %u has item id not pointing to 0xffff, but %hu",
+ RelationGetRelationName(check_state->rel), stack->blkno, i, off)));
+ }
+ }
+}
+
+/*
+ * gistFormNormalizedTuple - analogue to gistFormTuple, but performs deTOASTing
+ * of all included data (for covering indexes). While we do not expect
+ * toasted attributes in normal indexes, this can happen as a result of
+ * intervention into system catalog. Detoasting of key attributes is expected
+ * to be done by opclass decompression methods, if the indexed type might be
+ * toasted.
+ */
+static IndexTuple
+gistFormNormalizedTuple(GistCheckState *giststate,
+ IndexTuple itup)
+{
+ return amcheck_normalize_tuple(giststate->rel, itup);
+}
+
+static void
+gist_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
+ bool *isnull, bool tupleIsAlive, void *checkstate)
+{
+ GistCheckState *state = (GistCheckState *) checkstate;
+ IndexTuple itup, norm;
+ Datum compatt[INDEX_MAX_KEYS];
+
+ /* Generate a normalized index tuple for fingerprinting */
+ gistCompressValues(state->state, index, values, isnull, true, compatt);
+ itup = index_form_tuple(RelationGetDescr(index), compatt, isnull);
+ itup->t_tid = *tid;
+
+ norm = gistFormNormalizedTuple(state, itup);
+
+ /* Probe Bloom filter -- tuple should be present */
+ if (bloom_lacks_element(state->filter, (unsigned char *) norm,
+ IndexTupleSize(norm)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
+ ItemPointerGetBlockNumber(&(norm->t_tid)),
+ ItemPointerGetOffsetNumber(&(norm->t_tid)),
+ RelationGetRelationName(state->heaprel),
+ RelationGetRelationName(state->rel))));
+
+ state->heaptuplespresent++;
+
+ pfree(itup);
+ /* Be tidy */
+ if (norm != itup)
+ pfree(norm);
+}
+
+/*
+ * check_index_page - verification of basic invariants about GiST page data
+ * This function does not do any tuple analysis.
+ */
+static void
+check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
+{
+ Page page = BufferGetPage(buffer);
+
+ gistcheckpage(rel, buffer);
+
+ if (GistPageGetOpaque(page)->gist_page_id != GIST_PAGE_ID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has corrupted page %d",
+ RelationGetRelationName(rel), blockNo)));
+
+ if (GistPageIsDeleted(page))
+ {
+ if (!GistPageIsLeaf(page))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted internal page %d",
+ RelationGetRelationName(rel), blockNo)));
+ if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted page %d with tuples",
+ RelationGetRelationName(rel), blockNo)));
+ }
+ else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has page %d with exceeding count of tuples",
+ RelationGetRelationName(rel), blockNo)));
+}
+
+/*
+ * Try to re-find the downlink pointing to 'blkno', in 'parentblkno'.
+ *
+ * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
+ * returns NULL.
+ */
+static IndexTuple
+gist_refind_parent(Relation rel,
+ BlockNumber parentblkno, BlockNumber childblkno,
+ BufferAccessStrategy strategy)
+{
+ Buffer parentbuf;
+ Page parentpage;
+ OffsetNumber parent_maxoff;
+ IndexTuple result = NULL;
+
+ parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
+ strategy);
+
+ LockBuffer(parentbuf, GIST_SHARE);
+ parentpage = BufferGetPage(parentbuf);
+
+ if (GistPageIsLeaf(parentpage))
+ {
+ /*
+ * Currently GiST never deletes internal pages, thus they can never
+ * become leaf.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" internal page %d became leaf",
+ RelationGetRelationName(rel), parentblkno)));
+ }
+
+ parent_maxoff = PageGetMaxOffsetNumber(parentpage);
+ for (OffsetNumber o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o))
+ {
+ ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o);
+ IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid);
+
+ if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
+ {
+ /*
+ * Found it! Make a copy and return it while both parent and child
+ * pages are locked. This guarantees that at this particular
+ * moment tuples must be coherent to each other.
+ */
+ result = CopyIndexTuple(itup);
+ break;
+ }
+ }
+
+ UnlockReleaseBuffer(parentbuf);
+
+ return result;
+}
+
+static ItemId
+PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
+ OffsetNumber offset)
+{
+ ItemId itemid = PageGetItemId(page, offset);
+
+ if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
+ BLCKSZ - MAXALIGN(sizeof(GISTPageOpaqueData)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("line pointer points past end of tuple space in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ /*
+ * Verify that the line pointer isn't LP_REDIRECT or LP_UNUSED, since gist
+ * never uses either. Verify that the line pointer has storage, too, since
+ * even LP_DEAD items should.
+ */
+ if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
+ ItemIdGetLength(itemid) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("invalid line pointer storage in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ return itemid;
+}
diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml
index 0aff0a6c8c6..7e4b6c6f692 100644
--- a/doc/src/sgml/amcheck.sgml
+++ b/doc/src/sgml/amcheck.sgml
@@ -208,6 +208,25 @@ ORDER BY c.relpages DESC LIMIT 10;
</listitem>
</varlistentry>
+ <varlistentry>
+ <term>
+ <function>gist_index_check(index regclass, heapallindexed boolean) returns void</function>
+ <indexterm>
+ <primary>gist_index_check</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ <function>gist_index_check</function> tests that its target GiST
+ index has consistent parent-child tuple relations (no parent tuple
+ requires adjustment) and that the page graph respects balanced-tree
+ invariants (internal pages reference either only leaf pages or only
+ internal pages).
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<tip>
<para>
--
2.43.0
Hi,
On Wed, Oct 22, 2025 at 9:57 PM Kirill Reshke <reshkekirill@gmail.com> wrote:
Hi! Thank you for your review.
Thank you for the new version!
I'm posting a new version of the 0001 patch of the series.
On Tue, 22 Jul 2025 at 15:47, Arseniy Mukhin
<arseniy.mukhin.dev@gmail.com> wrote:

Hi, Andrey!
Thank you for working on this! There is a long history of the patch, I
hope it will be committed soon!)

On Fri, Jul 11, 2025 at 3:39 PM Andrey Borodin <x4mmm@yandex-team.ru> wrote:
On 30 Jun 2025, at 16:34, Andrey Borodin <x4mmm@yandex-team.ru> wrote:
Please find attached two new steps for amcheck:
1. A function to verify GiST integrity. This patch is in decent shape, simply rebased from previous year.
2. Support on pg_amcheck's side for this function. This patch did not receive such review attention before. And, perhaps, should be extended to support existing GIN functions.

Here's a version that adds GIN functions to pg_amcheck.
IDK, maybe we should split pg_amcheck part into another thread and add there BRIN too...

Speaking of BRIN pg_amcheck, I probably wouldn't merge it with the
gist/gin pg_amcheck patchset because that would create a dependency on
the amcheck BRIN support patch, and it is not clear when that will be
ready.

There are some points about the patch:
1) There are several typos in verify_gist.c:
downlinks -> downlink (header comment)
discrepencies -> discrepancies
Correctess -> Correctness
hande -> handle
Initaliaze -> Initialize
numbmer -> number
replcaed -> replaced
aquire -> acquire

2) Copyright year is 2023 in the patch. Time flies:)
These two are (trivially) fixed.
3) There is the same check in btree and while reviewing the patch I
realised it should be added to the BRIN amcheck as well. Probably it
will be needed for GIN someday. What do you think about moving it to
verify_common?

if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
    !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
                           result->snapshot->xmin))

I think this is a good idea. I'm not sure if we should bother with
refactoring in this series though...
Great, so maybe we can start a separate thread for this small
refactoring. Some work in this direction has already been done in the brin
amcheck thread [0].
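If it helps the discussion, a shared helper in verify_common.c might look roughly like the sketch below. The function name and its placement are illustrative only, not taken from any posted patch; the body is just the check quoted above.

static void
amcheck_check_snapshot_usable(Relation rel, Snapshot snapshot)
{
    /* Refuse an old xact snapshot that may not see all of the index's entries */
    if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
        !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
                               snapshot->xmin))
        ereport(ERROR,
                (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                 errmsg("index \"%s\" cannot be verified using transaction snapshot",
                        RelationGetRelationName(rel))));
}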
4) Should we check blknum of the new entry before pushing to the
stack? Probably we can check if it's a valid blknum and it's not
outside of the index. This way we can give a more detailed error
message in case we meet the wrong blknum.

in the split detection code:
ptr->blkno = GistPageGetOpaque(page)->rightlink;
and when we add children of an inner page:
ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
We indeed need to recheck all bytes in possibly-corrupted indexes,
including downlinks.
But amcheck can be run concurrently with index inserts, which will
change the current index size, so checking is not trivial.
And given that it is not checked in the already-committed nbtree & GIN amcheck
modules, I suggest not bothering with that in this thread.
This can be a separate patch to verify_nbtree.
OK.
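For the archives, the block-number sanity check discussed above could be a small helper along these lines (a sketch only; the function name is illustrative and this is not part of the posted patches):

static void
gist_check_downlink_blkno(Relation rel, BlockNumber parentblk,
                          BlockNumber childblk)
{
    /* Re-read the relation size so that concurrent extension is tolerated */
    BlockNumber nblocks = RelationGetNumberOfBlocks(rel);

    if (!BlockNumberIsValid(childblk) || childblk >= nblocks)
        ereport(ERROR,
                (errcode(ERRCODE_INDEX_CORRUPTED),
                 errmsg("index \"%s\" has downlink to invalid block %u on block %u",
                        RelationGetRelationName(rel), childblk, parentblk)));
}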
6) Several points about 'gistFormNormalizedTuple'. I read the previous
thread [1] and it seems there is an unfinished discussion about
normalizing during heapallindexed check. I think normalization needs
some more work here.

6a) There is a TODO

/* pfree(DatumGetPointer(old)); // TODO: this fails. Why? */

6b) AFAICS 'compress' is an optional support function. If opclass
doesn't have a 'compress' function, then 'gistCompressValues' leaves
such attributes as they are. Here we get attdata from the heap scan, and
it could be toasted. That means that these checks can result in false
positives:

gistCompressValues(giststate, r, attdata, isnull, true, compatt);
...
for (int i = 0; i < r->rd_att->natts; i++)
{
if (VARATT_IS_EXTERNAL(DatumGetPointer(compatt[i])))
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
....
if (VARATT_IS_COMPRESSED(DatumGetPointer(compatt[i])))
{
if (i < IndexRelationGetNumberOfKeyAttributes(r))
ereport(ERROR,

Also the 'VARATT_IS_EXTERNAL' check will always result in a false
positive for toasted include attributes here. Reproducer for it:

DROP TABLE IF EXISTS tbl;
CREATE TABLE tbl(a point, t text);
-- disable compression for 't', but let it be external
ALTER TABLE tbl ALTER COLUMN t SET STORAGE external ;
INSERT INTO tbl values (point(random(), random()), repeat('a',3000 ));
CREATE INDEX tbl_idx ON tbl using gist (a) include (t);
SELECT gist_index_check('tbl_idx', true);

So I think we need to remove these checks completely.
6c) Current code doesn't apply normalization to existing index tuples
when adding them to the bloom filter, which can result in a false positive.
Reproducer:
Here we use plain storage during the index build, then during the check we
have extended storage, which results in a different binary
representation of the same data, and we get a false positive here.

DROP TABLE IF EXISTS tbl;
CREATE TABLE tbl(a tsvector);
CREATE INDEX tbl_idx ON tbl using gist (a) ;
ALTER TABLE tbl ALTER COLUMN a SET STORAGE plain;
INSERT INTO tbl values ('a' ::tsvector);
ALTER TABLE tbl ALTER COLUMN a SET STORAGE extended ;
SELECT gist_index_check('tbl_idx', true);

6d) At the end of 'gistFormNormalizedTuple' we have
ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
I guess it follows the gistFormTuple function, but here we use
gistFormNormalizedTuple only for leaf tuples and we override the
offset number right after the 'gistFormNormalizedTuple' call, so it
looks like we can drop it.

In general I think normalization here can follow the same logic as
for verify_nbtree. We can even reuse 'bt_normalize_tuple' as a
normalization function. It handles all the corner cases, like short varlenas,
differences in compression, etc., that we can have in GiST as well. It
contains just a few lines about btree and everything else is valid for
GiST, so we need to modify it a bit. I think we can move it to
verify_common. Then we need to normalize every existing leaf index
tuple before adding it to the bloom filter. During the probing phase I
think we can just use 'gistFormTuple' to build an index tuple and then
normalize it before probing. What do you think?

Thank you!
I did a little refactor in patch one, so we can reuse
bt_normalize_tuple. With these changes, your reproducers do not
complain.
I guess `gistFormNormalizedTuple` is now unneeded and its comment is no
longer true. index_form_tuple in gist_tuple_present_callback ensures
all attributes are detoasted, and then we call `amcheck_normalize_tuple`.
I will remove the gistFormNormalizedTuple function in the next iteration if
this approach is OK.
LGTM. Only one point here: I think maybe we need to move the
bt_normalize_tuple comment as well, as now it is more about
amcheck_normalize_tuple than bt_normalize_tuple.
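For readers following along, the resulting leaf-tuple fingerprinting boils down to the condensed sketch below (abridged from gist_check_page() in the posted patch; the helper name is illustrative). The heap-side callback builds its tuple with gistCompressValues() and index_form_tuple(), normalizes it the same way, and probes with bloom_lacks_element() instead of adding.

static void
gist_fingerprint_leaf_tuple(GistCheckState *check_state, IndexTuple itup)
{
    /* Normalize varlena headers/compression so both sides fingerprint alike */
    IndexTuple norm = amcheck_normalize_tuple(check_state->rel, itup);

    bloom_add_element(check_state->filter, (unsigned char *) norm,
                      IndexTupleSize(norm));

    /* amcheck_normalize_tuple() may return the input tuple unchanged */
    if (norm != itup)
        pfree(norm);
}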
[0]: /messages/by-id/CAE7r3MKUOGJ0v5-b5fYaF6sxKZvr0J-YXHTJf8u8GUr1tTcvNg@mail.gmail.com
Best regards,
Arseniy Mukhin
The following review has been posted through the commitfest application:
make installcheck-world: not tested
Implements feature: not tested
Spec compliant: not tested
Documentation: not tested
Hi,
Together with Sergey we did a review and found a few things that need fixing:
- `contrib/amcheck/amcheck--1.5--1.6.sql:14` - missing space after comma
- `verify_gist.c` should be the second entry in meson.build, not at the end
- Some function arguments like `(GistCheckState * check_state, GistScanItem * stack)` have extra spaces after the `*` - should be `(GistCheckState *check_state, GistScanItem *stack)`
- Missing `#include "access/itup.h"` in `verify_common.h`
- Missing test file `007verify_gist_.pl` (not sure if it should be created)
- `contrib/amcheck/sql/check_gist.sql` - missing cleanup statement `DROP TABLE toast_bug;`
Let us know if you need any clarification on these points!
- Miłosz and Sergey
Hi Miłosz and Sergey!
Thanks a lot for reviewing this patch!
On 9 Dec 2025, at 23:54, Miłosz Bieniek <bieniek.milosz@proton.me> wrote:
Hi,
Together with Sergey we did a review and found a few things that need fixing:

- `contrib/amcheck/amcheck--1.5--1.6.sql:14` - missing space after comma
Fixed.
- `verify_gist.c` should be the second entry in meson.build, not at the end
Fixed.
- Some function arguments like `(GistCheckState * check_state, GistScanItem * stack)` have extra spaces after the `*` - should be `(GistCheckState *check_state, GistScanItem *stack)`
Fixed.
- Missing `#include "access/itup.h"` in `verify_common.h`
I do not understand why. Perhaps optimizing headers would be a good idea. This file is not included by any of the files that include verify_common.h.
- Missing test file `007verify_gist_.pl` (not sure if should be created)
I don't think we ever had it for gist.
- `contrib/amcheck/sql/check_gist.sql` - missing cleanup statement `DROP TABLE toast_bug;`
Fixed.
Thanks!
Best regards, Andrey Borodin.
Attachments:
v20251216-0001-Move-normalize-tuple-logic-from-nbtcheck-t.patchapplication/octet-stream; name=v20251216-0001-Move-normalize-tuple-logic-from-nbtcheck-t.patch; x-unix-mode=0644Download
From 47635357745ad1fa7e168a6be1f7ae5ddc009f2a Mon Sep 17 00:00:00 2001
From: reshke <reshke@double.cloud>
Date: Wed, 22 Oct 2025 15:29:26 +0000
Subject: [PATCH v20251216 1/3] Move `normalize tuple` logic from nbtcheck to
verify_common
Preparatory patch to reuse the index tuple normalization logic
in GiST amcheck.
---
contrib/amcheck/verify_common.c | 112 ++++++++++++++++++++++++++++++++
contrib/amcheck/verify_common.h | 2 +
contrib/amcheck/verify_nbtree.c | 107 +-----------------------------
3 files changed, 115 insertions(+), 106 deletions(-)
diff --git a/contrib/amcheck/verify_common.c b/contrib/amcheck/verify_common.c
index a31ce06ed99..e9b4887f65e 100644
--- a/contrib/amcheck/verify_common.c
+++ b/contrib/amcheck/verify_common.c
@@ -13,6 +13,7 @@
#include "postgres.h"
#include "access/genam.h"
+#include "access/heaptoast.h"
#include "access/table.h"
#include "access/tableam.h"
#include "verify_common.h"
@@ -189,3 +190,114 @@ index_checkable(Relation rel, Oid am_id)
return amcheck_index_mainfork_expected(rel);
}
+
+IndexTuple
+amcheck_normalize_tuple(Relation irel, IndexTuple itup)
+{
+ TupleDesc tupleDescriptor = RelationGetDescr(irel);
+ Datum normalized[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ bool need_free[INDEX_MAX_KEYS];
+ bool formnewtup = false;
+ IndexTuple reformed;
+ int i;
+
+ /* Easy case: It's immediately clear that tuple has no varlena datums */
+ if (!IndexTupleHasVarwidths(itup))
+ return itup;
+
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ {
+ Form_pg_attribute att;
+
+ att = TupleDescAttr(tupleDescriptor, i);
+
+ /* Assume untoasted/already normalized datum initially */
+ need_free[i] = false;
+ normalized[i] = index_getattr(itup, att->attnum,
+ tupleDescriptor,
+ &isnull[i]);
+ if (att->attbyval || att->attlen != -1 || isnull[i])
+ continue;
+
+ /*
+ * Callers always pass a tuple that could safely be inserted into the
+ * index without further processing, so an external varlena header
+ * should never be encountered here
+ */
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
+ ItemPointerGetBlockNumber(&(itup->t_tid)),
+ ItemPointerGetOffsetNumber(&(itup->t_tid)),
+ RelationGetRelationName(irel))));
+ else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
+ VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
+ (att->attstorage == TYPSTORAGE_EXTENDED ||
+ att->attstorage == TYPSTORAGE_MAIN))
+ {
+ /*
+ * This value will be compressed by index_form_tuple() with the
+ * current storage settings. We may be here because this tuple
+ * was formed with different storage settings. So, force forming.
+ */
+ formnewtup = true;
+ }
+ else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
+ {
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
+ need_free[i] = true;
+ }
+
+ /*
+ * Short tuples may have 1B or 4B header. Convert 4B header of short
+ * tuples to 1B
+ */
+ else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
+ {
+ /* convert to short varlena */
+ Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
+ char *data = palloc(len);
+
+ SET_VARSIZE_SHORT(data, len);
+ memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
+
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(data);
+ need_free[i] = true;
+ }
+ }
+
+ /*
+ * Easier case: Tuple has varlena datums, none of which are compressed or
+ * short with 4B header
+ */
+ if (!formnewtup)
+ return itup;
+
+ /*
+ * Hard case: Tuple had compressed varlena datums that necessitate
+ * creating normalized version of the tuple from uncompressed input datums
+ * (normalized input datums). This is rather naive, but shouldn't be
+ * necessary too often.
+ *
+ * In the heap, tuples may contain short varlena datums with both 1B
+ * header and 4B headers. But the corresponding index tuple should always
+ * have such varlena's with 1B headers. So, if there is a short varlena
+ * with 4B header, we need to convert it for fingerprinting.
+ *
+ * Note that we rely on deterministic index_form_tuple() TOAST compression
+ * of normalized input.
+ */
+ reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
+ reformed->t_tid = itup->t_tid;
+
+ /* Cannot leak memory here */
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ if (need_free[i])
+ pfree(DatumGetPointer(normalized[i]));
+
+ return reformed;
+}
\ No newline at end of file
diff --git a/contrib/amcheck/verify_common.h b/contrib/amcheck/verify_common.h
index 3fa63d2121a..ffe0d30beb3 100644
--- a/contrib/amcheck/verify_common.h
+++ b/contrib/amcheck/verify_common.h
@@ -26,3 +26,5 @@ extern void amcheck_lock_relation_and_check(Oid indrelid,
Oid am_id,
IndexDoCheckCallback check,
LOCKMODE lockmode, void *state);
+
+extern IndexTuple amcheck_normalize_tuple(Relation irel, IndexTuple itup);
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index f91392a3a49..2ad27eb5c2b 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -2845,115 +2845,10 @@ bt_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
static IndexTuple
bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
{
- TupleDesc tupleDescriptor = RelationGetDescr(state->rel);
- Datum normalized[INDEX_MAX_KEYS];
- bool isnull[INDEX_MAX_KEYS];
- bool need_free[INDEX_MAX_KEYS];
- bool formnewtup = false;
- IndexTuple reformed;
- int i;
-
/* Caller should only pass "logical" non-pivot tuples here */
Assert(!BTreeTupleIsPosting(itup) && !BTreeTupleIsPivot(itup));
- /* Easy case: It's immediately clear that tuple has no varlena datums */
- if (!IndexTupleHasVarwidths(itup))
- return itup;
-
- for (i = 0; i < tupleDescriptor->natts; i++)
- {
- Form_pg_attribute att;
-
- att = TupleDescAttr(tupleDescriptor, i);
-
- /* Assume untoasted/already normalized datum initially */
- need_free[i] = false;
- normalized[i] = index_getattr(itup, att->attnum,
- tupleDescriptor,
- &isnull[i]);
- if (att->attbyval || att->attlen != -1 || isnull[i])
- continue;
-
- /*
- * Callers always pass a tuple that could safely be inserted into the
- * index without further processing, so an external varlena header
- * should never be encountered here
- */
- if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
- ItemPointerGetBlockNumber(&(itup->t_tid)),
- ItemPointerGetOffsetNumber(&(itup->t_tid)),
- RelationGetRelationName(state->rel))));
- else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
- VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
- (att->attstorage == TYPSTORAGE_EXTENDED ||
- att->attstorage == TYPSTORAGE_MAIN))
- {
- /*
- * This value will be compressed by index_form_tuple() with the
- * current storage settings. We may be here because this tuple
- * was formed with different storage settings. So, force forming.
- */
- formnewtup = true;
- }
- else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
- {
- formnewtup = true;
- normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
- need_free[i] = true;
- }
-
- /*
- * Short tuples may have 1B or 4B header. Convert 4B header of short
- * tuples to 1B
- */
- else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
- {
- /* convert to short varlena */
- Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
- char *data = palloc(len);
-
- SET_VARSIZE_SHORT(data, len);
- memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
-
- formnewtup = true;
- normalized[i] = PointerGetDatum(data);
- need_free[i] = true;
- }
- }
-
- /*
- * Easier case: Tuple has varlena datums, none of which are compressed or
- * short with 4B header
- */
- if (!formnewtup)
- return itup;
-
- /*
- * Hard case: Tuple had compressed varlena datums that necessitate
- * creating normalized version of the tuple from uncompressed input datums
- * (normalized input datums). This is rather naive, but shouldn't be
- * necessary too often.
- *
- * In the heap, tuples may contain short varlena datums with both 1B
- * header and 4B headers. But the corresponding index tuple should always
- * have such varlena's with 1B headers. So, if there is a short varlena
- * with 4B header, we need to convert it for fingerprinting.
- *
- * Note that we rely on deterministic index_form_tuple() TOAST compression
- * of normalized input.
- */
- reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
- reformed->t_tid = itup->t_tid;
-
- /* Cannot leak memory here */
- for (i = 0; i < tupleDescriptor->natts; i++)
- if (need_free[i])
- pfree(DatumGetPointer(normalized[i]));
-
- return reformed;
+ return amcheck_normalize_tuple(state->rel, itup);
}
/*
--
2.51.2
v20251216-0003-Address-review-comments-by-Mi-osz-and-Serg.patchapplication/octet-stream; name=v20251216-0003-Address-review-comments-by-Mi-osz-and-Serg.patch; x-unix-mode=0644Download
From da9e30360b14156b598f61c2dd7a59fa49ed1340 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Tue, 16 Dec 2025 16:55:07 +0500
Subject: [PATCH v20251216 3/3] =?UTF-8?q?Address=20review=20comments=20by?=
=?UTF-8?q?=20Mi=C5=82osz=20and=20Sergey?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
contrib/amcheck/amcheck--1.5--1.6.sql | 2 +-
contrib/amcheck/expected/check_gist.out | 2 ++
contrib/amcheck/meson.build | 2 +-
contrib/amcheck/sql/check_gist.sql | 5 ++++-
contrib/amcheck/verify_gist.c | 6 +++---
5 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/contrib/amcheck/amcheck--1.5--1.6.sql b/contrib/amcheck/amcheck--1.5--1.6.sql
index a6a1debff12..e2d4ca60d5e 100644
--- a/contrib/amcheck/amcheck--1.5--1.6.sql
+++ b/contrib/amcheck/amcheck--1.5--1.6.sql
@@ -11,4 +11,4 @@ RETURNS VOID
AS 'MODULE_PATHNAME', 'gist_index_check'
LANGUAGE C STRICT;
-REVOKE ALL ON FUNCTION gist_index_check(regclass,boolean) FROM PUBLIC;
+REVOKE ALL ON FUNCTION gist_index_check(regclass, boolean) FROM PUBLIC;
diff --git a/contrib/amcheck/expected/check_gist.out b/contrib/amcheck/expected/check_gist.out
index cbc3e27e679..e229f38aa48 100644
--- a/contrib/amcheck/expected/check_gist.out
+++ b/contrib/amcheck/expected/check_gist.out
@@ -143,3 +143,5 @@ SELECT gist_index_check('toasty', true);
(1 row)
+-- cleanup
+DROP TABLE toast_bug;
diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build
index 13b36b495ed..536af0cccd4 100644
--- a/contrib/amcheck/meson.build
+++ b/contrib/amcheck/meson.build
@@ -2,10 +2,10 @@
amcheck_sources = files(
'verify_common.c',
+ 'verify_gist.c',
'verify_gin.c',
'verify_heapam.c',
'verify_nbtree.c',
- 'verify_gist.c',
)
if host_system == 'windows'
diff --git a/contrib/amcheck/sql/check_gist.sql b/contrib/amcheck/sql/check_gist.sql
index 37966423b8b..bc5737a7710 100644
--- a/contrib/amcheck/sql/check_gist.sql
+++ b/contrib/amcheck/sql/check_gist.sql
@@ -59,4 +59,7 @@ WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
-- Should not get false positive report of corruption:
-SELECT gist_index_check('toasty', true);
\ No newline at end of file
+SELECT gist_index_check('toasty', true);
+
+-- cleanup
+DROP TABLE toast_bug;
\ No newline at end of file
diff --git a/contrib/amcheck/verify_gist.c b/contrib/amcheck/verify_gist.c
index c15cd6ab556..3c4abfda538 100644
--- a/contrib/amcheck/verify_gist.c
+++ b/contrib/amcheck/verify_gist.c
@@ -87,10 +87,10 @@ typedef struct GistCheckState
PG_FUNCTION_INFO_V1(gist_index_check);
-static void giststate_init_heapallindexed(Relation rel, GistCheckState * result);
+static void giststate_init_heapallindexed(Relation rel, GistCheckState *result);
static void gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
void *callback_state, bool readonly);
-static void gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+static void gist_check_page(GistCheckState *check_state, GistScanItem *stack,
Page page, bool heapallindexed,
BufferAccessStrategy strategy);
static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
@@ -366,7 +366,7 @@ gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
}
static void
-gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+gist_check_page(GistCheckState *check_state, GistScanItem *stack,
Page page, bool heapallindexed, BufferAccessStrategy strategy)
{
OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
--
2.51.2
v20251216-0002-Add-gist_index_check-function-to-verify-Gi.patchapplication/octet-stream; name=v20251216-0002-Add-gist_index_check-function-to-verify-Gi.patch; x-unix-mode=0644Download
From 928f2b7dc89c5046803aa74b16608c87eaf5d01e Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Wed, 18 Jun 2025 20:33:28 +0300
Subject: [PATCH v20251216 2/3] Add gist_index_check() function to verify GiST
index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This function traverses GiST with a depth-first search and checks
that all downlink tuples are included in the parent tuple's keyspace.
The traversal locks only one page at a time until some discrepancy
is found. To re-check a suspicious pair of parent and child tuples
it acquires locks on both parent and child pages in the same order
as a page split does.
Author: Andrey Borodin <amborodin@acm.org>
Author: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-By: José Villanova <jose.arthur@gmail.com>
Reviewed-By: Aleksander Alekseev <aleksander@timescale.com>
Reviewed-By: Nikolay Samokhvalov <samokhvalov@gmail.com>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Tomas Vondra <tomas.vondra@enterprisedb.com>
Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru
---
contrib/amcheck/Makefile | 5 +-
contrib/amcheck/amcheck--1.5--1.6.sql | 14 +
contrib/amcheck/amcheck.control | 2 +-
contrib/amcheck/expected/check_gist.out | 145 ++++++
contrib/amcheck/meson.build | 3 +
contrib/amcheck/sql/check_gist.sql | 62 +++
contrib/amcheck/verify_gist.c | 664 ++++++++++++++++++++++++
doc/src/sgml/amcheck.sgml | 19 +
8 files changed, 911 insertions(+), 3 deletions(-)
create mode 100644 contrib/amcheck/amcheck--1.5--1.6.sql
create mode 100644 contrib/amcheck/expected/check_gist.out
create mode 100644 contrib/amcheck/sql/check_gist.sql
create mode 100644 contrib/amcheck/verify_gist.c
diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index 1b7a63cbaa4..1f2fec95de5 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -4,16 +4,17 @@ MODULE_big = amcheck
OBJS = \
$(WIN32RES) \
verify_common.o \
+ verify_gist.o \
verify_gin.o \
verify_heapam.o \
verify_nbtree.o
EXTENSION = amcheck
DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql \
- amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql
+ amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql amcheck--1.5--1.6.sql
PGFILEDESC = "amcheck - function for verifying relation integrity"
-REGRESS = check check_btree check_gin check_heap
+REGRESS = check check_btree check_gin check_gist check_heap
EXTRA_INSTALL = contrib/pg_walinspect
TAP_TESTS = 1
diff --git a/contrib/amcheck/amcheck--1.5--1.6.sql b/contrib/amcheck/amcheck--1.5--1.6.sql
new file mode 100644
index 00000000000..a6a1debff12
--- /dev/null
+++ b/contrib/amcheck/amcheck--1.5--1.6.sql
@@ -0,0 +1,14 @@
+/* contrib/amcheck/amcheck--1.5--1.6.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.6'" to load this file. \quit
+
+
+-- gist_index_check()
+--
+CREATE FUNCTION gist_index_check(index regclass, heapallindexed boolean)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'gist_index_check'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION gist_index_check(regclass,boolean) FROM PUBLIC;
diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control
index c8ba6d7c9bc..2f329ef2cf4 100644
--- a/contrib/amcheck/amcheck.control
+++ b/contrib/amcheck/amcheck.control
@@ -1,5 +1,5 @@
# amcheck extension
comment = 'functions for verifying relation integrity'
-default_version = '1.5'
+default_version = '1.6'
module_pathname = '$libdir/amcheck'
relocatable = true
diff --git a/contrib/amcheck/expected/check_gist.out b/contrib/amcheck/expected/check_gist.out
new file mode 100644
index 00000000000..cbc3e27e679
--- /dev/null
+++ b/contrib/amcheck/expected/check_gist.out
@@ -0,0 +1,145 @@
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gist_check;
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+ attstorage
+------------
+ x
+(1 row)
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
+ gist_index_check
+------------------
+
+(1 row)
+
diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build
index 1f0c347ed54..13b36b495ed 100644
--- a/contrib/amcheck/meson.build
+++ b/contrib/amcheck/meson.build
@@ -5,6 +5,7 @@ amcheck_sources = files(
'verify_gin.c',
'verify_heapam.c',
'verify_nbtree.c',
+ 'verify_gist.c',
)
if host_system == 'windows'
@@ -27,6 +28,7 @@ install_data(
'amcheck--1.2--1.3.sql',
'amcheck--1.3--1.4.sql',
'amcheck--1.4--1.5.sql',
+ 'amcheck--1.5--1.6.sql',
kwargs: contrib_data_args,
)
@@ -39,6 +41,7 @@ tests += {
'check',
'check_btree',
'check_gin',
+ 'check_gist',
'check_heap',
],
},
diff --git a/contrib/amcheck/sql/check_gist.sql b/contrib/amcheck/sql/check_gist.sql
new file mode 100644
index 00000000000..37966423b8b
--- /dev/null
+++ b/contrib/amcheck/sql/check_gist.sql
@@ -0,0 +1,62 @@
+
+SELECT setseed(1);
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+-- cleanup
+DROP TABLE gist_check;
+
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
\ No newline at end of file
diff --git a/contrib/amcheck/verify_gist.c b/contrib/amcheck/verify_gist.c
new file mode 100644
index 00000000000..c15cd6ab556
--- /dev/null
+++ b/contrib/amcheck/verify_gist.c
@@ -0,0 +1,664 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_gist.c
+ * Verifies the integrity of GiST indexes based on invariants.
+ *
+ * Verification checks that all paths in GiST graph contain
+ * consistent keys: tuples on parent pages consistently include tuples
+ * from children pages. Also, verification checks graph invariants:
+ * internal page must have at least one downlink, internal page can
+ * reference either only leaf pages or only internal pages.
+ *
+ *
+ * Copyright (c) 2017-2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/verify_gist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gist_private.h"
+#include "access/heaptoast.h"
+#include "access/tableam.h"
+#include "catalog/index.h"
+#include "catalog/pg_am.h"
+#include "common/pg_prng.h"
+#include "lib/bloomfilter.h"
+#include "verify_common.h"
+#include "utils/memutils.h"
+
+
+/*
+ * GistScanItem represents one item of depth-first scan of GiST index.
+ */
+typedef struct GistScanItem
+{
+ int depth;
+
+ /* Referenced block number to check next */
+ BlockNumber blkno;
+
+ /*
+ * Correctness of this parent tuple will be checked against contents of
+ * referenced page. This tuple will be NULL for the root block.
+ */
+ IndexTuple parenttup;
+
+ /*
+ * LSN to handle concurrent scans of the page. It's necessary to avoid
+ * missing some subtrees from the page that was split just before we read it.
+ */
+ XLogRecPtr parentlsn;
+
+ /*
+ * Reference to parent page for re-locking in case of found parent-child
+ * tuple discrepancies.
+ */
+ BlockNumber parentblk;
+
+ /* Pointer to a next stack item. */
+ struct GistScanItem *next;
+} GistScanItem;
+
+typedef struct GistCheckState
+{
+ /* GiST state */
+ GISTSTATE *state;
+ /* Bloom filter fingerprints index tuples */
+ bloom_filter *filter;
+
+ Snapshot snapshot;
+ Relation rel;
+ Relation heaprel;
+
+ /* Debug counter for reporting percentage of work already done */
+ int64 heaptuplespresent;
+
+ /* progress reporting stuff */
+ BlockNumber totalblocks;
+ BlockNumber reportedblocks;
+ BlockNumber scannedblocks;
+ BlockNumber deltablocks;
+
+ int leafdepth;
+} GistCheckState;
+
+PG_FUNCTION_INFO_V1(gist_index_check);
+
+static void giststate_init_heapallindexed(Relation rel, GistCheckState * result);
+static void gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly);
+static void gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed,
+ BufferAccessStrategy strategy);
+static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
+static IndexTuple gist_refind_parent(Relation rel, BlockNumber parentblkno,
+ BlockNumber childblkno,
+ BufferAccessStrategy strategy);
+static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block,
+ Page page, OffsetNumber offset);
+static void gist_tuple_present_callback(Relation index, ItemPointer tid,
+ Datum *values, bool *isnull,
+ bool tupleIsAlive, void *checkstate);
+static IndexTuple gistFormNormalizedTuple(GistCheckState *giststate,
+ IndexTuple itup);
+
+/*
+ * gist_index_check(index regclass)
+ *
+ * Verify integrity of GiST index.
+ *
+ * Acquires AccessShareLock on heap & index relations.
+ */
+Datum
+gist_index_check(PG_FUNCTION_ARGS)
+{
+ Oid indrelid = PG_GETARG_OID(0);
+ bool heapallindexed = PG_GETARG_BOOL(1);
+
+ amcheck_lock_relation_and_check(indrelid,
+ GIST_AM_OID,
+ gist_check_parent_keys_consistency,
+ AccessShareLock,
+ &heapallindexed);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Initialize the parts of GistCheckState needed for the heapallindexed
+ * check: the bloom filter and the snapshot.
+ */
+static void
+giststate_init_heapallindexed(Relation rel, GistCheckState * result)
+{
+ int64 total_pages;
+ int64 total_elems;
+ uint64 seed;
+
+ /*
+ * Size Bloom filter based on estimated number of tuples in index. This
+ * logic is similar to B-tree, see verify_btree.c .
+ */
+ total_pages = result->totalblocks;
+ total_elems = Max(total_pages * (MaxOffsetNumber / 5),
+ (int64) rel->rd_rel->reltuples);
+ seed = pg_prng_uint64(&pg_global_prng_state);
+ result->filter = bloom_create(total_elems, maintenance_work_mem, seed);
+
+ result->snapshot = RegisterSnapshot(GetTransactionSnapshot());
+
+
+ /*
+ * GetTransactionSnapshot() always acquires a new MVCC snapshot in READ
+ * COMMITTED mode. A new snapshot is guaranteed to have all the entries
+ * it requires in the index.
+ *
+ * We must defend against the possibility that an old xact snapshot was
+ * returned at higher isolation levels when that snapshot is not safe for
+ * index scans of the target index. This is possible when the snapshot
+ * sees tuples that are before the index's indcheckxmin horizon. Throwing
+ * an error here should be very rare. It doesn't seem worth using a
+ * secondary snapshot to avoid this.
+ */
+ if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
+ !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
+ result->snapshot->xmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("index \"%s\" cannot be verified using transaction snapshot",
+ RelationGetRelationName(rel))));
+}
+
+/*
+ * Main entry point for GiST check.
+ *
+ * This function verifies that tuples of internal pages cover all
+ * the key space of each tuple on the leaf page. To do this we invoke
+ * gist_check_internal_page() for every internal page.
+ *
+ * This check allocates memory context and scans through
+ * GiST graph. This scan is performed in a depth-first search using a stack of
+ * GistScanItem-s. Initially this stack contains only the root block number. On
+ * each iteration the top block number is replaced by referenced block numbers.
+ *
+ *
+ * gist_check_internal_page() in its turn takes every tuple and tries to
+ * adjust it by tuples on the referenced child page. Parent gist tuple should
+ * never require any adjustments.
+ */
+static void
+gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly)
+{
+ BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+ GistScanItem *stack;
+ MemoryContext mctx;
+ MemoryContext oldcontext;
+ GISTSTATE *state;
+ bool heapallindexed = *((bool *) callback_state);
+ GistCheckState *check_state = palloc0(sizeof(GistCheckState));
+
+ mctx = AllocSetContextCreate(CurrentMemoryContext,
+ "amcheck context",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(mctx);
+
+ state = initGISTstate(rel);
+
+ check_state->state = state;
+ check_state->rel = rel;
+ check_state->heaprel = heaprel;
+
+ /*
+ * We don't know the height of the tree yet, but as soon as we encounter a
+ * leaf page, we will set 'leafdepth' to its depth.
+ */
+ check_state->leafdepth = -1;
+
+ check_state->totalblocks = RelationGetNumberOfBlocks(rel);
+ /* report every 100 blocks or 5%, whichever is bigger */
+ check_state->deltablocks = Max(check_state->totalblocks / 20, 100);
+
+ if (heapallindexed)
+ giststate_init_heapallindexed(rel, check_state);
+
+ /* Start the scan at the root page */
+ stack = (GistScanItem *) palloc0(sizeof(GistScanItem));
+ stack->depth = 0;
+ stack->parenttup = NULL;
+ stack->parentblk = InvalidBlockNumber;
+ stack->parentlsn = InvalidXLogRecPtr;
+ stack->blkno = GIST_ROOT_BLKNO;
+
+ /*
+ * This GiST scan is effectively an "old" VACUUM version before commit
+ * fe280694d which introduced physical order scanning.
+ */
+
+ while (stack)
+ {
+ GistScanItem *stack_next;
+ Buffer buffer;
+ Page page;
+ XLogRecPtr lsn;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Report progress */
+ if (check_state->scannedblocks > check_state->reportedblocks +
+ check_state->deltablocks)
+ {
+ elog(DEBUG1, "verified level %u blocks of approximately %u total",
+ check_state->scannedblocks, check_state->totalblocks);
+ check_state->reportedblocks = check_state->scannedblocks;
+ }
+ check_state->scannedblocks++;
+
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+ RBM_NORMAL, strategy);
+ LockBuffer(buffer, GIST_SHARE);
+ page = (Page) BufferGetPage(buffer);
+ lsn = BufferGetLSNAtomic(buffer);
+
+ /* Do basic sanity checks on the page headers */
+ check_index_page(rel, buffer, stack->blkno);
+
+ /*
+ * It's possible that the page was split since we looked at the
+ * parent, so that we didn't missed the downlink of the right sibling
+ * when we scanned the parent. If so, add the right sibling to the
+ * stack now.
+ */
+ if (GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page))
+ {
+ /* split page detected, install right link to the stack */
+ GistScanItem *ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+
+ ptr->depth = stack->depth;
+ ptr->parenttup = CopyIndexTuple(stack->parenttup);
+ ptr->parentblk = stack->parentblk;
+ ptr->parentlsn = stack->parentlsn;
+ ptr->blkno = GistPageGetOpaque(page)->rightlink;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+
+ gist_check_page(check_state, stack, page, heapallindexed, strategy);
+
+ if (!GistPageIsLeaf(page))
+ {
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ /* Internal page, so recurse to the child */
+ GistScanItem *ptr;
+ ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+ ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+ ptr->depth = stack->depth + 1;
+ ptr->parenttup = CopyIndexTuple(idxtuple);
+ ptr->parentblk = stack->blkno;
+ ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+ ptr->parentlsn = lsn;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+ }
+
+ LockBuffer(buffer, GIST_UNLOCK);
+ ReleaseBuffer(buffer);
+
+ /* Step to next item in the queue */
+ stack_next = stack->next;
+ if (stack->parenttup)
+ pfree(stack->parenttup);
+ pfree(stack);
+ stack = stack_next;
+ }
+
+ if (heapallindexed)
+ {
+ IndexInfo *indexinfo = BuildIndexInfo(rel);
+ TableScanDesc scan;
+
+ scan = table_beginscan_strat(heaprel, /* relation */
+ check_state->snapshot, /* snapshot */
+ 0, /* number of keys */
+ NULL, /* scan key */
+ true, /* buffer access strategy OK */
+ true); /* syncscan OK? */
+
+ /*
+ * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY.
+ */
+ indexinfo->ii_Concurrent = true;
+
+ indexinfo->ii_Unique = false;
+ indexinfo->ii_ExclusionOps = NULL;
+ indexinfo->ii_ExclusionProcs = NULL;
+ indexinfo->ii_ExclusionStrats = NULL;
+
+ elog(DEBUG1, "verifying that tuples from index \"%s\" are present in \"%s\"",
+ RelationGetRelationName(rel),
+ RelationGetRelationName(heaprel));
+
+ table_index_build_scan(heaprel, rel, indexinfo, true, false,
+ gist_tuple_present_callback, (void *) check_state, scan);
+
+ ereport(DEBUG1,
+ (errmsg_internal("finished verifying presence of " INT64_FORMAT " tuples from table \"%s\" with bitset %.2f%% set",
+ check_state->heaptuplespresent,
+ RelationGetRelationName(heaprel),
+ 100.0 * bloom_prop_bits_set(check_state->filter))));
+
+ UnregisterSnapshot(check_state->snapshot);
+ bloom_free(check_state->filter);
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(mctx);
+ pfree(check_state);
+}
+
+static void
+gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed, BufferAccessStrategy strategy)
+{
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ /* Check that the tree has the same height in all branches */
+ if (GistPageIsLeaf(page))
+ {
+ if (check_state->leafdepth == -1)
+ check_state->leafdepth = stack->depth;
+ else if (stack->depth != check_state->leafdepth)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+ RelationGetRelationName(check_state->rel), stack->blkno)));
+ }
+
+ /*
+ * Check that each tuple looks valid, and is consistent with the downlink
+ * we followed when we stepped on this page.
+ */
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ ItemId iid = PageGetItemIdCareful(check_state->rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+ IndexTuple tmpTuple = NULL;
+
+ /*
+ * Check that it's not a leftover invalid tuple from pre-9.1 See also
+ * gistdoinsert() and gistbulkdelete() handling of such tuples. We do
+ * consider it an error here.
+ */
+ if (GistTupleIsInvalid(idxtuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("index \"%s\" contains an inner tuple marked as invalid, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i),
+ errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
+ errhint("Please REINDEX it.")));
+
+ if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+
+ /*
+ * Check if this tuple is consistent with the downlink in the parent.
+ */
+ if (stack->parenttup)
+ tmpTuple = gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state);
+
+ if (tmpTuple)
+ {
+ /*
+ * There was a discrepancy between parent and child tuples. We
+ * need to verify it is not a result of concurrent call of
+ * gistplacetopage(). So, lock parent and try to find downlink for
+ * current page. It may be missing due to concurrent page split,
+ * this is OK.
+ *
+ * Note that when we acquire parent tuple now we hold lock for both
+ * parent and child buffers. Thus the parent tuple must include the
+ * keyspace of the child.
+ */
+
+ pfree(tmpTuple);
+ pfree(stack->parenttup);
+ stack->parenttup = gist_refind_parent(check_state->rel, stack->parentblk,
+ stack->blkno, strategy);
+
+ /* We found it - make a final check before failing */
+ if (!stack->parenttup)
+ elog(NOTICE, "Unable to find parent tuple for block %u on block %u due to concurrent split",
+ stack->blkno, stack->parentblk);
+ else if (gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+ else
+ {
+ /*
+ * But now it is properly adjusted - nothing to do here.
+ */
+ }
+ }
+
+ if (GistPageIsLeaf(page))
+ {
+ if (heapallindexed)
+ {
+ IndexTuple norm;
+
+ norm = gistFormNormalizedTuple(check_state, idxtuple);
+
+ bloom_add_element(check_state->filter,
+ (unsigned char *) norm,
+ IndexTupleSize(norm));
+
+ /* Be tidy */
+ if (norm != idxtuple)
+ pfree(norm);
+ }
+ }
+ else
+ {
+ OffsetNumber off = ItemPointerGetOffsetNumber(&(idxtuple->t_tid));
+
+ if (off != TUPLE_IS_VALID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has on page %u offset %u has item id not pointing to 0xffff, but %hu",
+ RelationGetRelationName(check_state->rel), stack->blkno, i, off)));
+ }
+ }
+}
+
+/*
+ * gistFormNormalizedTuple - analogue to gistFormTuple, but performs deTOASTing
+ * of all included data (for covering indexes). While we do not expect
+ * toasted attributes in normal indexes, this can happen as a result of
+ * intervention into system catalog. Detoasting of key attributes is expected
+ * to be done by opclass decompression methods, if the indexed type might be
+ * toasted.
+ */
+static IndexTuple
+gistFormNormalizedTuple(GistCheckState *giststate,
+ IndexTuple itup)
+{
+ return amcheck_normalize_tuple(giststate->rel, itup);
+}
+
+static void
+gist_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
+ bool *isnull, bool tupleIsAlive, void *checkstate)
+{
+ GistCheckState *state = (GistCheckState *) checkstate;
+ IndexTuple itup, norm;
+ Datum compatt[INDEX_MAX_KEYS];
+
+ /* Generate a normalized index tuple for fingerprinting */
+ gistCompressValues(state->state, index, values, isnull, true, compatt);
+ itup = index_form_tuple(RelationGetDescr(index), compatt, isnull);
+ itup->t_tid = *tid;
+
+ norm = gistFormNormalizedTuple(state, itup);
+
+ /* Probe Bloom filter -- tuple should be present */
+ if (bloom_lacks_element(state->filter, (unsigned char *) norm,
+ IndexTupleSize(norm)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
+ ItemPointerGetBlockNumber(&(norm->t_tid)),
+ ItemPointerGetOffsetNumber(&(norm->t_tid)),
+ RelationGetRelationName(state->heaprel),
+ RelationGetRelationName(state->rel))));
+
+ state->heaptuplespresent++;
+
+ pfree(itup);
+ /* Be tidy */
+ if (norm != itup)
+ pfree(norm);
+}
+
+/*
+ * check_index_page - verification of basic invariants about GiST page data
+ * This function does not do any tuple analysis.
+ */
+static void
+check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
+{
+ Page page = BufferGetPage(buffer);
+
+ gistcheckpage(rel, buffer);
+
+ if (GistPageGetOpaque(page)->gist_page_id != GIST_PAGE_ID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has corrupted page %d",
+ RelationGetRelationName(rel), blockNo)));
+
+ if (GistPageIsDeleted(page))
+ {
+ if (!GistPageIsLeaf(page))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted internal page %d",
+ RelationGetRelationName(rel), blockNo)));
+ if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted page %d with tuples",
+ RelationGetRelationName(rel), blockNo)));
+ }
+ else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has page %d with exceeding count of tuples",
+ RelationGetRelationName(rel), blockNo)));
+}
+
+/*
+ * Try to re-find the downlink pointing to 'blkno', in 'parentblkno'.
+ *
+ * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
+ * returns NULL.
+ */
+static IndexTuple
+gist_refind_parent(Relation rel,
+ BlockNumber parentblkno, BlockNumber childblkno,
+ BufferAccessStrategy strategy)
+{
+ Buffer parentbuf;
+ Page parentpage;
+ OffsetNumber parent_maxoff;
+ IndexTuple result = NULL;
+
+ parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
+ strategy);
+
+ LockBuffer(parentbuf, GIST_SHARE);
+ parentpage = BufferGetPage(parentbuf);
+
+ if (GistPageIsLeaf(parentpage))
+ {
+ /*
+ * Currently GiST never deletes internal pages, thus they can never
+ * become leaf.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" internal page %d became leaf",
+ RelationGetRelationName(rel), parentblkno)));
+ }
+
+ parent_maxoff = PageGetMaxOffsetNumber(parentpage);
+ for (OffsetNumber o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o))
+ {
+ ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o);
+ IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid);
+
+ if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
+ {
+ /*
+ * Found it! Make a copy and return it while both parent and child
+ * pages are locked. This guarantees that at this particular
+ * moment tuples must be coherent to each other.
+ */
+ result = CopyIndexTuple(itup);
+ break;
+ }
+ }
+
+ UnlockReleaseBuffer(parentbuf);
+
+ return result;
+}
+
+static ItemId
+PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
+ OffsetNumber offset)
+{
+ ItemId itemid = PageGetItemId(page, offset);
+
+ if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
+ BLCKSZ - MAXALIGN(sizeof(GISTPageOpaqueData)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("line pointer points past end of tuple space in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ /*
+ * Verify that the line pointer isn't LP_REDIRECT or LP_UNUSED, since gist
+ * never uses either. Verify that the line pointer has storage, too, since
+ * even LP_DEAD items should.
+ */
+ if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
+ ItemIdGetLength(itemid) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("invalid line pointer storage in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ return itemid;
+}
diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml
index 08006856579..b19c2975195 100644
--- a/doc/src/sgml/amcheck.sgml
+++ b/doc/src/sgml/amcheck.sgml
@@ -208,6 +208,25 @@ ORDER BY c.relpages DESC LIMIT 10;
</listitem>
</varlistentry>
+ <varlistentry>
+ <term>
+ <function>gist_index_check(index regclass, heapallindexed boolean) returns void</function>
+ <indexterm>
+ <primary>gist_index_check</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ <function>gist_index_check</function> tests that its target GiST
+ has consistent parent-child tuples relations (no parent tuples
+ require tuple adjustement) and page graph respects balanced-tree
+ invariants (internal pages reference only leaf page or only internal
+ pages).
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<tip>
<para>
--
2.51.2
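A minimal usage sketch, distilled from the regression test above (the ALTER
EXTENSION line matches the 1.5--1.6 upgrade script; the table and index names
are purely illustrative):

-- bring an existing amcheck installation up to the version adding gist_index_check()
ALTER EXTENSION amcheck UPDATE TO '1.6';

-- build a small GiST index to verify
CREATE TABLE gist_demo AS
  SELECT point(random(), s) AS c FROM generate_series(1, 1000) s;
CREATE INDEX gist_demo_idx ON gist_demo USING gist (c);

-- structural check only: parent/child key consistency and graph invariants
SELECT gist_index_check('gist_demo_idx', false);

-- additionally fingerprint leaf tuples and verify every heap tuple is indexed
SELECT gist_index_check('gist_demo_idx', true);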
Hello,
On Wed, Oct 22, 2025 at 11:58 AM Kirill Reshke <reshkekirill@gmail.com> wrote:
1) There are several typos in verify_gist.c:
downlinks -> downlink (header comment)
discrepencies -> discrepancies
Correctess -> Correctness
hande -> handle
Initaliaze -> Initialize
numbmer -> number
replcaed -> replaced
aquire -> aqcuire
2) Copyright year is 2023 in the patch. Time flies:)
These two are (trivially) fixed.
I found a few more typos. Maybe one is left over from Arseniy's
review. Referencing the latest patch files from Andrey, here is what I
see:
in v20251216-0002-Add-gist_index_check-function-to-verify-Gi.patch:
This function traverses GiST with a depth-fisrt search and checks
"fisrt" should be "first".
This traverse takes lock of any page until some discapency found.
"discapency" should be "discrepancy"
To re-check suspicious pair of parent and child tuples it aqcuires
"aqcuires" should be "acquires"
amcheck.sgml:
+ require tuple adjustement) and page graph respects balanced-tree
"adjustement" should be "adjustment"
Also the Makefile ordering is not quite right:
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -4,16 +4,17 @@ MODULE_big = amcheck
OBJS = \
$(WIN32RES) \
verify_common.o \
+ verify_gist.o \
verify_gin.o \
verify_heapam.o \
verify_nbtree.o
We should put verify_gist.o after verify_gin.o.
Yours,
--
Paul ~{:-)
pj@illuminatedcomputing.com
On Tue, 16 Dec 2025 at 20:24, Paul A Jungwirth
<pj@illuminatedcomputing.com> wrote:
Hi!
Thank you for taking a look. Sending a new version, which is Andrey's
[0] + 0003 applied + your review comments addressed + my changes, including:
Commit message:
This function traverses GiST with a depth-first search and checks
that all downlink tuples are included into parent tuple keyspace.
This traverse takes lock of any page until some discrepancy found.
To re-check suspicious pair of parent and child tuples it acquires
locks on both parent and child pages in the same order as page
split does.
" discrepancy found" -> " discrepancy is found"
" re-check suspicious " -> " re-check a suspicious "
I also added you, Arseniy and Miłosz to the commit message, in the Reviewed-by section.
+ /* Pointer to a next stack item. */
+ struct GistScanItem *next;
+} GistScanItem;
+
a next -> the next
+ /*
+ * It's possible that the page was split since we looked at the
+ * parent, so that we didn't missed the downlink of the right sibling
+ * when we scanned the parent. If so, add the right sibling to the
+ * stack now.
+ */
"didn't miss" not "didn't missed "
+ /*
+ * There was a discrepancy between parent and child tuples. We
+ * need to verify it is not a result of concurrent call of
+ * gistplacetopage(). So, lock parent and try to find downlink for
+ * current page. It may be missing due to concurrent page split,
+ * this is OK.
"find a downlink"
also this:
--- a/contrib/amcheck/verify_gist.c
+++ b/contrib/amcheck/verify_gist.c
@@ -583,7 +583,8 @@ gist_refind_parent(Relation rel,
{
Buffer parentbuf;
Page parentpage;
- OffsetNumber parent_maxoff;
+ OffsetNumber parent_maxoff,
+ off;
IndexTuple result = NULL;
parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
@@ -605,9 +606,9 @@ gist_refind_parent(Relation rel,
}
parent_maxoff = PageGetMaxOffsetNumber(parentpage);
- for (OffsetNumber o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o))
+ for (off = FirstOffsetNumber; off <= parent_maxoff; off = OffsetNumberNext(off))
{
- ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o);
+ ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, off);
IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid);
if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
--
Best regards,
Kirill Reshke
Attachments:
v20251218-0001-Move-normalize-tuple-logic-from-nbtcheck-t.patchapplication/octet-stream; name=v20251218-0001-Move-normalize-tuple-logic-from-nbtcheck-t.patchDownload
From 96448839f1cac8ad7dc390eb2f8be74d71bab9f4 Mon Sep 17 00:00:00 2001
From: reshke <reshke@double.cloud>
Date: Wed, 22 Oct 2025 15:29:26 +0000
Subject: [PATCH v20251218 1/2] Move `normalize tuple` logic from nbtcheck to
verify_common
Preparatory patch to reuse the index tuple normalization logic
in GiST amcheck.
---
contrib/amcheck/verify_common.c | 112 ++++++++++++++++++++++++++++++++
contrib/amcheck/verify_common.h | 2 +
contrib/amcheck/verify_nbtree.c | 107 +-----------------------------
3 files changed, 115 insertions(+), 106 deletions(-)
diff --git a/contrib/amcheck/verify_common.c b/contrib/amcheck/verify_common.c
index a31ce06ed99..e9b4887f65e 100644
--- a/contrib/amcheck/verify_common.c
+++ b/contrib/amcheck/verify_common.c
@@ -13,6 +13,7 @@
#include "postgres.h"
#include "access/genam.h"
+#include "access/heaptoast.h"
#include "access/table.h"
#include "access/tableam.h"
#include "verify_common.h"
@@ -189,3 +190,114 @@ index_checkable(Relation rel, Oid am_id)
return amcheck_index_mainfork_expected(rel);
}
+
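+/*
+ * amcheck_normalize_tuple - normalize an index tuple for fingerprinting.
+ *
+ * Returns a copy of the tuple in which compressed varlena datums are
+ * detoasted and short varlenas with 4B headers are converted to the 1B
+ * header form, so that logically equal tuples compare byte-for-byte equal.
+ * If no normalization is needed, the input tuple itself is returned.
+ */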
+IndexTuple
+amcheck_normalize_tuple(Relation irel, IndexTuple itup)
+{
+ TupleDesc tupleDescriptor = RelationGetDescr(irel);
+ Datum normalized[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ bool need_free[INDEX_MAX_KEYS];
+ bool formnewtup = false;
+ IndexTuple reformed;
+ int i;
+
+ /* Easy case: It's immediately clear that tuple has no varlena datums */
+ if (!IndexTupleHasVarwidths(itup))
+ return itup;
+
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ {
+ Form_pg_attribute att;
+
+ att = TupleDescAttr(tupleDescriptor, i);
+
+ /* Assume untoasted/already normalized datum initially */
+ need_free[i] = false;
+ normalized[i] = index_getattr(itup, att->attnum,
+ tupleDescriptor,
+ &isnull[i]);
+ if (att->attbyval || att->attlen != -1 || isnull[i])
+ continue;
+
+ /*
+ * Callers always pass a tuple that could safely be inserted into the
+ * index without further processing, so an external varlena header
+ * should never be encountered here
+ */
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
+ ItemPointerGetBlockNumber(&(itup->t_tid)),
+ ItemPointerGetOffsetNumber(&(itup->t_tid)),
+ RelationGetRelationName(irel))));
+ else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
+ VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
+ (att->attstorage == TYPSTORAGE_EXTENDED ||
+ att->attstorage == TYPSTORAGE_MAIN))
+ {
+ /*
+ * This value will be compressed by index_form_tuple() with the
+ * current storage settings. We may be here because this tuple
+ * was formed with different storage settings. So, force forming.
+ */
+ formnewtup = true;
+ }
+ else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
+ {
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
+ need_free[i] = true;
+ }
+
+ /*
+ * Short tuples may have 1B or 4B header. Convert 4B header of short
+ * tuples to 1B
+ */
+ else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
+ {
+ /* convert to short varlena */
+ Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
+ char *data = palloc(len);
+
+ SET_VARSIZE_SHORT(data, len);
+ memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
+
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(data);
+ need_free[i] = true;
+ }
+ }
+
+ /*
+ * Easier case: Tuple has varlena datums, none of which are compressed or
+ * short with 4B header
+ */
+ if (!formnewtup)
+ return itup;
+
+ /*
+ * Hard case: Tuple had compressed varlena datums that necessitate
+ * creating normalized version of the tuple from uncompressed input datums
+ * (normalized input datums). This is rather naive, but shouldn't be
+ * necessary too often.
+ *
+ * In the heap, tuples may contain short varlena datums with both 1B
+ * header and 4B headers. But the corresponding index tuple should always
+ * have such varlena's with 1B headers. So, if there is a short varlena
+ * with 4B header, we need to convert it for fingerprinting.
+ *
+ * Note that we rely on deterministic index_form_tuple() TOAST compression
+ * of normalized input.
+ */
+ reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
+ reformed->t_tid = itup->t_tid;
+
+ /* Cannot leak memory here */
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ if (need_free[i])
+ pfree(DatumGetPointer(normalized[i]));
+
+ return reformed;
+}
\ No newline at end of file
diff --git a/contrib/amcheck/verify_common.h b/contrib/amcheck/verify_common.h
index 3fa63d2121a..ffe0d30beb3 100644
--- a/contrib/amcheck/verify_common.h
+++ b/contrib/amcheck/verify_common.h
@@ -26,3 +26,5 @@ extern void amcheck_lock_relation_and_check(Oid indrelid,
Oid am_id,
IndexDoCheckCallback check,
LOCKMODE lockmode, void *state);
+
+extern IndexTuple amcheck_normalize_tuple(Relation irel, IndexTuple itup);
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index f91392a3a49..2ad27eb5c2b 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -2845,115 +2845,10 @@ bt_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
static IndexTuple
bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
{
- TupleDesc tupleDescriptor = RelationGetDescr(state->rel);
- Datum normalized[INDEX_MAX_KEYS];
- bool isnull[INDEX_MAX_KEYS];
- bool need_free[INDEX_MAX_KEYS];
- bool formnewtup = false;
- IndexTuple reformed;
- int i;
-
/* Caller should only pass "logical" non-pivot tuples here */
Assert(!BTreeTupleIsPosting(itup) && !BTreeTupleIsPivot(itup));
- /* Easy case: It's immediately clear that tuple has no varlena datums */
- if (!IndexTupleHasVarwidths(itup))
- return itup;
-
- for (i = 0; i < tupleDescriptor->natts; i++)
- {
- Form_pg_attribute att;
-
- att = TupleDescAttr(tupleDescriptor, i);
-
- /* Assume untoasted/already normalized datum initially */
- need_free[i] = false;
- normalized[i] = index_getattr(itup, att->attnum,
- tupleDescriptor,
- &isnull[i]);
- if (att->attbyval || att->attlen != -1 || isnull[i])
- continue;
-
- /*
- * Callers always pass a tuple that could safely be inserted into the
- * index without further processing, so an external varlena header
- * should never be encountered here
- */
- if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
- ItemPointerGetBlockNumber(&(itup->t_tid)),
- ItemPointerGetOffsetNumber(&(itup->t_tid)),
- RelationGetRelationName(state->rel))));
- else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
- VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
- (att->attstorage == TYPSTORAGE_EXTENDED ||
- att->attstorage == TYPSTORAGE_MAIN))
- {
- /*
- * This value will be compressed by index_form_tuple() with the
- * current storage settings. We may be here because this tuple
- * was formed with different storage settings. So, force forming.
- */
- formnewtup = true;
- }
- else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
- {
- formnewtup = true;
- normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
- need_free[i] = true;
- }
-
- /*
- * Short tuples may have 1B or 4B header. Convert 4B header of short
- * tuples to 1B
- */
- else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
- {
- /* convert to short varlena */
- Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
- char *data = palloc(len);
-
- SET_VARSIZE_SHORT(data, len);
- memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
-
- formnewtup = true;
- normalized[i] = PointerGetDatum(data);
- need_free[i] = true;
- }
- }
-
- /*
- * Easier case: Tuple has varlena datums, none of which are compressed or
- * short with 4B header
- */
- if (!formnewtup)
- return itup;
-
- /*
- * Hard case: Tuple had compressed varlena datums that necessitate
- * creating normalized version of the tuple from uncompressed input datums
- * (normalized input datums). This is rather naive, but shouldn't be
- * necessary too often.
- *
- * In the heap, tuples may contain short varlena datums with both 1B
- * header and 4B headers. But the corresponding index tuple should always
- * have such varlena's with 1B headers. So, if there is a short varlena
- * with 4B header, we need to convert it for fingerprinting.
- *
- * Note that we rely on deterministic index_form_tuple() TOAST compression
- * of normalized input.
- */
- reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
- reformed->t_tid = itup->t_tid;
-
- /* Cannot leak memory here */
- for (i = 0; i < tupleDescriptor->natts; i++)
- if (need_free[i])
- pfree(DatumGetPointer(normalized[i]));
-
- return reformed;
+ return amcheck_normalize_tuple(state->rel, itup);
}
/*
--
2.43.0
v20251218-4-0002-Add-gist_index_check-function-to-verify-.patchapplication/octet-stream; name=v20251218-4-0002-Add-gist_index_check-function-to-verify-.patchDownload
From 08815707d0be1455af8bd836abf0e179f381141d Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Wed, 18 Jun 2025 20:33:28 +0300
Subject: [PATCH v20251218-4 2/2] Add gist_index_check() function to verify
GiST index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This function traverses GiST with a depth-first search and checks
that all downlink tuples are included into parent tuple keyspace.
This traverse takes lock of any page until some discrepancy is found.
To re-check a suspicious pair of parent and child tuples it acquires
locks on both parent and child pages in the same order as page
split does.
Author: Andrey Borodin <amborodin@acm.org>
Author: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-By: José Villanova <jose.arthur@gmail.com>
Reviewed-By: Aleksander Alekseev <aleksander@timescale.com>
Reviewed-By: Nikolay Samokhvalov <samokhvalov@gmail.com>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Tomas Vondra <tomas.vondra@enterprisedb.com>
Reviewed-By: Arseniy Mukhin <arseniy(dot)mukhin(dot)dev(at)gmail(dot)com>
Reviewed-By: Miłosz Bieniek <bieniek.milosz@proton.me>
Reviewed-By: Paul A Jungwirth <pj(at)illuminatedcomputing(dot)com>
Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru
---
contrib/amcheck/Makefile | 5 +-
contrib/amcheck/amcheck--1.5--1.6.sql | 14 +
contrib/amcheck/amcheck.control | 2 +-
contrib/amcheck/expected/check_gist.out | 145 ++++++
contrib/amcheck/meson.build | 3 +
contrib/amcheck/sql/check_gist.sql | 65 +++
contrib/amcheck/verify_gist.c | 665 ++++++++++++++++++++++++
doc/src/sgml/amcheck.sgml | 19 +
8 files changed, 915 insertions(+), 3 deletions(-)
create mode 100644 contrib/amcheck/amcheck--1.5--1.6.sql
create mode 100644 contrib/amcheck/expected/check_gist.out
create mode 100644 contrib/amcheck/sql/check_gist.sql
create mode 100644 contrib/amcheck/verify_gist.c
diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index 1b7a63cbaa4..be28f0b8275 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -5,15 +5,16 @@ OBJS = \
$(WIN32RES) \
verify_common.o \
verify_gin.o \
+ verify_gist.o \
verify_heapam.o \
verify_nbtree.o
EXTENSION = amcheck
DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql \
- amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql
+ amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql amcheck--1.5--1.6.sql
PGFILEDESC = "amcheck - function for verifying relation integrity"
-REGRESS = check check_btree check_gin check_heap
+REGRESS = check check_btree check_gin check_gist check_heap
EXTRA_INSTALL = contrib/pg_walinspect
TAP_TESTS = 1
diff --git a/contrib/amcheck/amcheck--1.5--1.6.sql b/contrib/amcheck/amcheck--1.5--1.6.sql
new file mode 100644
index 00000000000..e2d4ca60d5e
--- /dev/null
+++ b/contrib/amcheck/amcheck--1.5--1.6.sql
@@ -0,0 +1,14 @@
+/* contrib/amcheck/amcheck--1.5--1.6.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.6'" to load this file. \quit
+
+
+-- gist_index_check()
+--
+CREATE FUNCTION gist_index_check(index regclass, heapallindexed boolean)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'gist_index_check'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION gist_index_check(regclass, boolean) FROM PUBLIC;
diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control
index c8ba6d7c9bc..2f329ef2cf4 100644
--- a/contrib/amcheck/amcheck.control
+++ b/contrib/amcheck/amcheck.control
@@ -1,5 +1,5 @@
# amcheck extension
comment = 'functions for verifying relation integrity'
-default_version = '1.5'
+default_version = '1.6'
module_pathname = '$libdir/amcheck'
relocatable = true
diff --git a/contrib/amcheck/expected/check_gist.out b/contrib/amcheck/expected/check_gist.out
new file mode 100644
index 00000000000..cbc3e27e679
--- /dev/null
+++ b/contrib/amcheck/expected/check_gist.out
@@ -0,0 +1,145 @@
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gist_check;
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+ attstorage
+------------
+ x
+(1 row)
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
+ gist_index_check
+------------------
+
+(1 row)
+
diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build
index 1f0c347ed54..2c79017054c 100644
--- a/contrib/amcheck/meson.build
+++ b/contrib/amcheck/meson.build
@@ -3,6 +3,7 @@
amcheck_sources = files(
'verify_common.c',
'verify_gin.c',
+ 'verify_gist.c',
'verify_heapam.c',
'verify_nbtree.c',
)
@@ -27,6 +28,7 @@ install_data(
'amcheck--1.2--1.3.sql',
'amcheck--1.3--1.4.sql',
'amcheck--1.4--1.5.sql',
+ 'amcheck--1.5--1.6.sql',
kwargs: contrib_data_args,
)
@@ -39,6 +41,7 @@ tests += {
'check',
'check_btree',
'check_gin',
+ 'check_gist',
'check_heap',
],
},
diff --git a/contrib/amcheck/sql/check_gist.sql b/contrib/amcheck/sql/check_gist.sql
new file mode 100644
index 00000000000..bc5737a7710
--- /dev/null
+++ b/contrib/amcheck/sql/check_gist.sql
@@ -0,0 +1,65 @@
+
+SELECT setseed(1);
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+-- cleanup
+DROP TABLE gist_check;
+
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
+
+-- cleanup
+DROP TABLE toast_bug;
\ No newline at end of file
diff --git a/contrib/amcheck/verify_gist.c b/contrib/amcheck/verify_gist.c
new file mode 100644
index 00000000000..b0ff7d98bf9
--- /dev/null
+++ b/contrib/amcheck/verify_gist.c
@@ -0,0 +1,665 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_gist.c
+ * Verifies the integrity of GiST indexes based on invariants.
+ *
+ * Verification checks that all paths in GiST graph contain
+ * consistent keys: tuples on parent pages consistently include tuples
+ * from children pages. Also, verification checks graph invariants:
+ * internal page must have at least one downlink, internal page can
+ * reference either only leaf pages or only internal pages.
+ *
+ *
+ * Copyright (c) 2017-2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/verify_gist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gist_private.h"
+#include "access/heaptoast.h"
+#include "access/tableam.h"
+#include "catalog/index.h"
+#include "catalog/pg_am.h"
+#include "common/pg_prng.h"
+#include "lib/bloomfilter.h"
+#include "verify_common.h"
+#include "utils/memutils.h"
+
+
+/*
+ * GistScanItem represents one item of depth-first scan of GiST index.
+ */
+typedef struct GistScanItem
+{
+ int depth;
+
+ /* Referenced block number to check next */
+ BlockNumber blkno;
+
+ /*
+ * Correctness of this parent tuple will be checked against contents of
+ * referenced page. This tuple will be NULL for the root block.
+ */
+ IndexTuple parenttup;
+
+ /*
+ * LSN to handle concurrent scans of the page. It's necessary to avoid
+ * missing some subtrees from the page that was split just before we read it.
+ */
+ XLogRecPtr parentlsn;
+
+ /*
+ * Reference to parent page for re-locking in case of found parent-child
+ * tuple discrepancies.
+ */
+ BlockNumber parentblk;
+
+ /* Pointer to the next stack item. */
+ struct GistScanItem *next;
+} GistScanItem;
+
+typedef struct GistCheckState
+{
+ /* GiST state */
+ GISTSTATE *state;
+ /* Bloom filter fingerprints index tuples */
+ bloom_filter *filter;
+
+ Snapshot snapshot;
+ Relation rel;
+ Relation heaprel;
+
+ /* Debug counter for reporting percentage of work already done */
+ int64 heaptuplespresent;
+
+ /* progress reporting stuff */
+ BlockNumber totalblocks;
+ BlockNumber reportedblocks;
+ BlockNumber scannedblocks;
+ BlockNumber deltablocks;
+
+ int leafdepth;
+} GistCheckState;
+
+PG_FUNCTION_INFO_V1(gist_index_check);
+
+static void giststate_init_heapallindexed(Relation rel, GistCheckState * result);
+static void gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly);
+static void gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed,
+ BufferAccessStrategy strategy);
+static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
+static IndexTuple gist_refind_parent(Relation rel, BlockNumber parentblkno,
+ BlockNumber childblkno,
+ BufferAccessStrategy strategy);
+static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block,
+ Page page, OffsetNumber offset);
+static void gist_tuple_present_callback(Relation index, ItemPointer tid,
+ Datum *values, bool *isnull,
+ bool tupleIsAlive, void *checkstate);
+static IndexTuple gistFormNormalizedTuple(GistCheckState *giststate,
+ IndexTuple itup);
+
+/*
+ * gist_index_check(index regclass)
+ *
+ * Verify integrity of GiST index.
+ *
+ * Acquires AccessShareLock on heap & index relations.
+ */
+Datum
+gist_index_check(PG_FUNCTION_ARGS)
+{
+ Oid indrelid = PG_GETARG_OID(0);
+ bool heapallindexed = PG_GETARG_BOOL(1);
+
+ amcheck_lock_relation_and_check(indrelid,
+ GIST_AM_OID,
+ gist_check_parent_keys_consistency,
+ AccessShareLock,
+ &heapallindexed);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Initialize the GiST check state fields needed for the heapallindexed check.
+ * This initializes the bloom filter and registers the snapshot.
+ */
+static void
+giststate_init_heapallindexed(Relation rel, GistCheckState * result)
+{
+ int64 total_pages;
+ int64 total_elems;
+ uint64 seed;
+
+ /*
+ * Size Bloom filter based on estimated number of tuples in index. This
+	 * logic is similar to B-tree; see verify_nbtree.c.
+ */
+ total_pages = result->totalblocks;
+ total_elems = Max(total_pages * (MaxOffsetNumber / 5),
+ (int64) rel->rd_rel->reltuples);
+ seed = pg_prng_uint64(&pg_global_prng_state);
+ result->filter = bloom_create(total_elems, maintenance_work_mem, seed);
+
+ result->snapshot = RegisterSnapshot(GetTransactionSnapshot());
+
+
+ /*
+ * GetTransactionSnapshot() always acquires a new MVCC snapshot in READ
+ * COMMITTED mode. A new snapshot is guaranteed to have all the entries
+ * it requires in the index.
+ *
+ * We must defend against the possibility that an old xact snapshot was
+ * returned at higher isolation levels when that snapshot is not safe for
+ * index scans of the target index. This is possible when the snapshot
+ * sees tuples that are before the index's indcheckxmin horizon. Throwing
+ * an error here should be very rare. It doesn't seem worth using a
+ * secondary snapshot to avoid this.
+ */
+ if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
+ !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
+ result->snapshot->xmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("index \"%s\" cannot be verified using transaction snapshot",
+ RelationGetRelationName(rel))));
+}
+
+/*
+ * Main entry point for GiST check.
+ *
+ * This function verifies that tuples of internal pages cover all
+ * the key space of each tuple on the leaf pages. To do this we invoke
+ * gist_check_page() for every page.
+ *
+ * This check allocates a memory context and scans through the
+ * GiST graph. The scan is performed as a depth-first search using a stack of
+ * GistScanItem-s. Initially this stack contains only the root block number. On
+ * each iteration the top block number is replaced by referenced block numbers.
+ *
+ *
+ * gist_check_page() in its turn takes the downlink tuple and tries to
+ * adjust it by the tuples on the referenced child page. A parent GiST
+ * tuple should never require any adjustment.
+ */
+static void
+gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly)
+{
+ BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+ GistScanItem *stack;
+ MemoryContext mctx;
+ MemoryContext oldcontext;
+ GISTSTATE *state;
+ bool heapallindexed = *((bool *) callback_state);
+ GistCheckState *check_state = palloc0(sizeof(GistCheckState));
+
+ mctx = AllocSetContextCreate(CurrentMemoryContext,
+ "amcheck context",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(mctx);
+
+ state = initGISTstate(rel);
+
+ check_state->state = state;
+ check_state->rel = rel;
+ check_state->heaprel = heaprel;
+
+ /*
+ * We don't know the height of the tree yet, but as soon as we encounter a
+ * leaf page, we will set 'leafdepth' to its depth.
+ */
+ check_state->leafdepth = -1;
+
+ check_state->totalblocks = RelationGetNumberOfBlocks(rel);
+ /* report every 100 blocks or 5%, whichever is bigger */
+ check_state->deltablocks = Max(check_state->totalblocks / 20, 100);
+
+ if (heapallindexed)
+ giststate_init_heapallindexed(rel, check_state);
+
+ /* Start the scan at the root page */
+ stack = (GistScanItem *) palloc0(sizeof(GistScanItem));
+ stack->depth = 0;
+ stack->parenttup = NULL;
+ stack->parentblk = InvalidBlockNumber;
+ stack->parentlsn = InvalidXLogRecPtr;
+ stack->blkno = GIST_ROOT_BLKNO;
+
+ /*
+ * This GiST scan is effectively an "old" VACUUM version before commit
+ * fe280694d which introduced physical order scanning.
+ */
+
+ while (stack)
+ {
+ GistScanItem *stack_next;
+ Buffer buffer;
+ Page page;
+ XLogRecPtr lsn;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Report progress */
+ if (check_state->scannedblocks > check_state->reportedblocks +
+ check_state->deltablocks)
+ {
+			elog(DEBUG1, "verified %u blocks of approximately %u total",
+ check_state->scannedblocks, check_state->totalblocks);
+ check_state->reportedblocks = check_state->scannedblocks;
+ }
+ check_state->scannedblocks++;
+
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+ RBM_NORMAL, strategy);
+ LockBuffer(buffer, GIST_SHARE);
+ page = (Page) BufferGetPage(buffer);
+ lsn = BufferGetLSNAtomic(buffer);
+
+ /* Do basic sanity checks on the page headers */
+ check_index_page(rel, buffer, stack->blkno);
+
+ /*
+ * It's possible that the page was split since we looked at the
+		 * parent, and we may have missed the downlink of the right sibling
+ * when we scanned the parent. If so, add the right sibling to the
+ * stack now.
+ */
+ if (GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page))
+ {
+ /* split page detected, install right link to the stack */
+ GistScanItem *ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+
+ ptr->depth = stack->depth;
+ ptr->parenttup = CopyIndexTuple(stack->parenttup);
+ ptr->parentblk = stack->parentblk;
+ ptr->parentlsn = stack->parentlsn;
+ ptr->blkno = GistPageGetOpaque(page)->rightlink;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+
+ gist_check_page(check_state, stack, page, heapallindexed, strategy);
+
+ if (!GistPageIsLeaf(page))
+ {
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ /* Internal page, so recurse to the child */
+ GistScanItem *ptr;
+ ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+ ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+ ptr->depth = stack->depth + 1;
+ ptr->parenttup = CopyIndexTuple(idxtuple);
+ ptr->parentblk = stack->blkno;
+ ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+ ptr->parentlsn = lsn;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+ }
+
+ LockBuffer(buffer, GIST_UNLOCK);
+ ReleaseBuffer(buffer);
+
+ /* Step to next item in the queue */
+ stack_next = stack->next;
+ if (stack->parenttup)
+ pfree(stack->parenttup);
+ pfree(stack);
+ stack = stack_next;
+ }
+
+ if (heapallindexed)
+ {
+ IndexInfo *indexinfo = BuildIndexInfo(rel);
+ TableScanDesc scan;
+
+ scan = table_beginscan_strat(heaprel, /* relation */
+ check_state->snapshot, /* snapshot */
+ 0, /* number of keys */
+ NULL, /* scan key */
+ true, /* buffer access strategy OK */
+ true); /* syncscan OK? */
+
+ /*
+ * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY.
+ */
+ indexinfo->ii_Concurrent = true;
+
+ indexinfo->ii_Unique = false;
+ indexinfo->ii_ExclusionOps = NULL;
+ indexinfo->ii_ExclusionProcs = NULL;
+ indexinfo->ii_ExclusionStrats = NULL;
+
+ elog(DEBUG1, "verifying that tuples from index \"%s\" are present in \"%s\"",
+ RelationGetRelationName(rel),
+ RelationGetRelationName(heaprel));
+
+ table_index_build_scan(heaprel, rel, indexinfo, true, false,
+ gist_tuple_present_callback, (void *) check_state, scan);
+
+ ereport(DEBUG1,
+ (errmsg_internal("finished verifying presence of " INT64_FORMAT " tuples from table \"%s\" with bitset %.2f%% set",
+ check_state->heaptuplespresent,
+ RelationGetRelationName(heaprel),
+ 100.0 * bloom_prop_bits_set(check_state->filter))));
+
+ UnregisterSnapshot(check_state->snapshot);
+ bloom_free(check_state->filter);
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(mctx);
+ pfree(check_state);
+}
+
+static void
+gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed, BufferAccessStrategy strategy)
+{
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ /* Check that the tree has the same height in all branches */
+ if (GistPageIsLeaf(page))
+ {
+ if (check_state->leafdepth == -1)
+ check_state->leafdepth = stack->depth;
+ else if (stack->depth != check_state->leafdepth)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+ RelationGetRelationName(check_state->rel), stack->blkno)));
+ }
+
+ /*
+ * Check that each tuple looks valid, and is consistent with the downlink
+ * we followed when we stepped on this page.
+ */
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ ItemId iid = PageGetItemIdCareful(check_state->rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+ IndexTuple tmpTuple = NULL;
+
+ /*
+		 * Check that it's not a leftover invalid tuple from pre-9.1. See also
+ * gistdoinsert() and gistbulkdelete() handling of such tuples. We do
+ * consider it an error here.
+ */
+ if (GistTupleIsInvalid(idxtuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("index \"%s\" contains an inner tuple marked as invalid, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i),
+ errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
+ errhint("Please REINDEX it.")));
+
+ if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+
+ /*
+ * Check if this tuple is consistent with the downlink in the parent.
+ */
+ if (stack->parenttup)
+ tmpTuple = gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state);
+
+ if (tmpTuple)
+ {
+ /*
+			 * There was a discrepancy between the parent and child tuples. We
+			 * need to verify that it is not the result of a concurrent call of
+			 * gistplacetopage(). So, lock the parent and try to re-find the
+			 * downlink for the current page. It may be missing due to a
+			 * concurrent page split; this is OK.
+			 *
+			 * Note that when we re-acquire the parent tuple now, we hold locks
+			 * on both the parent and child buffers. Thus the parent tuple must
+			 * include the keyspace of the child.
+ */
+
+ pfree(tmpTuple);
+ pfree(stack->parenttup);
+ stack->parenttup = gist_refind_parent(check_state->rel, stack->parentblk,
+ stack->blkno, strategy);
+
+			/* If we re-found the parent tuple, make a final check before failing */
+ if (!stack->parenttup)
+ elog(NOTICE, "Unable to find parent tuple for block %u on block %u due to concurrent split",
+ stack->blkno, stack->parentblk);
+ else if (gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+ else
+ {
+ /*
+ * But now it is properly adjusted - nothing to do here.
+ */
+ }
+ }
+
+ if (GistPageIsLeaf(page))
+ {
+ if (heapallindexed)
+ {
+ IndexTuple norm;
+
+ norm = gistFormNormalizedTuple(check_state, idxtuple);
+
+ bloom_add_element(check_state->filter,
+ (unsigned char *) norm,
+ IndexTupleSize(norm));
+
+ /* Be tidy */
+ if (norm != idxtuple)
+ pfree(norm);
+ }
+ }
+ else
+ {
+ OffsetNumber off = ItemPointerGetOffsetNumber(&(idxtuple->t_tid));
+
+ if (off != TUPLE_IS_VALID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+						 errmsg("index \"%s\" has item id on page %u offset %u not pointing to 0xffff, but %hu",
+ RelationGetRelationName(check_state->rel), stack->blkno, i, off)));
+ }
+ }
+}
+
+/*
+ * gistFormNormalizedTuple - analogue to gistFormTuple, but performs deTOASTing
+ * of all included data (for covering indexes). While we do not expect
+ * toasted attributes in normal indexes, this can happen as a result of
+ * intervention into system catalog. Detoasting of key attributes is expected
+ * to be done by opclass decompression methods, if the indexed type might be
+ * toasted.
+ */
+static IndexTuple
+gistFormNormalizedTuple(GistCheckState *giststate,
+ IndexTuple itup)
+{
+ return amcheck_normalize_tuple(giststate->rel, itup);
+}
+
+static void
+gist_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
+ bool *isnull, bool tupleIsAlive, void *checkstate)
+{
+ GistCheckState *state = (GistCheckState *) checkstate;
+ IndexTuple itup, norm;
+ Datum compatt[INDEX_MAX_KEYS];
+
+ /* Generate a normalized index tuple for fingerprinting */
+ gistCompressValues(state->state, index, values, isnull, true, compatt);
+ itup = index_form_tuple(RelationGetDescr(index), compatt, isnull);
+ itup->t_tid = *tid;
+
+ norm = gistFormNormalizedTuple(state, itup);
+
+ /* Probe Bloom filter -- tuple should be present */
+ if (bloom_lacks_element(state->filter, (unsigned char *) norm,
+ IndexTupleSize(norm)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
+ ItemPointerGetBlockNumber(&(norm->t_tid)),
+ ItemPointerGetOffsetNumber(&(norm->t_tid)),
+ RelationGetRelationName(state->heaprel),
+ RelationGetRelationName(state->rel))));
+
+ state->heaptuplespresent++;
+
+ pfree(itup);
+ /* Be tidy */
+ if (norm != itup)
+ pfree(norm);
+}
+
+/*
+ * check_index_page - verification of basic invariants about GiST page data
+ * This function does not do any tuple analysis.
+ */
+static void
+check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
+{
+ Page page = BufferGetPage(buffer);
+
+ gistcheckpage(rel, buffer);
+
+ if (GistPageGetOpaque(page)->gist_page_id != GIST_PAGE_ID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has corrupted page %d",
+ RelationGetRelationName(rel), blockNo)));
+
+ if (GistPageIsDeleted(page))
+ {
+ if (!GistPageIsLeaf(page))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted internal page %d",
+ RelationGetRelationName(rel), blockNo)));
+ if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted page %d with tuples",
+ RelationGetRelationName(rel), blockNo)));
+ }
+ else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("index \"%s\" has page %d with an excessive number of tuples",
+ RelationGetRelationName(rel), blockNo)));
+}
+
+/*
+ * Try to re-find the downlink pointing to 'blkno', in 'parentblkno'.
+ *
+ * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
+ * returns NULL.
+ */
+static IndexTuple
+gist_refind_parent(Relation rel,
+ BlockNumber parentblkno, BlockNumber childblkno,
+ BufferAccessStrategy strategy)
+{
+ Buffer parentbuf;
+ Page parentpage;
+ OffsetNumber parent_maxoff,
+ off;
+ IndexTuple result = NULL;
+
+ parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
+ strategy);
+
+ LockBuffer(parentbuf, GIST_SHARE);
+ parentpage = BufferGetPage(parentbuf);
+
+ if (GistPageIsLeaf(parentpage))
+ {
+ /*
+ * Currently GiST never deletes internal pages, thus they can never
+ * become leaf.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" internal page %d became leaf",
+ RelationGetRelationName(rel), parentblkno)));
+ }
+
+ parent_maxoff = PageGetMaxOffsetNumber(parentpage);
+ for (off = FirstOffsetNumber; off <= parent_maxoff; off = OffsetNumberNext(off))
+ {
+ ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, off);
+ IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid);
+
+ if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
+ {
+ /*
+ * Found it! Make a copy and return it while both parent and child
+ * pages are locked. This guarantees that at this particular
+ * moment tuples must be coherent to each other.
+ */
+ result = CopyIndexTuple(itup);
+ break;
+ }
+ }
+
+ UnlockReleaseBuffer(parentbuf);
+
+ return result;
+}
+
+static ItemId
+PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
+ OffsetNumber offset)
+{
+ ItemId itemid = PageGetItemId(page, offset);
+
+ if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
+ BLCKSZ - MAXALIGN(sizeof(GISTPageOpaqueData)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("line pointer points past end of tuple space in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ /*
+ * Verify that the line pointer isn't LP_REDIRECT or LP_UNUSED, since gist
+ * never uses either. Verify that the line pointer has storage, too, since
+ * even LP_DEAD items should.
+ */
+ if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
+ ItemIdGetLength(itemid) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("invalid line pointer storage in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ return itemid;
+}
diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml
index 08006856579..917a4936b6d 100644
--- a/doc/src/sgml/amcheck.sgml
+++ b/doc/src/sgml/amcheck.sgml
@@ -208,6 +208,25 @@ ORDER BY c.relpages DESC LIMIT 10;
</listitem>
</varlistentry>
+ <varlistentry>
+ <term>
+ <function>gist_index_check(index regclass, heapallindexed boolean) returns void</function>
+ <indexterm>
+ <primary>gist_index_check</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+    <function>gist_index_check</function> tests that its target GiST
+    index has consistent parent-child tuple relations (no parent tuple
+    requires adjustment) and that its page graph respects balanced-tree
+    invariants (internal pages reference either only leaf pages or only
+    internal pages).
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<tip>
<para>
--
2.43.0
CF bot was unhappy about the last version due to an obvious bug; PFA a new
version with fixes.
The problem was "DROP TABLE toast_bug;" missing from the expected regression output.
[0]: https://cirrus-ci.com/task/6378051304423424
--
Best regards,
Kirill Reshke
Attachments:
v2026-01-01-0002-Add-gist_index_check-function-to-verify-.patchapplication/octet-stream; name=v2026-01-01-0002-Add-gist_index_check-function-to-verify-.patchDownload
From 613a72f16fbb2bf0c16a624827a85bcc31aec815 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Wed, 18 Jun 2025 20:33:28 +0300
Subject: [PATCH v2026-01-01 2/2] Add gist_index_check() function to verify
GiST index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This function traverses the GiST index with a depth-first search and
checks that all downlink tuples are included in the parent tuple's
keyspace. The traversal holds a lock on only one page at a time, until
some discrepancy is found. To re-check a suspicious pair of parent and
child tuples it acquires locks on both parent and child pages in the
same order as a page split does.
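For context, a minimal way to exercise the new function from psql (the table
and index names below are only illustrative; the pattern mirrors the
regression test in sql/check_gist.sql):
    -- requires the amcheck extension at version 1.6
    CREATE EXTENSION amcheck;
    CREATE TABLE gist_demo AS
        SELECT point(random(), s) AS c FROM generate_series(1, 10000) s;
    CREATE INDEX gist_demo_idx ON gist_demo USING gist (c);
    -- structural checks only
    SELECT gist_index_check('gist_demo_idx', false);
    -- additionally verify that every heap tuple has a matching index tuple
    SELECT gist_index_check('gist_demo_idx', true);
The heapallindexed pass fingerprints the leaf tuples into a Bloom filter and
then rescans the heap, as in the existing B-tree check.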
Author: Andrey Borodin <amborodin@acm.org>
Author: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-By: José Villanova <jose.arthur@gmail.com>
Reviewed-By: Aleksander Alekseev <aleksander@timescale.com>
Reviewed-By: Nikolay Samokhvalov <samokhvalov@gmail.com>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Tomas Vondra <tomas.vondra@enterprisedb.com>
Reviewed-By: Arseniy Mukhin <arseniy(dot)mukhin(dot)dev(at)gmail(dot)com>
Reviewed-By: Miłosz Bieniek <bieniek.milosz@proton.me>
Reviewed-By: Paul A Jungwirth <pj(at)illuminatedcomputing(dot)com>
Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru
---
contrib/amcheck/Makefile | 5 +-
contrib/amcheck/amcheck--1.5--1.6.sql | 14 +
contrib/amcheck/amcheck.control | 2 +-
contrib/amcheck/expected/check_gist.out | 147 ++++++
contrib/amcheck/meson.build | 3 +
contrib/amcheck/sql/check_gist.sql | 65 +++
contrib/amcheck/verify_gist.c | 665 ++++++++++++++++++++++++
doc/src/sgml/amcheck.sgml | 19 +
8 files changed, 917 insertions(+), 3 deletions(-)
create mode 100644 contrib/amcheck/amcheck--1.5--1.6.sql
create mode 100644 contrib/amcheck/expected/check_gist.out
create mode 100644 contrib/amcheck/sql/check_gist.sql
create mode 100644 contrib/amcheck/verify_gist.c
diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index 1b7a63cbaa4..be28f0b8275 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -5,15 +5,16 @@ OBJS = \
$(WIN32RES) \
verify_common.o \
verify_gin.o \
+ verify_gist.o \
verify_heapam.o \
verify_nbtree.o
EXTENSION = amcheck
DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql \
- amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql
+ amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql amcheck--1.5--1.6.sql
PGFILEDESC = "amcheck - function for verifying relation integrity"
-REGRESS = check check_btree check_gin check_heap
+REGRESS = check check_btree check_gin check_gist check_heap
EXTRA_INSTALL = contrib/pg_walinspect
TAP_TESTS = 1
diff --git a/contrib/amcheck/amcheck--1.5--1.6.sql b/contrib/amcheck/amcheck--1.5--1.6.sql
new file mode 100644
index 00000000000..e2d4ca60d5e
--- /dev/null
+++ b/contrib/amcheck/amcheck--1.5--1.6.sql
@@ -0,0 +1,14 @@
+/* contrib/amcheck/amcheck--1.5--1.6.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.6'" to load this file. \quit
+
+
+-- gist_index_check()
+--
+CREATE FUNCTION gist_index_check(index regclass, heapallindexed boolean)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'gist_index_check'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION gist_index_check(regclass, boolean) FROM PUBLIC;
diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control
index c8ba6d7c9bc..2f329ef2cf4 100644
--- a/contrib/amcheck/amcheck.control
+++ b/contrib/amcheck/amcheck.control
@@ -1,5 +1,5 @@
# amcheck extension
comment = 'functions for verifying relation integrity'
-default_version = '1.5'
+default_version = '1.6'
module_pathname = '$libdir/amcheck'
relocatable = true
diff --git a/contrib/amcheck/expected/check_gist.out b/contrib/amcheck/expected/check_gist.out
new file mode 100644
index 00000000000..e229f38aa48
--- /dev/null
+++ b/contrib/amcheck/expected/check_gist.out
@@ -0,0 +1,147 @@
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gist_check;
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+ attstorage
+------------
+ x
+(1 row)
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE toast_bug;
diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build
index 1f0c347ed54..2c79017054c 100644
--- a/contrib/amcheck/meson.build
+++ b/contrib/amcheck/meson.build
@@ -3,6 +3,7 @@
amcheck_sources = files(
'verify_common.c',
'verify_gin.c',
+ 'verify_gist.c',
'verify_heapam.c',
'verify_nbtree.c',
)
@@ -27,6 +28,7 @@ install_data(
'amcheck--1.2--1.3.sql',
'amcheck--1.3--1.4.sql',
'amcheck--1.4--1.5.sql',
+ 'amcheck--1.5--1.6.sql',
kwargs: contrib_data_args,
)
@@ -39,6 +41,7 @@ tests += {
'check',
'check_btree',
'check_gin',
+ 'check_gist',
'check_heap',
],
},
diff --git a/contrib/amcheck/sql/check_gist.sql b/contrib/amcheck/sql/check_gist.sql
new file mode 100644
index 00000000000..bc5737a7710
--- /dev/null
+++ b/contrib/amcheck/sql/check_gist.sql
@@ -0,0 +1,65 @@
+
+SELECT setseed(1);
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+-- cleanup
+DROP TABLE gist_check;
+
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
+
+-- cleanup
+DROP TABLE toast_bug;
\ No newline at end of file
diff --git a/contrib/amcheck/verify_gist.c b/contrib/amcheck/verify_gist.c
new file mode 100644
index 00000000000..b0ff7d98bf9
--- /dev/null
+++ b/contrib/amcheck/verify_gist.c
@@ -0,0 +1,665 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_gist.c
+ * Verifies the integrity of GiST indexes based on invariants.
+ *
+ * Verification checks that all paths in the GiST graph contain
+ * consistent keys: tuples on parent pages consistently include tuples
+ * from child pages. Verification also checks graph invariants: an
+ * internal page must have at least one downlink, and an internal page can
+ * reference either only leaf pages or only internal pages.
+ *
+ *
+ * Copyright (c) 2017-2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/verify_gist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gist_private.h"
+#include "access/heaptoast.h"
+#include "access/tableam.h"
+#include "catalog/index.h"
+#include "catalog/pg_am.h"
+#include "common/pg_prng.h"
+#include "lib/bloomfilter.h"
+#include "verify_common.h"
+#include "utils/memutils.h"
+
+
+/*
+ * GistScanItem represents one item of depth-first scan of GiST index.
+ */
+typedef struct GistScanItem
+{
+ int depth;
+
+ /* Referenced block number to check next */
+ BlockNumber blkno;
+
+ /*
+ * Correctness of this parent tuple will be checked against contents of
+ * referenced page. This tuple will be NULL for the root block.
+ */
+ IndexTuple parenttup;
+
+ /*
+ * LSN to handle concurrent scans of the page. It's necessary to avoid
+ * missing some subtrees from the page that was split just before we read it.
+ */
+ XLogRecPtr parentlsn;
+
+ /*
+ * Reference to parent page for re-locking in case of found parent-child
+ * tuple discrepancies.
+ */
+ BlockNumber parentblk;
+
+ /* Pointer to the next stack item. */
+ struct GistScanItem *next;
+} GistScanItem;
+
+typedef struct GistCheckState
+{
+ /* GiST state */
+ GISTSTATE *state;
+ /* Bloom filter fingerprints index tuples */
+ bloom_filter *filter;
+
+ Snapshot snapshot;
+ Relation rel;
+ Relation heaprel;
+
+ /* Debug counter for reporting percentage of work already done */
+ int64 heaptuplespresent;
+
+ /* progress reporting stuff */
+ BlockNumber totalblocks;
+ BlockNumber reportedblocks;
+ BlockNumber scannedblocks;
+ BlockNumber deltablocks;
+
+ int leafdepth;
+} GistCheckState;
+
+PG_FUNCTION_INFO_V1(gist_index_check);
+
+static void giststate_init_heapallindexed(Relation rel, GistCheckState * result);
+static void gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly);
+static void gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed,
+ BufferAccessStrategy strategy);
+static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
+static IndexTuple gist_refind_parent(Relation rel, BlockNumber parentblkno,
+ BlockNumber childblkno,
+ BufferAccessStrategy strategy);
+static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block,
+ Page page, OffsetNumber offset);
+static void gist_tuple_present_callback(Relation index, ItemPointer tid,
+ Datum *values, bool *isnull,
+ bool tupleIsAlive, void *checkstate);
+static IndexTuple gistFormNormalizedTuple(GistCheckState *giststate,
+ IndexTuple itup);
+
+/*
+ * gist_index_check(index regclass, heapallindexed boolean)
+ *
+ * Verify integrity of GiST index.
+ *
+ * Acquires AccessShareLock on heap & index relations.
+ */
+Datum
+gist_index_check(PG_FUNCTION_ARGS)
+{
+ Oid indrelid = PG_GETARG_OID(0);
+ bool heapallindexed = PG_GETARG_BOOL(1);
+
+ amcheck_lock_relation_and_check(indrelid,
+ GIST_AM_OID,
+ gist_check_parent_keys_consistency,
+ AccessShareLock,
+ &heapallindexed);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Initialize the GiST check state fields needed for the heapallindexed check.
+ * This initializes the bloom filter and registers the snapshot.
+ */
+static void
+giststate_init_heapallindexed(Relation rel, GistCheckState * result)
+{
+ int64 total_pages;
+ int64 total_elems;
+ uint64 seed;
+
+ /*
+ * Size Bloom filter based on estimated number of tuples in index. This
+	 * logic is similar to B-tree; see verify_nbtree.c.
+ */
+ total_pages = result->totalblocks;
+ total_elems = Max(total_pages * (MaxOffsetNumber / 5),
+ (int64) rel->rd_rel->reltuples);
+ seed = pg_prng_uint64(&pg_global_prng_state);
+ result->filter = bloom_create(total_elems, maintenance_work_mem, seed);
+
+ result->snapshot = RegisterSnapshot(GetTransactionSnapshot());
+
+
+ /*
+ * GetTransactionSnapshot() always acquires a new MVCC snapshot in READ
+ * COMMITTED mode. A new snapshot is guaranteed to have all the entries
+ * it requires in the index.
+ *
+ * We must defend against the possibility that an old xact snapshot was
+ * returned at higher isolation levels when that snapshot is not safe for
+ * index scans of the target index. This is possible when the snapshot
+ * sees tuples that are before the index's indcheckxmin horizon. Throwing
+ * an error here should be very rare. It doesn't seem worth using a
+ * secondary snapshot to avoid this.
+ */
+ if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
+ !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
+ result->snapshot->xmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("index \"%s\" cannot be verified using transaction snapshot",
+ RelationGetRelationName(rel))));
+}
+
+/*
+ * Main entry point for GiST check.
+ *
+ * This function verifies that tuples of internal pages cover all
+ * the key space of each tuple on the leaf pages. To do this we invoke
+ * gist_check_page() for every page.
+ *
+ * This check allocates a memory context and scans through the
+ * GiST graph. The scan is performed as a depth-first search using a stack of
+ * GistScanItem-s. Initially this stack contains only the root block number. On
+ * each iteration the top block number is replaced by referenced block numbers.
+ *
+ *
+ * gist_check_page() in its turn takes the downlink tuple and tries to
+ * adjust it by the tuples on the referenced child page. A parent GiST
+ * tuple should never require any adjustment.
+ */
+static void
+gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly)
+{
+ BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+ GistScanItem *stack;
+ MemoryContext mctx;
+ MemoryContext oldcontext;
+ GISTSTATE *state;
+ bool heapallindexed = *((bool *) callback_state);
+ GistCheckState *check_state = palloc0(sizeof(GistCheckState));
+
+ mctx = AllocSetContextCreate(CurrentMemoryContext,
+ "amcheck context",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(mctx);
+
+ state = initGISTstate(rel);
+
+ check_state->state = state;
+ check_state->rel = rel;
+ check_state->heaprel = heaprel;
+
+ /*
+ * We don't know the height of the tree yet, but as soon as we encounter a
+ * leaf page, we will set 'leafdepth' to its depth.
+ */
+ check_state->leafdepth = -1;
+
+ check_state->totalblocks = RelationGetNumberOfBlocks(rel);
+ /* report every 100 blocks or 5%, whichever is bigger */
+ check_state->deltablocks = Max(check_state->totalblocks / 20, 100);
+
+ if (heapallindexed)
+ giststate_init_heapallindexed(rel, check_state);
+
+ /* Start the scan at the root page */
+ stack = (GistScanItem *) palloc0(sizeof(GistScanItem));
+ stack->depth = 0;
+ stack->parenttup = NULL;
+ stack->parentblk = InvalidBlockNumber;
+ stack->parentlsn = InvalidXLogRecPtr;
+ stack->blkno = GIST_ROOT_BLKNO;
+
+ /*
+ * This GiST scan is effectively an "old" VACUUM version before commit
+ * fe280694d which introduced physical order scanning.
+ */
+
+ while (stack)
+ {
+ GistScanItem *stack_next;
+ Buffer buffer;
+ Page page;
+ XLogRecPtr lsn;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Report progress */
+ if (check_state->scannedblocks > check_state->reportedblocks +
+ check_state->deltablocks)
+ {
+			elog(DEBUG1, "verified %u blocks of approximately %u total",
+ check_state->scannedblocks, check_state->totalblocks);
+ check_state->reportedblocks = check_state->scannedblocks;
+ }
+ check_state->scannedblocks++;
+
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+ RBM_NORMAL, strategy);
+ LockBuffer(buffer, GIST_SHARE);
+ page = (Page) BufferGetPage(buffer);
+ lsn = BufferGetLSNAtomic(buffer);
+
+ /* Do basic sanity checks on the page headers */
+ check_index_page(rel, buffer, stack->blkno);
+
+ /*
+ * It's possible that the page was split since we looked at the
+		 * parent, and we may have missed the downlink of the right sibling
+ * when we scanned the parent. If so, add the right sibling to the
+ * stack now.
+ */
+ if (GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page))
+ {
+ /* split page detected, install right link to the stack */
+ GistScanItem *ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+
+ ptr->depth = stack->depth;
+ ptr->parenttup = CopyIndexTuple(stack->parenttup);
+ ptr->parentblk = stack->parentblk;
+ ptr->parentlsn = stack->parentlsn;
+ ptr->blkno = GistPageGetOpaque(page)->rightlink;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+
+ gist_check_page(check_state, stack, page, heapallindexed, strategy);
+
+ if (!GistPageIsLeaf(page))
+ {
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ /* Internal page, so recurse to the child */
+ GistScanItem *ptr;
+ ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+ ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+ ptr->depth = stack->depth + 1;
+ ptr->parenttup = CopyIndexTuple(idxtuple);
+ ptr->parentblk = stack->blkno;
+ ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+ ptr->parentlsn = lsn;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+ }
+
+ LockBuffer(buffer, GIST_UNLOCK);
+ ReleaseBuffer(buffer);
+
+ /* Step to next item in the queue */
+ stack_next = stack->next;
+ if (stack->parenttup)
+ pfree(stack->parenttup);
+ pfree(stack);
+ stack = stack_next;
+ }
+
+ if (heapallindexed)
+ {
+ IndexInfo *indexinfo = BuildIndexInfo(rel);
+ TableScanDesc scan;
+
+ scan = table_beginscan_strat(heaprel, /* relation */
+ check_state->snapshot, /* snapshot */
+ 0, /* number of keys */
+ NULL, /* scan key */
+ true, /* buffer access strategy OK */
+ true); /* syncscan OK? */
+
+ /*
+ * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY.
+ */
+ indexinfo->ii_Concurrent = true;
+
+ indexinfo->ii_Unique = false;
+ indexinfo->ii_ExclusionOps = NULL;
+ indexinfo->ii_ExclusionProcs = NULL;
+ indexinfo->ii_ExclusionStrats = NULL;
+
+ elog(DEBUG1, "verifying that tuples from index \"%s\" are present in \"%s\"",
+ RelationGetRelationName(rel),
+ RelationGetRelationName(heaprel));
+
+ table_index_build_scan(heaprel, rel, indexinfo, true, false,
+ gist_tuple_present_callback, (void *) check_state, scan);
+
+ ereport(DEBUG1,
+ (errmsg_internal("finished verifying presence of " INT64_FORMAT " tuples from table \"%s\" with bitset %.2f%% set",
+ check_state->heaptuplespresent,
+ RelationGetRelationName(heaprel),
+ 100.0 * bloom_prop_bits_set(check_state->filter))));
+
+ UnregisterSnapshot(check_state->snapshot);
+ bloom_free(check_state->filter);
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(mctx);
+ pfree(check_state);
+}
+
+static void
+gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed, BufferAccessStrategy strategy)
+{
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ /* Check that the tree has the same height in all branches */
+ if (GistPageIsLeaf(page))
+ {
+ if (check_state->leafdepth == -1)
+ check_state->leafdepth = stack->depth;
+ else if (stack->depth != check_state->leafdepth)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+ RelationGetRelationName(check_state->rel), stack->blkno)));
+ }
+
+ /*
+ * Check that each tuple looks valid, and is consistent with the downlink
+ * we followed when we stepped on this page.
+ */
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ ItemId iid = PageGetItemIdCareful(check_state->rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+ IndexTuple tmpTuple = NULL;
+
+ /*
+		 * Check that it's not a leftover invalid tuple from pre-9.1. See also
+ * gistdoinsert() and gistbulkdelete() handling of such tuples. We do
+ * consider it an error here.
+ */
+ if (GistTupleIsInvalid(idxtuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("index \"%s\" contains an inner tuple marked as invalid, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i),
+ errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
+ errhint("Please REINDEX it.")));
+
+ if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+
+ /*
+ * Check if this tuple is consistent with the downlink in the parent.
+ */
+ if (stack->parenttup)
+ tmpTuple = gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state);
+
+ if (tmpTuple)
+ {
+ /*
+			 * There was a discrepancy between the parent and child tuples. We
+			 * need to verify that it is not the result of a concurrent call of
+			 * gistplacetopage(). So, lock the parent and try to re-find the
+			 * downlink for the current page. It may be missing due to a
+			 * concurrent page split; this is OK.
+			 *
+			 * Note that when we re-acquire the parent tuple now, we hold locks
+			 * on both the parent and child buffers. Thus the parent tuple must
+			 * include the keyspace of the child.
+ */
+
+ pfree(tmpTuple);
+ pfree(stack->parenttup);
+ stack->parenttup = gist_refind_parent(check_state->rel, stack->parentblk,
+ stack->blkno, strategy);
+
+			/* If we re-found the parent tuple, make a final check before failing */
+ if (!stack->parenttup)
+ elog(NOTICE, "Unable to find parent tuple for block %u on block %u due to concurrent split",
+ stack->blkno, stack->parentblk);
+ else if (gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+ else
+ {
+ /*
+ * But now it is properly adjusted - nothing to do here.
+ */
+ }
+ }
+
+ if (GistPageIsLeaf(page))
+ {
+ if (heapallindexed)
+ {
+ IndexTuple norm;
+
+ norm = gistFormNormalizedTuple(check_state, idxtuple);
+
+ bloom_add_element(check_state->filter,
+ (unsigned char *) norm,
+ IndexTupleSize(norm));
+
+ /* Be tidy */
+ if (norm != idxtuple)
+ pfree(norm);
+ }
+ }
+ else
+ {
+ OffsetNumber off = ItemPointerGetOffsetNumber(&(idxtuple->t_tid));
+
+ if (off != TUPLE_IS_VALID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+							errmsg("index \"%s\" has item id on page %u offset %u not pointing to 0xffff, but %hu",
+ RelationGetRelationName(check_state->rel), stack->blkno, i, off)));
+ }
+ }
+}
+
+/*
+ * gistFormNormalizedTuple - analogue to gistFormTuple, but performs deTOASTing
+ * of all included data (for covering indexes). While we do not expect
+ * toasted attributes in normal indexes, this can happen as a result of
+ * intervention into system catalog. Detoasting of key attributes is expected
+ * to be done by opclass decompression methods, if the indexed type might be
+ * toasted.
+ */
+static IndexTuple
+gistFormNormalizedTuple(GistCheckState *giststate,
+ IndexTuple itup)
+{
+ return amcheck_normalize_tuple(giststate->rel, itup);
+}
+
+static void
+gist_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
+ bool *isnull, bool tupleIsAlive, void *checkstate)
+{
+ GistCheckState *state = (GistCheckState *) checkstate;
+ IndexTuple itup, norm;
+ Datum compatt[INDEX_MAX_KEYS];
+
+ /* Generate a normalized index tuple for fingerprinting */
+ gistCompressValues(state->state, index, values, isnull, true, compatt);
+ itup = index_form_tuple(RelationGetDescr(index), compatt, isnull);
+ itup->t_tid = *tid;
+
+ norm = gistFormNormalizedTuple(state, itup);
+
+ /* Probe Bloom filter -- tuple should be present */
+ if (bloom_lacks_element(state->filter, (unsigned char *) norm,
+ IndexTupleSize(norm)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
+ ItemPointerGetBlockNumber(&(norm->t_tid)),
+ ItemPointerGetOffsetNumber(&(norm->t_tid)),
+ RelationGetRelationName(state->heaprel),
+ RelationGetRelationName(state->rel))));
+
+ state->heaptuplespresent++;
+
+ pfree(itup);
+ /* Be tidy */
+ if (norm != itup)
+ pfree(norm);
+}
+
+/*
+ * check_index_page - verification of basic invariants about GiST page data
+ * This function does not do any tuple analysis.
+ */
+static void
+check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
+{
+ Page page = BufferGetPage(buffer);
+
+ gistcheckpage(rel, buffer);
+
+ if (GistPageGetOpaque(page)->gist_page_id != GIST_PAGE_ID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has corrupted page %d",
+ RelationGetRelationName(rel), blockNo)));
+
+ if (GistPageIsDeleted(page))
+ {
+ if (!GistPageIsLeaf(page))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted internal page %d",
+ RelationGetRelationName(rel), blockNo)));
+ if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted page %d with tuples",
+ RelationGetRelationName(rel), blockNo)));
+ }
+ else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("index \"%s\" has page %d with an excessive number of tuples",
+ RelationGetRelationName(rel), blockNo)));
+}
+
+/*
+ * Try to re-find the downlink pointing to 'blkno', in 'parentblkno'.
+ *
+ * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
+ * returns NULL.
+ */
+static IndexTuple
+gist_refind_parent(Relation rel,
+ BlockNumber parentblkno, BlockNumber childblkno,
+ BufferAccessStrategy strategy)
+{
+ Buffer parentbuf;
+ Page parentpage;
+ OffsetNumber parent_maxoff,
+ off;
+ IndexTuple result = NULL;
+
+ parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
+ strategy);
+
+ LockBuffer(parentbuf, GIST_SHARE);
+ parentpage = BufferGetPage(parentbuf);
+
+ if (GistPageIsLeaf(parentpage))
+ {
+ /*
+ * Currently GiST never deletes internal pages, thus they can never
+ * become leaf.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" internal page %d became leaf",
+ RelationGetRelationName(rel), parentblkno)));
+ }
+
+ parent_maxoff = PageGetMaxOffsetNumber(parentpage);
+ for (off = FirstOffsetNumber; off <= parent_maxoff; off = OffsetNumberNext(off))
+ {
+ ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, off);
+ IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid);
+
+ if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
+ {
+ /*
+ * Found it! Make a copy and return it while both parent and child
+ * pages are locked. This guarantees that at this particular
+ * moment tuples must be coherent to each other.
+ */
+ result = CopyIndexTuple(itup);
+ break;
+ }
+ }
+
+ UnlockReleaseBuffer(parentbuf);
+
+ return result;
+}
+
+static ItemId
+PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
+ OffsetNumber offset)
+{
+ ItemId itemid = PageGetItemId(page, offset);
+
+ if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
+ BLCKSZ - MAXALIGN(sizeof(GISTPageOpaqueData)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("line pointer points past end of tuple space in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ /*
+ * Verify that the line pointer isn't LP_REDIRECT or LP_UNUSED, since gist
+ * never uses either. Verify that the line pointer has storage, too, since
+ * even LP_DEAD items should.
+ */
+ if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
+ ItemIdGetLength(itemid) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("invalid line pointer storage in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ return itemid;
+}
diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml
index 08006856579..917a4936b6d 100644
--- a/doc/src/sgml/amcheck.sgml
+++ b/doc/src/sgml/amcheck.sgml
@@ -208,6 +208,25 @@ ORDER BY c.relpages DESC LIMIT 10;
</listitem>
</varlistentry>
+ <varlistentry>
+ <term>
+ <function>gist_index_check(index regclass, heapallindexed boolean) returns void</function>
+ <indexterm>
+ <primary>gist_index_check</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+    <function>gist_index_check</function> tests that its target GiST
+    index has consistent parent-child tuple relations (no parent tuple
+    requires adjustment) and that its page graph respects balanced-tree
+    invariants (internal pages reference either only leaf pages or only
+    internal pages).
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<tip>
<para>
--
2.43.0
v2026-01-01-0001-Move-normalize-tuple-logic-from-nbtcheck.patchapplication/octet-stream; name=v2026-01-01-0001-Move-normalize-tuple-logic-from-nbtcheck.patchDownload
From 742f41cf0555cb763aa204cda9e72791dcc33003 Mon Sep 17 00:00:00 2001
From: reshke <reshke@double.cloud>
Date: Wed, 22 Oct 2025 15:29:26 +0000
Subject: [PATCH v2026-01-01 1/2] Move `normalize tuple` logic from nbtcheck to
verify_common
Preparatory patch to reuse the index tuple normalization logic
in GiST amcheck.
---
contrib/amcheck/verify_common.c | 112 ++++++++++++++++++++++++++++++++
contrib/amcheck/verify_common.h | 2 +
contrib/amcheck/verify_nbtree.c | 107 +-----------------------------
3 files changed, 115 insertions(+), 106 deletions(-)
diff --git a/contrib/amcheck/verify_common.c b/contrib/amcheck/verify_common.c
index a31ce06ed99..e9b4887f65e 100644
--- a/contrib/amcheck/verify_common.c
+++ b/contrib/amcheck/verify_common.c
@@ -13,6 +13,7 @@
#include "postgres.h"
#include "access/genam.h"
+#include "access/heaptoast.h"
#include "access/table.h"
#include "access/tableam.h"
#include "verify_common.h"
@@ -189,3 +190,114 @@ index_checkable(Relation rel, Oid am_id)
return amcheck_index_mainfork_expected(rel);
}
+
+IndexTuple
+amcheck_normalize_tuple(Relation irel, IndexTuple itup)
+{
+ TupleDesc tupleDescriptor = RelationGetDescr(irel);
+ Datum normalized[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ bool need_free[INDEX_MAX_KEYS];
+ bool formnewtup = false;
+ IndexTuple reformed;
+ int i;
+
+ /* Easy case: It's immediately clear that tuple has no varlena datums */
+ if (!IndexTupleHasVarwidths(itup))
+ return itup;
+
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ {
+ Form_pg_attribute att;
+
+ att = TupleDescAttr(tupleDescriptor, i);
+
+ /* Assume untoasted/already normalized datum initially */
+ need_free[i] = false;
+ normalized[i] = index_getattr(itup, att->attnum,
+ tupleDescriptor,
+ &isnull[i]);
+ if (att->attbyval || att->attlen != -1 || isnull[i])
+ continue;
+
+ /*
+ * Callers always pass a tuple that could safely be inserted into the
+ * index without further processing, so an external varlena header
+ * should never be encountered here
+ */
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
+ ItemPointerGetBlockNumber(&(itup->t_tid)),
+ ItemPointerGetOffsetNumber(&(itup->t_tid)),
+ RelationGetRelationName(irel))));
+ else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
+ VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
+ (att->attstorage == TYPSTORAGE_EXTENDED ||
+ att->attstorage == TYPSTORAGE_MAIN))
+ {
+ /*
+ * This value will be compressed by index_form_tuple() with the
+ * current storage settings. We may be here because this tuple
+ * was formed with different storage settings. So, force forming.
+ */
+ formnewtup = true;
+ }
+ else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
+ {
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
+ need_free[i] = true;
+ }
+
+ /*
+ * Short tuples may have 1B or 4B header. Convert 4B header of short
+ * tuples to 1B
+ */
+ else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
+ {
+ /* convert to short varlena */
+ Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
+ char *data = palloc(len);
+
+ SET_VARSIZE_SHORT(data, len);
+ memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
+
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(data);
+ need_free[i] = true;
+ }
+ }
+
+ /*
+ * Easier case: Tuple has varlena datums, none of which are compressed or
+ * short with 4B header
+ */
+ if (!formnewtup)
+ return itup;
+
+ /*
+ * Hard case: Tuple had compressed varlena datums that necessitate
+ * creating normalized version of the tuple from uncompressed input datums
+ * (normalized input datums). This is rather naive, but shouldn't be
+ * necessary too often.
+ *
+ * In the heap, tuples may contain short varlena datums with both 1B
+ * header and 4B headers. But the corresponding index tuple should always
+ * have such varlena's with 1B headers. So, if there is a short varlena
+ * with 4B header, we need to convert it for fingerprinting.
+ *
+ * Note that we rely on deterministic index_form_tuple() TOAST compression
+ * of normalized input.
+ */
+ reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
+ reformed->t_tid = itup->t_tid;
+
+ /* Cannot leak memory here */
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ if (need_free[i])
+ pfree(DatumGetPointer(normalized[i]));
+
+ return reformed;
+}
\ No newline at end of file
diff --git a/contrib/amcheck/verify_common.h b/contrib/amcheck/verify_common.h
index 3fa63d2121a..ffe0d30beb3 100644
--- a/contrib/amcheck/verify_common.h
+++ b/contrib/amcheck/verify_common.h
@@ -26,3 +26,5 @@ extern void amcheck_lock_relation_and_check(Oid indrelid,
Oid am_id,
IndexDoCheckCallback check,
LOCKMODE lockmode, void *state);
+
+extern IndexTuple amcheck_normalize_tuple(Relation irel, IndexTuple itup);
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index f91392a3a49..2ad27eb5c2b 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -2845,115 +2845,10 @@ bt_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
static IndexTuple
bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
{
- TupleDesc tupleDescriptor = RelationGetDescr(state->rel);
- Datum normalized[INDEX_MAX_KEYS];
- bool isnull[INDEX_MAX_KEYS];
- bool need_free[INDEX_MAX_KEYS];
- bool formnewtup = false;
- IndexTuple reformed;
- int i;
-
/* Caller should only pass "logical" non-pivot tuples here */
Assert(!BTreeTupleIsPosting(itup) && !BTreeTupleIsPivot(itup));
- /* Easy case: It's immediately clear that tuple has no varlena datums */
- if (!IndexTupleHasVarwidths(itup))
- return itup;
-
- for (i = 0; i < tupleDescriptor->natts; i++)
- {
- Form_pg_attribute att;
-
- att = TupleDescAttr(tupleDescriptor, i);
-
- /* Assume untoasted/already normalized datum initially */
- need_free[i] = false;
- normalized[i] = index_getattr(itup, att->attnum,
- tupleDescriptor,
- &isnull[i]);
- if (att->attbyval || att->attlen != -1 || isnull[i])
- continue;
-
- /*
- * Callers always pass a tuple that could safely be inserted into the
- * index without further processing, so an external varlena header
- * should never be encountered here
- */
- if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
- ItemPointerGetBlockNumber(&(itup->t_tid)),
- ItemPointerGetOffsetNumber(&(itup->t_tid)),
- RelationGetRelationName(state->rel))));
- else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
- VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
- (att->attstorage == TYPSTORAGE_EXTENDED ||
- att->attstorage == TYPSTORAGE_MAIN))
- {
- /*
- * This value will be compressed by index_form_tuple() with the
- * current storage settings. We may be here because this tuple
- * was formed with different storage settings. So, force forming.
- */
- formnewtup = true;
- }
- else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
- {
- formnewtup = true;
- normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
- need_free[i] = true;
- }
-
- /*
- * Short tuples may have 1B or 4B header. Convert 4B header of short
- * tuples to 1B
- */
- else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
- {
- /* convert to short varlena */
- Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
- char *data = palloc(len);
-
- SET_VARSIZE_SHORT(data, len);
- memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
-
- formnewtup = true;
- normalized[i] = PointerGetDatum(data);
- need_free[i] = true;
- }
- }
-
- /*
- * Easier case: Tuple has varlena datums, none of which are compressed or
- * short with 4B header
- */
- if (!formnewtup)
- return itup;
-
- /*
- * Hard case: Tuple had compressed varlena datums that necessitate
- * creating normalized version of the tuple from uncompressed input datums
- * (normalized input datums). This is rather naive, but shouldn't be
- * necessary too often.
- *
- * In the heap, tuples may contain short varlena datums with both 1B
- * header and 4B headers. But the corresponding index tuple should always
- * have such varlena's with 1B headers. So, if there is a short varlena
- * with 4B header, we need to convert it for fingerprinting.
- *
- * Note that we rely on deterministic index_form_tuple() TOAST compression
- * of normalized input.
- */
- reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
- reformed->t_tid = itup->t_tid;
-
- /* Cannot leak memory here */
- for (i = 0; i < tupleDescriptor->natts; i++)
- if (need_free[i])
- pfree(DatumGetPointer(normalized[i]));
-
- return reformed;
+ return amcheck_normalize_tuple(state->rel, itup);
}
/*
--
2.43.0
On Thu, 1 Jan 2026 at 17:05, Kirill Reshke <reshkekirill@gmail.com> wrote:
CF bot was unhappy about the last version due to an obvious bug; PFA new
version with fixes. The problem was "DROP TABLE toast_bug;" missing from the expected regression output.
[0] https://cirrus-ci.com/task/6378051304423424
--
Best regards,
Kirill Reshke
Attached is a new version with commit message polishing; it also addresses the CF
feedback, which was unhappy due to headercheck.
--
Best regards,
Kirill Reshke
Attachments:
v2026-01-10-0001-Move-normalize-tuple-logic-from-nbtcheck.patchapplication/octet-stream; name=v2026-01-10-0001-Move-normalize-tuple-logic-from-nbtcheck.patchDownload
From f03a0c6c9fd8d0b6ca018ff7d62dc51324e630b1 Mon Sep 17 00:00:00 2001
From: reshke <reshke@double.cloud>
Date: Wed, 22 Oct 2025 15:29:26 +0000
Subject: [PATCH v2026-01-10 1/2] Move `normalize tuple` logic from nbtcheck to
verify_common
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Preparational patch to reuse index tuple normalize logic
in GiST amcheck.
Reviewed-by: Sergey Dudoladov <sergey.dudoladov@gmail.com>
Reviewed-by: Paul A Jungwirth <pj@illuminatedcomputing.com>
Reviewed-by: Andrey Borodin <x4mmm@yandex-team.ru>
Reviewed-by: Miłosz Bieniek <bieniek.milosz@proton.me>
Discussion: https://postgr.es/m/5FC1B5B6-FB35-44A2-AB62-632F14E958C5@yandex-team.ru
---
contrib/amcheck/verify_common.c | 112 ++++++++++++++++++++++++++++++++
contrib/amcheck/verify_common.h | 4 ++
contrib/amcheck/verify_nbtree.c | 107 +-----------------------------
3 files changed, 117 insertions(+), 106 deletions(-)
diff --git a/contrib/amcheck/verify_common.c b/contrib/amcheck/verify_common.c
index 54ce901716b..9a617ae3bb4 100644
--- a/contrib/amcheck/verify_common.c
+++ b/contrib/amcheck/verify_common.c
@@ -13,6 +13,7 @@
#include "postgres.h"
#include "access/genam.h"
+#include "access/heaptoast.h"
#include "access/table.h"
#include "access/tableam.h"
#include "verify_common.h"
@@ -189,3 +190,114 @@ index_checkable(Relation rel, Oid am_id)
return amcheck_index_mainfork_expected(rel);
}
+
+IndexTuple
+amcheck_normalize_tuple(Relation irel, IndexTuple itup)
+{
+ TupleDesc tupleDescriptor = RelationGetDescr(irel);
+ Datum normalized[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ bool need_free[INDEX_MAX_KEYS];
+ bool formnewtup = false;
+ IndexTuple reformed;
+ int i;
+
+ /* Easy case: It's immediately clear that tuple has no varlena datums */
+ if (!IndexTupleHasVarwidths(itup))
+ return itup;
+
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ {
+ Form_pg_attribute att;
+
+ att = TupleDescAttr(tupleDescriptor, i);
+
+ /* Assume untoasted/already normalized datum initially */
+ need_free[i] = false;
+ normalized[i] = index_getattr(itup, att->attnum,
+ tupleDescriptor,
+ &isnull[i]);
+ if (att->attbyval || att->attlen != -1 || isnull[i])
+ continue;
+
+ /*
+ * Callers always pass a tuple that could safely be inserted into the
+ * index without further processing, so an external varlena header
+ * should never be encountered here
+ */
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
+ ItemPointerGetBlockNumber(&(itup->t_tid)),
+ ItemPointerGetOffsetNumber(&(itup->t_tid)),
+ RelationGetRelationName(irel))));
+ else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
+ VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
+ (att->attstorage == TYPSTORAGE_EXTENDED ||
+ att->attstorage == TYPSTORAGE_MAIN))
+ {
+ /*
+ * This value will be compressed by index_form_tuple() with the
+ * current storage settings. We may be here because this tuple
+ * was formed with different storage settings. So, force forming.
+ */
+ formnewtup = true;
+ }
+ else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
+ {
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
+ need_free[i] = true;
+ }
+
+ /*
+ * Short tuples may have 1B or 4B header. Convert 4B header of short
+ * tuples to 1B
+ */
+ else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
+ {
+ /* convert to short varlena */
+ Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
+ char *data = palloc(len);
+
+ SET_VARSIZE_SHORT(data, len);
+ memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
+
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(data);
+ need_free[i] = true;
+ }
+ }
+
+ /*
+ * Easier case: Tuple has varlena datums, none of which are compressed or
+ * short with 4B header
+ */
+ if (!formnewtup)
+ return itup;
+
+ /*
+ * Hard case: Tuple had compressed varlena datums that necessitate
+ * creating normalized version of the tuple from uncompressed input datums
+ * (normalized input datums). This is rather naive, but shouldn't be
+ * necessary too often.
+ *
+ * In the heap, tuples may contain short varlena datums with both 1B
+ * header and 4B headers. But the corresponding index tuple should always
+ * have such varlena's with 1B headers. So, if there is a short varlena
+ * with 4B header, we need to convert it for fingerprinting.
+ *
+ * Note that we rely on deterministic index_form_tuple() TOAST compression
+ * of normalized input.
+ */
+ reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
+ reformed->t_tid = itup->t_tid;
+
+ /* Cannot leak memory here */
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ if (need_free[i])
+ pfree(DatumGetPointer(normalized[i]));
+
+ return reformed;
+}
\ No newline at end of file
diff --git a/contrib/amcheck/verify_common.h b/contrib/amcheck/verify_common.h
index 4c4ddc01aa7..916bf30d193 100644
--- a/contrib/amcheck/verify_common.h
+++ b/contrib/amcheck/verify_common.h
@@ -10,6 +10,8 @@
*
*-------------------------------------------------------------------------
*/
+
+#include "access/itup.h"
#include "storage/bufpage.h"
#include "storage/lmgr.h"
#include "storage/lockdefs.h"
@@ -26,3 +28,5 @@ extern void amcheck_lock_relation_and_check(Oid indrelid,
Oid am_id,
IndexDoCheckCallback check,
LOCKMODE lockmode, void *state);
+
+extern IndexTuple amcheck_normalize_tuple(Relation irel, IndexTuple itup);
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index 7733ab202e3..c1e24338361 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -2845,115 +2845,10 @@ bt_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
static IndexTuple
bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
{
- TupleDesc tupleDescriptor = RelationGetDescr(state->rel);
- Datum normalized[INDEX_MAX_KEYS];
- bool isnull[INDEX_MAX_KEYS];
- bool need_free[INDEX_MAX_KEYS];
- bool formnewtup = false;
- IndexTuple reformed;
- int i;
-
/* Caller should only pass "logical" non-pivot tuples here */
Assert(!BTreeTupleIsPosting(itup) && !BTreeTupleIsPivot(itup));
- /* Easy case: It's immediately clear that tuple has no varlena datums */
- if (!IndexTupleHasVarwidths(itup))
- return itup;
-
- for (i = 0; i < tupleDescriptor->natts; i++)
- {
- Form_pg_attribute att;
-
- att = TupleDescAttr(tupleDescriptor, i);
-
- /* Assume untoasted/already normalized datum initially */
- need_free[i] = false;
- normalized[i] = index_getattr(itup, att->attnum,
- tupleDescriptor,
- &isnull[i]);
- if (att->attbyval || att->attlen != -1 || isnull[i])
- continue;
-
- /*
- * Callers always pass a tuple that could safely be inserted into the
- * index without further processing, so an external varlena header
- * should never be encountered here
- */
- if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
- ItemPointerGetBlockNumber(&(itup->t_tid)),
- ItemPointerGetOffsetNumber(&(itup->t_tid)),
- RelationGetRelationName(state->rel))));
- else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
- VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
- (att->attstorage == TYPSTORAGE_EXTENDED ||
- att->attstorage == TYPSTORAGE_MAIN))
- {
- /*
- * This value will be compressed by index_form_tuple() with the
- * current storage settings. We may be here because this tuple
- * was formed with different storage settings. So, force forming.
- */
- formnewtup = true;
- }
- else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
- {
- formnewtup = true;
- normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
- need_free[i] = true;
- }
-
- /*
- * Short tuples may have 1B or 4B header. Convert 4B header of short
- * tuples to 1B
- */
- else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
- {
- /* convert to short varlena */
- Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
- char *data = palloc(len);
-
- SET_VARSIZE_SHORT(data, len);
- memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
-
- formnewtup = true;
- normalized[i] = PointerGetDatum(data);
- need_free[i] = true;
- }
- }
-
- /*
- * Easier case: Tuple has varlena datums, none of which are compressed or
- * short with 4B header
- */
- if (!formnewtup)
- return itup;
-
- /*
- * Hard case: Tuple had compressed varlena datums that necessitate
- * creating normalized version of the tuple from uncompressed input datums
- * (normalized input datums). This is rather naive, but shouldn't be
- * necessary too often.
- *
- * In the heap, tuples may contain short varlena datums with both 1B
- * header and 4B headers. But the corresponding index tuple should always
- * have such varlena's with 1B headers. So, if there is a short varlena
- * with 4B header, we need to convert it for fingerprinting.
- *
- * Note that we rely on deterministic index_form_tuple() TOAST compression
- * of normalized input.
- */
- reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
- reformed->t_tid = itup->t_tid;
-
- /* Cannot leak memory here */
- for (i = 0; i < tupleDescriptor->natts; i++)
- if (need_free[i])
- pfree(DatumGetPointer(normalized[i]));
-
- return reformed;
+ return amcheck_normalize_tuple(state->rel, itup);
}
/*
--
2.43.0
v2026-01-10-0002-Add-gist_index_check-function-to-verify-.patchapplication/octet-stream; name=v2026-01-10-0002-Add-gist_index_check-function-to-verify-.patchDownload
From 464a432e19bbac93e3e28a2f753c9eaecbc52496 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Wed, 18 Jun 2025 20:33:28 +0300
Subject: [PATCH v2026-01-10 2/2] Add gist_index_check() function to verify
GiST index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This function traverses the GiST with a depth-first search and checks
that all downlink tuples are included in the parent tuple's keyspace.
The traversal holds a lock on only one page at a time until some discrepancy is found.
To re-check a suspicious pair of parent and child tuples it acquires
locks on both parent and child pages in the same order as page
split does.
Author: Andrey Borodin <amborodin@acm.org>
Author: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-By: José Villanova <jose.arthur@gmail.com>
Reviewed-By: Aleksander Alekseev <aleksander@timescale.com>
Reviewed-By: Nikolay Samokhvalov <samokhvalov@gmail.com>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Tomas Vondra <tomas.vondra@enterprisedb.com>
Reviewed-By: Arseniy Mukhin <arseniy(dot)mukhin(dot)dev(at)gmail(dot)com>
Reviewed-By: Miłosz Bieniek <bieniek.milosz@proton.me>
Reviewed-By: Paul A Jungwirth <pj(at)illuminatedcomputing(dot)com>
Reviewed-by: Sergey Dudoladov <sergey.dudoladov@gmail.com>
Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru
Discussion: https://postgr.es/m/5FC1B5B6-FB35-44A2-AB62-632F14E958C5@yandex-team.ru
---
contrib/amcheck/Makefile | 5 +-
contrib/amcheck/amcheck--1.5--1.6.sql | 14 +
contrib/amcheck/amcheck.control | 2 +-
contrib/amcheck/expected/check_gist.out | 147 ++++++
contrib/amcheck/meson.build | 3 +
contrib/amcheck/sql/check_gist.sql | 65 +++
contrib/amcheck/verify_gist.c | 665 ++++++++++++++++++++++++
doc/src/sgml/amcheck.sgml | 19 +
8 files changed, 917 insertions(+), 3 deletions(-)
create mode 100644 contrib/amcheck/amcheck--1.5--1.6.sql
create mode 100644 contrib/amcheck/expected/check_gist.out
create mode 100644 contrib/amcheck/sql/check_gist.sql
create mode 100644 contrib/amcheck/verify_gist.c
diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index 1b7a63cbaa4..be28f0b8275 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -5,15 +5,16 @@ OBJS = \
$(WIN32RES) \
verify_common.o \
verify_gin.o \
+ verify_gist.o \
verify_heapam.o \
verify_nbtree.o
EXTENSION = amcheck
DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql \
- amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql
+ amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql amcheck--1.5--1.6.sql
PGFILEDESC = "amcheck - function for verifying relation integrity"
-REGRESS = check check_btree check_gin check_heap
+REGRESS = check check_btree check_gin check_gist check_heap
EXTRA_INSTALL = contrib/pg_walinspect
TAP_TESTS = 1
diff --git a/contrib/amcheck/amcheck--1.5--1.6.sql b/contrib/amcheck/amcheck--1.5--1.6.sql
new file mode 100644
index 00000000000..e2d4ca60d5e
--- /dev/null
+++ b/contrib/amcheck/amcheck--1.5--1.6.sql
@@ -0,0 +1,14 @@
+/* contrib/amcheck/amcheck--1.5--1.6.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.6'" to load this file. \quit
+
+
+-- gist_index_check()
+--
+CREATE FUNCTION gist_index_check(index regclass, heapallindexed boolean)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'gist_index_check'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION gist_index_check(regclass, boolean) FROM PUBLIC;
diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control
index c8ba6d7c9bc..2f329ef2cf4 100644
--- a/contrib/amcheck/amcheck.control
+++ b/contrib/amcheck/amcheck.control
@@ -1,5 +1,5 @@
# amcheck extension
comment = 'functions for verifying relation integrity'
-default_version = '1.5'
+default_version = '1.6'
module_pathname = '$libdir/amcheck'
relocatable = true
diff --git a/contrib/amcheck/expected/check_gist.out b/contrib/amcheck/expected/check_gist.out
new file mode 100644
index 00000000000..e229f38aa48
--- /dev/null
+++ b/contrib/amcheck/expected/check_gist.out
@@ -0,0 +1,147 @@
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gist_check;
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+ attstorage
+------------
+ x
+(1 row)
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE toast_bug;
diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build
index d5137ef691d..18a87fa8f8c 100644
--- a/contrib/amcheck/meson.build
+++ b/contrib/amcheck/meson.build
@@ -3,6 +3,7 @@
amcheck_sources = files(
'verify_common.c',
'verify_gin.c',
+ 'verify_gist.c',
'verify_heapam.c',
'verify_nbtree.c',
)
@@ -27,6 +28,7 @@ install_data(
'amcheck--1.2--1.3.sql',
'amcheck--1.3--1.4.sql',
'amcheck--1.4--1.5.sql',
+ 'amcheck--1.5--1.6.sql',
kwargs: contrib_data_args,
)
@@ -39,6 +41,7 @@ tests += {
'check',
'check_btree',
'check_gin',
+ 'check_gist',
'check_heap',
],
},
diff --git a/contrib/amcheck/sql/check_gist.sql b/contrib/amcheck/sql/check_gist.sql
new file mode 100644
index 00000000000..bc5737a7710
--- /dev/null
+++ b/contrib/amcheck/sql/check_gist.sql
@@ -0,0 +1,65 @@
+
+SELECT setseed(1);
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+-- cleanup
+DROP TABLE gist_check;
+
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
+
+-- cleanup
+DROP TABLE toast_bug;
\ No newline at end of file
diff --git a/contrib/amcheck/verify_gist.c b/contrib/amcheck/verify_gist.c
new file mode 100644
index 00000000000..b0ff7d98bf9
--- /dev/null
+++ b/contrib/amcheck/verify_gist.c
@@ -0,0 +1,665 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_gist.c
+ * Verifies the integrity of GiST indexes based on invariants.
+ *
+ * Verification checks that all paths in GiST graph contain
+ * consistent keys: tuples on parent pages consistently include tuples
+ * from children pages. Also, verification checks graph invariants:
+ * internal page must have at least one downlink, internal page can
+ * reference either only leaf pages or only internal pages.
+ *
+ *
+ * Copyright (c) 2017-2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/verify_gist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gist_private.h"
+#include "access/heaptoast.h"
+#include "access/tableam.h"
+#include "catalog/index.h"
+#include "catalog/pg_am.h"
+#include "common/pg_prng.h"
+#include "lib/bloomfilter.h"
+#include "verify_common.h"
+#include "utils/memutils.h"
+
+
+/*
+ * GistScanItem represents one item of depth-first scan of GiST index.
+ */
+typedef struct GistScanItem
+{
+ int depth;
+
+ /* Referenced block number to check next */
+ BlockNumber blkno;
+
+ /*
+ * Correctness of this parent tuple will be checked against contents of
+ * referenced page. This tuple will be NULL for the root block.
+ */
+ IndexTuple parenttup;
+
+ /*
+ * LSN to handle concurrent scans of the page. It's necessary to avoid
+ * missing some subtrees from the page that was split just before we read it.
+ */
+ XLogRecPtr parentlsn;
+
+ /*
+ * Reference to parent page for re-locking in case of found parent-child
+ * tuple discrepancies.
+ */
+ BlockNumber parentblk;
+
+ /* Pointer to the next stack item. */
+ struct GistScanItem *next;
+} GistScanItem;
+
+typedef struct GistCheckState
+{
+ /* GiST state */
+ GISTSTATE *state;
+ /* Bloom filter fingerprints index tuples */
+ bloom_filter *filter;
+
+ Snapshot snapshot;
+ Relation rel;
+ Relation heaprel;
+
+ /* Debug counter for reporting percentage of work already done */
+ int64 heaptuplespresent;
+
+ /* progress reporting stuff */
+ BlockNumber totalblocks;
+ BlockNumber reportedblocks;
+ BlockNumber scannedblocks;
+ BlockNumber deltablocks;
+
+ int leafdepth;
+} GistCheckState;
+
+PG_FUNCTION_INFO_V1(gist_index_check);
+
+static void giststate_init_heapallindexed(Relation rel, GistCheckState * result);
+static void gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly);
+static void gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed,
+ BufferAccessStrategy strategy);
+static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
+static IndexTuple gist_refind_parent(Relation rel, BlockNumber parentblkno,
+ BlockNumber childblkno,
+ BufferAccessStrategy strategy);
+static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block,
+ Page page, OffsetNumber offset);
+static void gist_tuple_present_callback(Relation index, ItemPointer tid,
+ Datum *values, bool *isnull,
+ bool tupleIsAlive, void *checkstate);
+static IndexTuple gistFormNormalizedTuple(GistCheckState *giststate,
+ IndexTuple itup);
+
+/*
+ * gist_index_check(index regclass)
+ *
+ * Verify integrity of GiST index.
+ *
+ * Acquires AccessShareLock on heap & index relations.
+ */
+Datum
+gist_index_check(PG_FUNCTION_ARGS)
+{
+ Oid indrelid = PG_GETARG_OID(0);
+ bool heapallindexed = PG_GETARG_BOOL(1);
+
+ amcheck_lock_relation_and_check(indrelid,
+ GIST_AM_OID,
+ gist_check_parent_keys_consistency,
+ AccessShareLock,
+ &heapallindexed);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Initialize the GiST check state fields needed for heapallindexed
+ * verification: the bloom filter and the snapshot.
+ */
+static void
+giststate_init_heapallindexed(Relation rel, GistCheckState * result)
+{
+ int64 total_pages;
+ int64 total_elems;
+ uint64 seed;
+
+ /*
+ * Size Bloom filter based on estimated number of tuples in index. This
+ * logic is similar to B-tree; see verify_nbtree.c.
+ */
+ total_pages = result->totalblocks;
+ total_elems = Max(total_pages * (MaxOffsetNumber / 5),
+ (int64) rel->rd_rel->reltuples);
+ seed = pg_prng_uint64(&pg_global_prng_state);
+ result->filter = bloom_create(total_elems, maintenance_work_mem, seed);
+
+ result->snapshot = RegisterSnapshot(GetTransactionSnapshot());
+
+
+ /*
+ * GetTransactionSnapshot() always acquires a new MVCC snapshot in READ
+ * COMMITTED mode. A new snapshot is guaranteed to have all the entries
+ * it requires in the index.
+ *
+ * We must defend against the possibility that an old xact snapshot was
+ * returned at higher isolation levels when that snapshot is not safe for
+ * index scans of the target index. This is possible when the snapshot
+ * sees tuples that are before the index's indcheckxmin horizon. Throwing
+ * an error here should be very rare. It doesn't seem worth using a
+ * secondary snapshot to avoid this.
+ */
+ if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
+ !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
+ result->snapshot->xmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("index \"%s\" cannot be verified using transaction snapshot",
+ RelationGetRelationName(rel))));
+}
+
+/*
+ * Main entry point for GiST check.
+ *
+ * This function verifies that tuples of internal pages cover all
+ * the key space of the tuples on the leaf pages. To do this we invoke
+ * gist_check_page() for every page.
+ *
+ * This check allocates memory context and scans through
+ * GiST graph. This scan is performed in a depth-first search using a stack of
+ * GistScanItem-s. Initially this stack contains only the root block number. On
+ * each iteration the top block number is replaced by referenced block numbers.
+ *
+ *
+ * gist_check_page() in its turn takes every tuple and tries to
+ * adjust it by tuples on the referenced child page. Parent gist tuple should
+ * never require any adjustments.
+ */
+static void
+gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly)
+{
+ BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+ GistScanItem *stack;
+ MemoryContext mctx;
+ MemoryContext oldcontext;
+ GISTSTATE *state;
+ bool heapallindexed = *((bool *) callback_state);
+ GistCheckState *check_state = palloc0(sizeof(GistCheckState));
+
+ mctx = AllocSetContextCreate(CurrentMemoryContext,
+ "amcheck context",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(mctx);
+
+ state = initGISTstate(rel);
+
+ check_state->state = state;
+ check_state->rel = rel;
+ check_state->heaprel = heaprel;
+
+ /*
+ * We don't know the height of the tree yet, but as soon as we encounter a
+ * leaf page, we will set 'leafdepth' to its depth.
+ */
+ check_state->leafdepth = -1;
+
+ check_state->totalblocks = RelationGetNumberOfBlocks(rel);
+ /* report every 100 blocks or 5%, whichever is bigger */
+ check_state->deltablocks = Max(check_state->totalblocks / 20, 100);
+
+ if (heapallindexed)
+ giststate_init_heapallindexed(rel, check_state);
+
+ /* Start the scan at the root page */
+ stack = (GistScanItem *) palloc0(sizeof(GistScanItem));
+ stack->depth = 0;
+ stack->parenttup = NULL;
+ stack->parentblk = InvalidBlockNumber;
+ stack->parentlsn = InvalidXLogRecPtr;
+ stack->blkno = GIST_ROOT_BLKNO;
+
+ /*
+ * This GiST scan is effectively an "old" VACUUM version before commit
+ * fe280694d which introduced physical order scanning.
+ */
+
+ while (stack)
+ {
+ GistScanItem *stack_next;
+ Buffer buffer;
+ Page page;
+ XLogRecPtr lsn;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Report progress */
+ if (check_state->scannedblocks > check_state->reportedblocks +
+ check_state->deltablocks)
+ {
+ elog(DEBUG1, "verified level %u blocks of approximately %u total",
+ check_state->scannedblocks, check_state->totalblocks);
+ check_state->reportedblocks = check_state->scannedblocks;
+ }
+ check_state->scannedblocks++;
+
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+ RBM_NORMAL, strategy);
+ LockBuffer(buffer, GIST_SHARE);
+ page = (Page) BufferGetPage(buffer);
+ lsn = BufferGetLSNAtomic(buffer);
+
+ /* Do basic sanity checks on the page headers */
+ check_index_page(rel, buffer, stack->blkno);
+
+ /*
+ * It's possible that the page was split since we looked at the
+ * parent, so that we didn't miss the downlink of the right sibling
+ * when we scanned the parent. If so, add the right sibling to the
+ * stack now.
+ */
+ if (GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page))
+ {
+ /* split page detected, install right link to the stack */
+ GistScanItem *ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+
+ ptr->depth = stack->depth;
+ ptr->parenttup = CopyIndexTuple(stack->parenttup);
+ ptr->parentblk = stack->parentblk;
+ ptr->parentlsn = stack->parentlsn;
+ ptr->blkno = GistPageGetOpaque(page)->rightlink;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+
+ gist_check_page(check_state, stack, page, heapallindexed, strategy);
+
+ if (!GistPageIsLeaf(page))
+ {
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ /* Internal page, so recurse to the child */
+ GistScanItem *ptr;
+ ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+ ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+ ptr->depth = stack->depth + 1;
+ ptr->parenttup = CopyIndexTuple(idxtuple);
+ ptr->parentblk = stack->blkno;
+ ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+ ptr->parentlsn = lsn;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+ }
+
+ LockBuffer(buffer, GIST_UNLOCK);
+ ReleaseBuffer(buffer);
+
+ /* Step to next item in the queue */
+ stack_next = stack->next;
+ if (stack->parenttup)
+ pfree(stack->parenttup);
+ pfree(stack);
+ stack = stack_next;
+ }
+
+ if (heapallindexed)
+ {
+ IndexInfo *indexinfo = BuildIndexInfo(rel);
+ TableScanDesc scan;
+
+ scan = table_beginscan_strat(heaprel, /* relation */
+ check_state->snapshot, /* snapshot */
+ 0, /* number of keys */
+ NULL, /* scan key */
+ true, /* buffer access strategy OK */
+ true); /* syncscan OK? */
+
+ /*
+ * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY.
+ */
+ indexinfo->ii_Concurrent = true;
+
+ indexinfo->ii_Unique = false;
+ indexinfo->ii_ExclusionOps = NULL;
+ indexinfo->ii_ExclusionProcs = NULL;
+ indexinfo->ii_ExclusionStrats = NULL;
+
+ elog(DEBUG1, "verifying that tuples from index \"%s\" are present in \"%s\"",
+ RelationGetRelationName(rel),
+ RelationGetRelationName(heaprel));
+
+ table_index_build_scan(heaprel, rel, indexinfo, true, false,
+ gist_tuple_present_callback, (void *) check_state, scan);
+
+ ereport(DEBUG1,
+ (errmsg_internal("finished verifying presence of " INT64_FORMAT " tuples from table \"%s\" with bitset %.2f%% set",
+ check_state->heaptuplespresent,
+ RelationGetRelationName(heaprel),
+ 100.0 * bloom_prop_bits_set(check_state->filter))));
+
+ UnregisterSnapshot(check_state->snapshot);
+ bloom_free(check_state->filter);
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(mctx);
+ pfree(check_state);
+}
+
+static void
+gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed, BufferAccessStrategy strategy)
+{
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ /* Check that the tree has the same height in all branches */
+ if (GistPageIsLeaf(page))
+ {
+ if (check_state->leafdepth == -1)
+ check_state->leafdepth = stack->depth;
+ else if (stack->depth != check_state->leafdepth)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+ RelationGetRelationName(check_state->rel), stack->blkno)));
+ }
+
+ /*
+ * Check that each tuple looks valid, and is consistent with the downlink
+ * we followed when we stepped on this page.
+ */
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ ItemId iid = PageGetItemIdCareful(check_state->rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+ IndexTuple tmpTuple = NULL;
+
+ /*
+ * Check that it's not a leftover invalid tuple from pre-9.1 See also
+ * gistdoinsert() and gistbulkdelete() handling of such tuples. We do
+ * consider it an error here.
+ */
+ if (GistTupleIsInvalid(idxtuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("index \"%s\" contains an inner tuple marked as invalid, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i),
+ errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
+ errhint("Please REINDEX it.")));
+
+ if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+
+ /*
+ * Check if this tuple is consistent with the downlink in the parent.
+ */
+ if (stack->parenttup)
+ tmpTuple = gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state);
+
+ if (tmpTuple)
+ {
+ /*
+ * There was a discrepancy between parent and child tuples. We
+ * need to verify it is not a result of concurrent call of
+ * gistplacetopage(). So, lock parent and try to find a downlink for
+ * current page. It may be missing due to concurrent page split,
+ * this is OK.
+ *
+ * Note that when we acquire parent tuple now we hold lock for both
+ * parent and child buffers. Thus the parent tuple must include the
+ * keyspace of the child.
+ */
+
+ pfree(tmpTuple);
+ pfree(stack->parenttup);
+ stack->parenttup = gist_refind_parent(check_state->rel, stack->parentblk,
+ stack->blkno, strategy);
+
+ /* We found it - make a final check before failing */
+ if (!stack->parenttup)
+ elog(NOTICE, "Unable to find parent tuple for block %u on block %u due to concurrent split",
+ stack->blkno, stack->parentblk);
+ else if (gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+ else
+ {
+ /*
+ * But now it is properly adjusted - nothing to do here.
+ */
+ }
+ }
+
+ if (GistPageIsLeaf(page))
+ {
+ if (heapallindexed)
+ {
+ IndexTuple norm;
+
+ norm = gistFormNormalizedTuple(check_state, idxtuple);
+
+ bloom_add_element(check_state->filter,
+ (unsigned char *) norm,
+ IndexTupleSize(norm));
+
+ /* Be tidy */
+ if (norm != idxtuple)
+ pfree(norm);
+ }
+ }
+ else
+ {
+ OffsetNumber off = ItemPointerGetOffsetNumber(&(idxtuple->t_tid));
+
+ if (off != TUPLE_IS_VALID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has on page %u offset %u has item id not pointing to 0xffff, but %hu",
+ RelationGetRelationName(check_state->rel), stack->blkno, i, off)));
+ }
+ }
+}
+
+/*
+ * gistFormNormalizedTuple - analogue to gistFormTuple, but performs deTOASTing
+ * of all included data (for covering indexes). While we do not expect
+ * toasted attributes in normal indexes, this can happen as a result of
+ * intervention into system catalog. Detoasting of key attributes is expected
+ * to be done by opclass decompression methods, if the indexed type might be
+ * toasted.
+ */
+static IndexTuple
+gistFormNormalizedTuple(GistCheckState *giststate,
+ IndexTuple itup)
+{
+ return amcheck_normalize_tuple(giststate->rel, itup);
+}
+
+static void
+gist_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
+ bool *isnull, bool tupleIsAlive, void *checkstate)
+{
+ GistCheckState *state = (GistCheckState *) checkstate;
+ IndexTuple itup, norm;
+ Datum compatt[INDEX_MAX_KEYS];
+
+ /* Generate a normalized index tuple for fingerprinting */
+ gistCompressValues(state->state, index, values, isnull, true, compatt);
+ itup = index_form_tuple(RelationGetDescr(index), compatt, isnull);
+ itup->t_tid = *tid;
+
+ norm = gistFormNormalizedTuple(state, itup);
+
+ /* Probe Bloom filter -- tuple should be present */
+ if (bloom_lacks_element(state->filter, (unsigned char *) norm,
+ IndexTupleSize(norm)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
+ ItemPointerGetBlockNumber(&(norm->t_tid)),
+ ItemPointerGetOffsetNumber(&(norm->t_tid)),
+ RelationGetRelationName(state->heaprel),
+ RelationGetRelationName(state->rel))));
+
+ state->heaptuplespresent++;
+
+ pfree(itup);
+ /* Be tidy */
+ if (norm != itup)
+ pfree(norm);
+}
+
+/*
+ * check_index_page - verification of basic invariants about GiST page data
+ * This function does not do any tuple analysis.
+ */
+static void
+check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
+{
+ Page page = BufferGetPage(buffer);
+
+ gistcheckpage(rel, buffer);
+
+ if (GistPageGetOpaque(page)->gist_page_id != GIST_PAGE_ID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has corrupted page %d",
+ RelationGetRelationName(rel), blockNo)));
+
+ if (GistPageIsDeleted(page))
+ {
+ if (!GistPageIsLeaf(page))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted internal page %d",
+ RelationGetRelationName(rel), blockNo)));
+ if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted page %d with tuples",
+ RelationGetRelationName(rel), blockNo)));
+ }
+ else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has page %d with exceeding count of tuples",
+ RelationGetRelationName(rel), blockNo)));
+}
+
+/*
+ * Try to re-find the downlink pointing to 'blkno', in 'parentblkno'.
+ *
+ * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
+ * returns NULL.
+ */
+static IndexTuple
+gist_refind_parent(Relation rel,
+ BlockNumber parentblkno, BlockNumber childblkno,
+ BufferAccessStrategy strategy)
+{
+ Buffer parentbuf;
+ Page parentpage;
+ OffsetNumber parent_maxoff,
+ off;
+ IndexTuple result = NULL;
+
+ parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
+ strategy);
+
+ LockBuffer(parentbuf, GIST_SHARE);
+ parentpage = BufferGetPage(parentbuf);
+
+ if (GistPageIsLeaf(parentpage))
+ {
+ /*
+ * Currently GiST never deletes internal pages, thus they can never
+ * become leaf.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" internal page %d became leaf",
+ RelationGetRelationName(rel), parentblkno)));
+ }
+
+ parent_maxoff = PageGetMaxOffsetNumber(parentpage);
+ for (off = FirstOffsetNumber; off <= parent_maxoff; off = OffsetNumberNext(off))
+ {
+ ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, off);
+ IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid);
+
+ if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
+ {
+ /*
+ * Found it! Make a copy and return it while both parent and child
+ * pages are locked. This guarantees that at this particular
+ * moment tuples must be coherent to each other.
+ */
+ result = CopyIndexTuple(itup);
+ break;
+ }
+ }
+
+ UnlockReleaseBuffer(parentbuf);
+
+ return result;
+}
+
+static ItemId
+PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
+ OffsetNumber offset)
+{
+ ItemId itemid = PageGetItemId(page, offset);
+
+ if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
+ BLCKSZ - MAXALIGN(sizeof(GISTPageOpaqueData)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("line pointer points past end of tuple space in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ /*
+ * Verify that the line pointer isn't LP_REDIRECT or LP_UNUSED, since gist
+ * never uses either. Verify that the line pointer has storage, too, since
+ * even LP_DEAD items should.
+ */
+ if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
+ ItemIdGetLength(itemid) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("invalid line pointer storage in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ return itemid;
+}
diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml
index 08006856579..917a4936b6d 100644
--- a/doc/src/sgml/amcheck.sgml
+++ b/doc/src/sgml/amcheck.sgml
@@ -208,6 +208,25 @@ ORDER BY c.relpages DESC LIMIT 10;
</listitem>
</varlistentry>
+ <varlistentry>
+ <term>
+ <function>gist_index_check(index regclass, heapallindexed boolean) returns void</function>
+ <indexterm>
+ <primary>gist_index_check</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ <function>gist_index_check</function> tests that its target GiST
+ has consistent parent-child tuples relations (no parent tuples
+ require tuple adjustment) and page graph respects balanced-tree
+ invariants (internal pages reference only leaf page or only internal
+ pages).
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<tip>
<para>
--
2.43.0
On Sat, 10 Jan 2026 at 19:56, Kirill Reshke <reshkekirill@gmail.com> wrote:
On Thu, 1 Jan 2026 at 17:05, Kirill Reshke <reshkekirill@gmail.com> wrote:
CF bot was unhappy about the last version due to an obvious bug; PFA new
version with fixes. The problem was "DROP TABLE toast_bug;" missing from the expected regression output.
[0] https://cirrus-ci.com/task/6378051304423424
--
Best regards,
Kirill Reshke
Attached is a new version with commit message polishing; it also addresses the CF
feedback, which was unhappy due to headercheck.
After a quick preliminary review, here are some comments.
v2026-01-10-0001
================
1.
I'm pretty sure access/heaptoast.h is not needed by verify_nbtree.c.
v2026-01-10-0002
================
1.
+ if (GistPageGetOpaque(page)->gist_page_id != GIST_PAGE_ID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has corrupted page %d",
+ RelationGetRelationName(rel), blockNo)));
+
+ if (GistPageIsDeleted(page))
+ {
+ if (!GistPageIsLeaf(page))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted internal page %d",
+ RelationGetRelationName(rel), blockNo)));
+ if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted page %d with tuples",
+ RelationGetRelationName(rel), blockNo)));
blockNo is an unsigned integer, so we should use %u in the format string.
2.
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" internal page %d became leaf",
+ RelationGetRelationName(rel), parentblkno)));
The same goes for parentblkno — it should also use %u.
--
Best regards,
Kirill Reshke
[2. text/x-diff; v2026-01-10-0001-Move-normalize-tuple-logic-from-nbtcheck.patch]...
[3. text/x-diff; v2026-01-10-0002-Add-gist_index_check-function-to-verify-.patch]...
--
Regards,
Japin Li
ChengDu WenWu Information Technology Co., Ltd.
Hi!
Thanks for your review!
We discussed the patch offline, and I decided to review it and apply fixes
based on your comments.
v2026-01-10-0001
================
1.
I'm pretty sure access/heaptoast.h is not needed by verify_nbtree.c.
In fact this include is necessary because of line verify_common.c:236, which
uses TOAST_INDEX_TARGET.
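
For context, the check in the moved amcheck_normalize_tuple() that needs that
header looks roughly like this (condensed from the 0001 patch; the exact line
number may drift across rebases):

    /* value would be compressed by index_form_tuple() under current settings */
    else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
             VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
             (att->attstorage == TYPSTORAGE_EXTENDED ||
              att->attstorage == TYPSTORAGE_MAIN))
        formnewtup = true;

TOAST_INDEX_TARGET is defined in access/heaptoast.h, hence the include in
verify_common.c.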
v2026-01-10-0002
================
blockNo is an unsigned integer, so we should use %u in the format string.
The same goes for parentblkno — it should also use %u.
Yes, it seems there was a misuse of the format specifier. I attached
v2026-01-12 with the specifiers fixed. Thanks!
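
For reference, the corrected calls in v2026-01-12 should look roughly like this
(sketch; BlockNumber is a uint32, so %u is the right conversion):

    ereport(ERROR,
            (errcode(ERRCODE_INDEX_CORRUPTED),
             errmsg("index \"%s\" has corrupted page %u",
                    RelationGetRelationName(rel), blockNo)));

    ereport(ERROR,
            (errcode(ERRCODE_INDEX_CORRUPTED),
             errmsg("index \"%s\" internal page %u became leaf",
                    RelationGetRelationName(rel), parentblkno)));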
--
Best regards,
Roman Khapov
Attachments:
v2026-01-12-0001-Move-normalize-tuple-logic-from-nbtcheck.patchapplication/octet-stream; name=v2026-01-12-0001-Move-normalize-tuple-logic-from-nbtcheck.patch; x-unix-mode=0644Download
From 8a884265ecd17ab3aceb6c5477ec49618a3db03d Mon Sep 17 00:00:00 2001
From: reshke <reshke@double.cloud>
Date: Wed, 22 Oct 2025 15:29:26 +0000
Subject: [PATCH v2026-01-12 1/2] Move `normalize tuple` logic from nbtcheck to
verify_common
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Preparational patch to reuse index tuple normalize logic
in GiST amcheck.
Reviewed-by: Sergey Dudoladov <sergey.dudoladov@gmail.com>
Reviewed-by: Paul A Jungwirth <pj@illuminatedcomputing.com>
Reviewed-by: Andrey Borodin <x4mmm@yandex-team.ru>
Reviewed-by: Miłosz Bieniek <bieniek.milosz@proton.me>
Discussion: https://postgr.es/m/5FC1B5B6-FB35-44A2-AB62-632F14E958C5@yandex-team.ru
---
contrib/amcheck/verify_common.c | 112 ++++++++++++++++++++++++++++++++
contrib/amcheck/verify_common.h | 4 ++
contrib/amcheck/verify_nbtree.c | 107 +-----------------------------
3 files changed, 117 insertions(+), 106 deletions(-)
diff --git a/contrib/amcheck/verify_common.c b/contrib/amcheck/verify_common.c
index 54ce901716b..9a617ae3bb4 100644
--- a/contrib/amcheck/verify_common.c
+++ b/contrib/amcheck/verify_common.c
@@ -13,6 +13,7 @@
#include "postgres.h"
#include "access/genam.h"
+#include "access/heaptoast.h"
#include "access/table.h"
#include "access/tableam.h"
#include "verify_common.h"
@@ -189,3 +190,114 @@ index_checkable(Relation rel, Oid am_id)
return amcheck_index_mainfork_expected(rel);
}
+
+IndexTuple
+amcheck_normalize_tuple(Relation irel, IndexTuple itup)
+{
+ TupleDesc tupleDescriptor = RelationGetDescr(irel);
+ Datum normalized[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ bool need_free[INDEX_MAX_KEYS];
+ bool formnewtup = false;
+ IndexTuple reformed;
+ int i;
+
+ /* Easy case: It's immediately clear that tuple has no varlena datums */
+ if (!IndexTupleHasVarwidths(itup))
+ return itup;
+
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ {
+ Form_pg_attribute att;
+
+ att = TupleDescAttr(tupleDescriptor, i);
+
+ /* Assume untoasted/already normalized datum initially */
+ need_free[i] = false;
+ normalized[i] = index_getattr(itup, att->attnum,
+ tupleDescriptor,
+ &isnull[i]);
+ if (att->attbyval || att->attlen != -1 || isnull[i])
+ continue;
+
+ /*
+ * Callers always pass a tuple that could safely be inserted into the
+ * index without further processing, so an external varlena header
+ * should never be encountered here
+ */
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
+ ItemPointerGetBlockNumber(&(itup->t_tid)),
+ ItemPointerGetOffsetNumber(&(itup->t_tid)),
+ RelationGetRelationName(irel))));
+ else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
+ VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
+ (att->attstorage == TYPSTORAGE_EXTENDED ||
+ att->attstorage == TYPSTORAGE_MAIN))
+ {
+ /*
+ * This value will be compressed by index_form_tuple() with the
+ * current storage settings. We may be here because this tuple
+ * was formed with different storage settings. So, force forming.
+ */
+ formnewtup = true;
+ }
+ else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
+ {
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
+ need_free[i] = true;
+ }
+
+ /*
+ * Short tuples may have 1B or 4B header. Convert 4B header of short
+ * tuples to 1B
+ */
+ else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
+ {
+ /* convert to short varlena */
+ Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
+ char *data = palloc(len);
+
+ SET_VARSIZE_SHORT(data, len);
+ memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
+
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(data);
+ need_free[i] = true;
+ }
+ }
+
+ /*
+ * Easier case: Tuple has varlena datums, none of which are compressed or
+ * short with 4B header
+ */
+ if (!formnewtup)
+ return itup;
+
+ /*
+ * Hard case: Tuple had compressed varlena datums that necessitate
+ * creating normalized version of the tuple from uncompressed input datums
+ * (normalized input datums). This is rather naive, but shouldn't be
+ * necessary too often.
+ *
+ * In the heap, tuples may contain short varlena datums with both 1B
+ * header and 4B headers. But the corresponding index tuple should always
+ * have such varlena's with 1B headers. So, if there is a short varlena
+ * with 4B header, we need to convert it for fingerprinting.
+ *
+ * Note that we rely on deterministic index_form_tuple() TOAST compression
+ * of normalized input.
+ */
+ reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
+ reformed->t_tid = itup->t_tid;
+
+ /* Cannot leak memory here */
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ if (need_free[i])
+ pfree(DatumGetPointer(normalized[i]));
+
+ return reformed;
+}
\ No newline at end of file
diff --git a/contrib/amcheck/verify_common.h b/contrib/amcheck/verify_common.h
index 4c4ddc01aa7..916bf30d193 100644
--- a/contrib/amcheck/verify_common.h
+++ b/contrib/amcheck/verify_common.h
@@ -10,6 +10,8 @@
*
*-------------------------------------------------------------------------
*/
+
+#include "access/itup.h"
#include "storage/bufpage.h"
#include "storage/lmgr.h"
#include "storage/lockdefs.h"
@@ -26,3 +28,5 @@ extern void amcheck_lock_relation_and_check(Oid indrelid,
Oid am_id,
IndexDoCheckCallback check,
LOCKMODE lockmode, void *state);
+
+extern IndexTuple amcheck_normalize_tuple(Relation irel, IndexTuple itup);
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index 7733ab202e3..c1e24338361 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -2845,115 +2845,10 @@ bt_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
static IndexTuple
bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
{
- TupleDesc tupleDescriptor = RelationGetDescr(state->rel);
- Datum normalized[INDEX_MAX_KEYS];
- bool isnull[INDEX_MAX_KEYS];
- bool need_free[INDEX_MAX_KEYS];
- bool formnewtup = false;
- IndexTuple reformed;
- int i;
-
/* Caller should only pass "logical" non-pivot tuples here */
Assert(!BTreeTupleIsPosting(itup) && !BTreeTupleIsPivot(itup));
- /* Easy case: It's immediately clear that tuple has no varlena datums */
- if (!IndexTupleHasVarwidths(itup))
- return itup;
-
- for (i = 0; i < tupleDescriptor->natts; i++)
- {
- Form_pg_attribute att;
-
- att = TupleDescAttr(tupleDescriptor, i);
-
- /* Assume untoasted/already normalized datum initially */
- need_free[i] = false;
- normalized[i] = index_getattr(itup, att->attnum,
- tupleDescriptor,
- &isnull[i]);
- if (att->attbyval || att->attlen != -1 || isnull[i])
- continue;
-
- /*
- * Callers always pass a tuple that could safely be inserted into the
- * index without further processing, so an external varlena header
- * should never be encountered here
- */
- if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
- ItemPointerGetBlockNumber(&(itup->t_tid)),
- ItemPointerGetOffsetNumber(&(itup->t_tid)),
- RelationGetRelationName(state->rel))));
- else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
- VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
- (att->attstorage == TYPSTORAGE_EXTENDED ||
- att->attstorage == TYPSTORAGE_MAIN))
- {
- /*
- * This value will be compressed by index_form_tuple() with the
- * current storage settings. We may be here because this tuple
- * was formed with different storage settings. So, force forming.
- */
- formnewtup = true;
- }
- else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
- {
- formnewtup = true;
- normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
- need_free[i] = true;
- }
-
- /*
- * Short tuples may have 1B or 4B header. Convert 4B header of short
- * tuples to 1B
- */
- else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
- {
- /* convert to short varlena */
- Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
- char *data = palloc(len);
-
- SET_VARSIZE_SHORT(data, len);
- memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
-
- formnewtup = true;
- normalized[i] = PointerGetDatum(data);
- need_free[i] = true;
- }
- }
-
- /*
- * Easier case: Tuple has varlena datums, none of which are compressed or
- * short with 4B header
- */
- if (!formnewtup)
- return itup;
-
- /*
- * Hard case: Tuple had compressed varlena datums that necessitate
- * creating normalized version of the tuple from uncompressed input datums
- * (normalized input datums). This is rather naive, but shouldn't be
- * necessary too often.
- *
- * In the heap, tuples may contain short varlena datums with both 1B
- * header and 4B headers. But the corresponding index tuple should always
- * have such varlena's with 1B headers. So, if there is a short varlena
- * with 4B header, we need to convert it for fingerprinting.
- *
- * Note that we rely on deterministic index_form_tuple() TOAST compression
- * of normalized input.
- */
- reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
- reformed->t_tid = itup->t_tid;
-
- /* Cannot leak memory here */
- for (i = 0; i < tupleDescriptor->natts; i++)
- if (need_free[i])
- pfree(DatumGetPointer(normalized[i]));
-
- return reformed;
+ return amcheck_normalize_tuple(state->rel, itup);
}
/*
--
2.43.0
v2026-01-12-0002-Add-gist_index_check-function-to-verify-.patch (application/octet-stream)
From b92ea02011744ced7e43e23b58cdce5fa0f9f8af Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Wed, 18 Jun 2025 20:33:28 +0300
Subject: [PATCH v2026-01-12 2/2] Add gist_index_check() function to verify
GiST index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This function traverses the GiST index with a depth-first search and checks
that all downlink tuples are included in the parent tuple's keyspace.
The traversal holds a lock on only one page at a time until some
discrepancy is found. To re-check a suspicious pair of parent and child
tuples it acquires locks on both parent and child pages in the same
order as a page split does.
Author: Andrey Borodin <amborodin@acm.org>
Author: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-By: José Villanova <jose.arthur@gmail.com>
Reviewed-By: Aleksander Alekseev <aleksander@timescale.com>
Reviewed-By: Nikolay Samokhvalov <samokhvalov@gmail.com>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Tomas Vondra <tomas.vondra@enterprisedb.com>
Reviewed-By: Arseniy Mukhin <arseniy(dot)mukhin(dot)dev(at)gmail(dot)com>
Reviewed-By: Miłosz Bieniek <bieniek.milosz@proton.me>
Reviewed-By: Paul A Jungwirth <pj(at)illuminatedcomputing(dot)com>
Reviewed-by: Sergey Dudoladov <sergey.dudoladov@gmail.com>
Reviewed-By: Roman Khapov <r.khapov@ya.ru>
Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru
Discussion: https://postgr.es/m/5FC1B5B6-FB35-44A2-AB62-632F14E958C5@yandex-team.ru
---
contrib/amcheck/Makefile | 5 +-
contrib/amcheck/amcheck--1.5--1.6.sql | 14 +
contrib/amcheck/amcheck.control | 2 +-
contrib/amcheck/expected/check_gist.out | 147 ++++++
contrib/amcheck/meson.build | 3 +
contrib/amcheck/sql/check_gist.sql | 65 +++
contrib/amcheck/verify_gist.c | 665 ++++++++++++++++++++++++
doc/src/sgml/amcheck.sgml | 19 +
8 files changed, 917 insertions(+), 3 deletions(-)
create mode 100644 contrib/amcheck/amcheck--1.5--1.6.sql
create mode 100644 contrib/amcheck/expected/check_gist.out
create mode 100644 contrib/amcheck/sql/check_gist.sql
create mode 100644 contrib/amcheck/verify_gist.c
diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index 1b7a63cbaa4..be28f0b8275 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -5,15 +5,16 @@ OBJS = \
$(WIN32RES) \
verify_common.o \
verify_gin.o \
+ verify_gist.o \
verify_heapam.o \
verify_nbtree.o
EXTENSION = amcheck
DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql \
- amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql
+ amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql amcheck--1.5--1.6.sql
PGFILEDESC = "amcheck - function for verifying relation integrity"
-REGRESS = check check_btree check_gin check_heap
+REGRESS = check check_btree check_gin check_gist check_heap
EXTRA_INSTALL = contrib/pg_walinspect
TAP_TESTS = 1
diff --git a/contrib/amcheck/amcheck--1.5--1.6.sql b/contrib/amcheck/amcheck--1.5--1.6.sql
new file mode 100644
index 00000000000..e2d4ca60d5e
--- /dev/null
+++ b/contrib/amcheck/amcheck--1.5--1.6.sql
@@ -0,0 +1,14 @@
+/* contrib/amcheck/amcheck--1.5--1.6.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.6'" to load this file. \quit
+
+
+-- gist_index_check()
+--
+CREATE FUNCTION gist_index_check(index regclass, heapallindexed boolean)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'gist_index_check'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION gist_index_check(regclass, boolean) FROM PUBLIC;
diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control
index c8ba6d7c9bc..2f329ef2cf4 100644
--- a/contrib/amcheck/amcheck.control
+++ b/contrib/amcheck/amcheck.control
@@ -1,5 +1,5 @@
# amcheck extension
comment = 'functions for verifying relation integrity'
-default_version = '1.5'
+default_version = '1.6'
module_pathname = '$libdir/amcheck'
relocatable = true
diff --git a/contrib/amcheck/expected/check_gist.out b/contrib/amcheck/expected/check_gist.out
new file mode 100644
index 00000000000..e229f38aa48
--- /dev/null
+++ b/contrib/amcheck/expected/check_gist.out
@@ -0,0 +1,147 @@
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gist_check;
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+ attstorage
+------------
+ x
+(1 row)
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE toast_bug;
diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build
index d5137ef691d..18a87fa8f8c 100644
--- a/contrib/amcheck/meson.build
+++ b/contrib/amcheck/meson.build
@@ -3,6 +3,7 @@
amcheck_sources = files(
'verify_common.c',
'verify_gin.c',
+ 'verify_gist.c',
'verify_heapam.c',
'verify_nbtree.c',
)
@@ -27,6 +28,7 @@ install_data(
'amcheck--1.2--1.3.sql',
'amcheck--1.3--1.4.sql',
'amcheck--1.4--1.5.sql',
+ 'amcheck--1.5--1.6.sql',
kwargs: contrib_data_args,
)
@@ -39,6 +41,7 @@ tests += {
'check',
'check_btree',
'check_gin',
+ 'check_gist',
'check_heap',
],
},
diff --git a/contrib/amcheck/sql/check_gist.sql b/contrib/amcheck/sql/check_gist.sql
new file mode 100644
index 00000000000..bc5737a7710
--- /dev/null
+++ b/contrib/amcheck/sql/check_gist.sql
@@ -0,0 +1,65 @@
+
+SELECT setseed(1);
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+-- cleanup
+DROP TABLE gist_check;
+
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
+
+-- cleanup
+DROP TABLE toast_bug;
\ No newline at end of file
diff --git a/contrib/amcheck/verify_gist.c b/contrib/amcheck/verify_gist.c
new file mode 100644
index 00000000000..0d9a50c9dd4
--- /dev/null
+++ b/contrib/amcheck/verify_gist.c
@@ -0,0 +1,665 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_gist.c
+ * Verifies the integrity of GiST indexes based on invariants.
+ *
+ * Verification checks that all paths in the GiST graph contain
+ * consistent keys: tuples on parent pages consistently include tuples
+ * from child pages. Verification also checks graph invariants: an
+ * internal page must have at least one downlink, and an internal page
+ * can reference either only leaf pages or only internal pages.
+ *
+ *
+ * Copyright (c) 2017-2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/verify_gist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gist_private.h"
+#include "access/heaptoast.h"
+#include "access/tableam.h"
+#include "catalog/index.h"
+#include "catalog/pg_am.h"
+#include "common/pg_prng.h"
+#include "lib/bloomfilter.h"
+#include "verify_common.h"
+#include "utils/memutils.h"
+
+
+/*
+ * GistScanItem represents one item of depth-first scan of GiST index.
+ */
+typedef struct GistScanItem
+{
+ int depth;
+
+ /* Referenced block number to check next */
+ BlockNumber blkno;
+
+ /*
+ * Correctness of this parent tuple will be checked against contents of
+ * referenced page. This tuple will be NULL for the root block.
+ */
+ IndexTuple parenttup;
+
+ /*
+ * LSN to handle concurrent scans of the page. It's necessary to avoid
+ * missing some subtrees from the page that was split just before we read it.
+ */
+ XLogRecPtr parentlsn;
+
+ /*
+ * Reference to parent page for re-locking in case of found parent-child
+ * tuple discrepancies.
+ */
+ BlockNumber parentblk;
+
+ /* Pointer to the next stack item. */
+ struct GistScanItem *next;
+} GistScanItem;
+
+typedef struct GistCheckState
+{
+ /* GiST state */
+ GISTSTATE *state;
+ /* Bloom filter fingerprints index tuples */
+ bloom_filter *filter;
+
+ Snapshot snapshot;
+ Relation rel;
+ Relation heaprel;
+
+ /* Debug counter for reporting percentage of work already done */
+ int64 heaptuplespresent;
+
+ /* progress reporting stuff */
+ BlockNumber totalblocks;
+ BlockNumber reportedblocks;
+ BlockNumber scannedblocks;
+ BlockNumber deltablocks;
+
+ int leafdepth;
+} GistCheckState;
+
+PG_FUNCTION_INFO_V1(gist_index_check);
+
+static void giststate_init_heapallindexed(Relation rel, GistCheckState * result);
+static void gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly);
+static void gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed,
+ BufferAccessStrategy strategy);
+static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
+static IndexTuple gist_refind_parent(Relation rel, BlockNumber parentblkno,
+ BlockNumber childblkno,
+ BufferAccessStrategy strategy);
+static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block,
+ Page page, OffsetNumber offset);
+static void gist_tuple_present_callback(Relation index, ItemPointer tid,
+ Datum *values, bool *isnull,
+ bool tupleIsAlive, void *checkstate);
+static IndexTuple gistFormNormalizedTuple(GistCheckState *giststate,
+ IndexTuple itup);
+
+/*
+ * gist_index_check(index regclass)
+ *
+ * Verify integrity of GiST index.
+ *
+ * Acquires AccessShareLock on heap & index relations.
+ */
+Datum
+gist_index_check(PG_FUNCTION_ARGS)
+{
+ Oid indrelid = PG_GETARG_OID(0);
+ bool heapallindexed = PG_GETARG_BOOL(1);
+
+ amcheck_lock_relation_and_check(indrelid,
+ GIST_AM_OID,
+ gist_check_parent_keys_consistency,
+ AccessShareLock,
+ &heapallindexed);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Initialize the parts of the check state needed for heapallindexed
+ * verification: the bloom filter and the MVCC snapshot.
+ */
+static void
+giststate_init_heapallindexed(Relation rel, GistCheckState * result)
+{
+ int64 total_pages;
+ int64 total_elems;
+ uint64 seed;
+
+ /*
+ * Size Bloom filter based on estimated number of tuples in index. This
+ * logic is similar to the B-tree case; see verify_nbtree.c.
+ */
+ total_pages = result->totalblocks;
+ total_elems = Max(total_pages * (MaxOffsetNumber / 5),
+ (int64) rel->rd_rel->reltuples);
+ seed = pg_prng_uint64(&pg_global_prng_state);
+ result->filter = bloom_create(total_elems, maintenance_work_mem, seed);
+
+ result->snapshot = RegisterSnapshot(GetTransactionSnapshot());
+
+
+ /*
+ * GetTransactionSnapshot() always acquires a new MVCC snapshot in READ
+ * COMMITTED mode. A new snapshot is guaranteed to have all the entries
+ * it requires in the index.
+ *
+ * We must defend against the possibility that an old xact snapshot was
+ * returned at higher isolation levels when that snapshot is not safe for
+ * index scans of the target index. This is possible when the snapshot
+ * sees tuples that are before the index's indcheckxmin horizon. Throwing
+ * an error here should be very rare. It doesn't seem worth using a
+ * secondary snapshot to avoid this.
+ */
+ if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
+ !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
+ result->snapshot->xmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("index \"%s\" cannot be verified using transaction snapshot",
+ RelationGetRelationName(rel))));
+}
+
+/*
+ * Main entry point for GiST check.
+ *
+ * This function verifies that the tuples on internal pages cover the
+ * key space of all tuples on their child pages. To do this we invoke
+ * gist_check_page() for every page.
+ *
+ * The check allocates a memory context and scans through the GiST
+ * graph. The scan is performed as a depth-first search using a stack of
+ * GistScanItems. Initially this stack contains only the root block number.
+ * On each iteration the top block number is replaced by the referenced
+ * block numbers.
+ *
+ * gist_check_page() in turn takes every downlink tuple and tries to
+ * adjust it by the tuples on the referenced child page. A parent GiST
+ * tuple should never require any adjustment.
+ */
+static void
+gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly)
+{
+ BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+ GistScanItem *stack;
+ MemoryContext mctx;
+ MemoryContext oldcontext;
+ GISTSTATE *state;
+ bool heapallindexed = *((bool *) callback_state);
+ GistCheckState *check_state = palloc0(sizeof(GistCheckState));
+
+ mctx = AllocSetContextCreate(CurrentMemoryContext,
+ "amcheck context",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(mctx);
+
+ state = initGISTstate(rel);
+
+ check_state->state = state;
+ check_state->rel = rel;
+ check_state->heaprel = heaprel;
+
+ /*
+ * We don't know the height of the tree yet, but as soon as we encounter a
+ * leaf page, we will set 'leafdepth' to its depth.
+ */
+ check_state->leafdepth = -1;
+
+ check_state->totalblocks = RelationGetNumberOfBlocks(rel);
+ /* report every 100 blocks or 5%, whichever is bigger */
+ check_state->deltablocks = Max(check_state->totalblocks / 20, 100);
+
+ if (heapallindexed)
+ giststate_init_heapallindexed(rel, check_state);
+
+ /* Start the scan at the root page */
+ stack = (GistScanItem *) palloc0(sizeof(GistScanItem));
+ stack->depth = 0;
+ stack->parenttup = NULL;
+ stack->parentblk = InvalidBlockNumber;
+ stack->parentlsn = InvalidXLogRecPtr;
+ stack->blkno = GIST_ROOT_BLKNO;
+
+ /*
+ * This GiST scan is effectively an "old" VACUUM version before commit
+ * fe280694d which introduced physical order scanning.
+ */
+
+ while (stack)
+ {
+ GistScanItem *stack_next;
+ Buffer buffer;
+ Page page;
+ XLogRecPtr lsn;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Report progress */
+ if (check_state->scannedblocks > check_state->reportedblocks +
+ check_state->deltablocks)
+ {
+ elog(DEBUG1, "verified level %u blocks of approximately %u total",
+ check_state->scannedblocks, check_state->totalblocks);
+ check_state->reportedblocks = check_state->scannedblocks;
+ }
+ check_state->scannedblocks++;
+
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+ RBM_NORMAL, strategy);
+ LockBuffer(buffer, GIST_SHARE);
+ page = (Page) BufferGetPage(buffer);
+ lsn = BufferGetLSNAtomic(buffer);
+
+ /* Do basic sanity checks on the page headers */
+ check_index_page(rel, buffer, stack->blkno);
+
+ /*
+ * It's possible that the page was split after we looked at the
+ * parent, so that we missed the downlink of the right sibling
+ * when we scanned the parent. If so, add the right sibling to the
+ * stack now.
+ */
+ if (GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page))
+ {
+ /* split page detected, install right link to the stack */
+ GistScanItem *ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+
+ ptr->depth = stack->depth;
+ ptr->parenttup = CopyIndexTuple(stack->parenttup);
+ ptr->parentblk = stack->parentblk;
+ ptr->parentlsn = stack->parentlsn;
+ ptr->blkno = GistPageGetOpaque(page)->rightlink;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+
+ gist_check_page(check_state, stack, page, heapallindexed, strategy);
+
+ if (!GistPageIsLeaf(page))
+ {
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ /* Internal page, so recurse to the child */
+ GistScanItem *ptr;
+ ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+ ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+ ptr->depth = stack->depth + 1;
+ ptr->parenttup = CopyIndexTuple(idxtuple);
+ ptr->parentblk = stack->blkno;
+ ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+ ptr->parentlsn = lsn;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+ }
+
+ LockBuffer(buffer, GIST_UNLOCK);
+ ReleaseBuffer(buffer);
+
+ /* Step to next item in the queue */
+ stack_next = stack->next;
+ if (stack->parenttup)
+ pfree(stack->parenttup);
+ pfree(stack);
+ stack = stack_next;
+ }
+
+ if (heapallindexed)
+ {
+ IndexInfo *indexinfo = BuildIndexInfo(rel);
+ TableScanDesc scan;
+
+ scan = table_beginscan_strat(heaprel, /* relation */
+ check_state->snapshot, /* snapshot */
+ 0, /* number of keys */
+ NULL, /* scan key */
+ true, /* buffer access strategy OK */
+ true); /* syncscan OK? */
+
+ /*
+ * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY.
+ */
+ indexinfo->ii_Concurrent = true;
+
+ indexinfo->ii_Unique = false;
+ indexinfo->ii_ExclusionOps = NULL;
+ indexinfo->ii_ExclusionProcs = NULL;
+ indexinfo->ii_ExclusionStrats = NULL;
+
+ elog(DEBUG1, "verifying that tuples from index \"%s\" are present in \"%s\"",
+ RelationGetRelationName(rel),
+ RelationGetRelationName(heaprel));
+
+ table_index_build_scan(heaprel, rel, indexinfo, true, false,
+ gist_tuple_present_callback, (void *) check_state, scan);
+
+ ereport(DEBUG1,
+ (errmsg_internal("finished verifying presence of " INT64_FORMAT " tuples from table \"%s\" with bitset %.2f%% set",
+ check_state->heaptuplespresent,
+ RelationGetRelationName(heaprel),
+ 100.0 * bloom_prop_bits_set(check_state->filter))));
+
+ UnregisterSnapshot(check_state->snapshot);
+ bloom_free(check_state->filter);
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(mctx);
+ pfree(check_state);
+}
+
+static void
+gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed, BufferAccessStrategy strategy)
+{
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ /* Check that the tree has the same height in all branches */
+ if (GistPageIsLeaf(page))
+ {
+ if (check_state->leafdepth == -1)
+ check_state->leafdepth = stack->depth;
+ else if (stack->depth != check_state->leafdepth)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+ RelationGetRelationName(check_state->rel), stack->blkno)));
+ }
+
+ /*
+ * Check that each tuple looks valid, and is consistent with the downlink
+ * we followed when we stepped on this page.
+ */
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ ItemId iid = PageGetItemIdCareful(check_state->rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+ IndexTuple tmpTuple = NULL;
+
+ /*
+ * Check that it's not a leftover invalid tuple from pre-9.1. See also
+ * gistdoinsert() and gistbulkdelete() handling of such tuples. We do
+ * consider it an error here.
+ */
+ if (GistTupleIsInvalid(idxtuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("index \"%s\" contains an inner tuple marked as invalid, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i),
+ errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
+ errhint("Please REINDEX it.")));
+
+ if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+
+ /*
+ * Check if this tuple is consistent with the downlink in the parent.
+ */
+ if (stack->parenttup)
+ tmpTuple = gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state);
+
+ if (tmpTuple)
+ {
+ /*
+ * There was a discrepancy between parent and child tuples. We
+ * need to verify that it is not a result of a concurrent call of
+ * gistplacetopage(). So, lock the parent and try to find a downlink
+ * for the current page. It may be missing due to a concurrent page
+ * split; this is OK.
+ *
+ * Note that when we re-acquire the parent tuple now, we hold locks
+ * on both parent and child buffers. Thus the parent tuple must
+ * include the keyspace of the child.
+ */
+
+ pfree(tmpTuple);
+ pfree(stack->parenttup);
+ stack->parenttup = gist_refind_parent(check_state->rel, stack->parentblk,
+ stack->blkno, strategy);
+
+ /* If we re-found the parent tuple, make a final check before failing */
+ if (!stack->parenttup)
+ elog(NOTICE, "Unable to find parent tuple for block %u on block %u due to concurrent split",
+ stack->blkno, stack->parentblk);
+ else if (gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+ else
+ {
+ /*
+ * But now it is properly adjusted - nothing to do here.
+ */
+ }
+ }
+
+ if (GistPageIsLeaf(page))
+ {
+ if (heapallindexed)
+ {
+ IndexTuple norm;
+
+ norm = gistFormNormalizedTuple(check_state, idxtuple);
+
+ bloom_add_element(check_state->filter,
+ (unsigned char *) norm,
+ IndexTupleSize(norm));
+
+ /* Be tidy */
+ if (norm != idxtuple)
+ pfree(norm);
+ }
+ }
+ else
+ {
+ OffsetNumber off = ItemPointerGetOffsetNumber(&(idxtuple->t_tid));
+
+ if (off != TUPLE_IS_VALID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has on page %u offset %u has item id not pointing to 0xffff, but %hu",
+ RelationGetRelationName(check_state->rel), stack->blkno, i, off)));
+ }
+ }
+}
+
+/*
+ * gistFormNormalizedTuple - analogue of gistFormTuple, but performs deTOASTing
+ * of all included data (for covering indexes). While we do not expect
+ * toasted attributes in normal indexes, this can happen as a result of
+ * manual intervention in the system catalog. Detoasting of key attributes
+ * is expected to be done by the opclass decompress method, if the indexed
+ * type might be toasted.
+ */
+static IndexTuple
+gistFormNormalizedTuple(GistCheckState *giststate,
+ IndexTuple itup)
+{
+ return amcheck_normalize_tuple(giststate->rel, itup);
+}
+
+static void
+gist_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
+ bool *isnull, bool tupleIsAlive, void *checkstate)
+{
+ GistCheckState *state = (GistCheckState *) checkstate;
+ IndexTuple itup, norm;
+ Datum compatt[INDEX_MAX_KEYS];
+
+ /* Generate a normalized index tuple for fingerprinting */
+ gistCompressValues(state->state, index, values, isnull, true, compatt);
+ itup = index_form_tuple(RelationGetDescr(index), compatt, isnull);
+ itup->t_tid = *tid;
+
+ norm = gistFormNormalizedTuple(state, itup);
+
+ /* Probe Bloom filter -- tuple should be present */
+ if (bloom_lacks_element(state->filter, (unsigned char *) norm,
+ IndexTupleSize(norm)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
+ ItemPointerGetBlockNumber(&(norm->t_tid)),
+ ItemPointerGetOffsetNumber(&(norm->t_tid)),
+ RelationGetRelationName(state->heaprel),
+ RelationGetRelationName(state->rel))));
+
+ state->heaptuplespresent++;
+
+ pfree(itup);
+ /* Be tidy */
+ if (norm != itup)
+ pfree(norm);
+}
+
+/*
+ * check_index_page - verification of basic invariants about GiST page data.
+ * This function does not do any tuple analysis.
+ */
+static void
+check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
+{
+ Page page = BufferGetPage(buffer);
+
+ gistcheckpage(rel, buffer);
+
+ if (GistPageGetOpaque(page)->gist_page_id != GIST_PAGE_ID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has corrupted page %u",
+ RelationGetRelationName(rel), blockNo)));
+
+ if (GistPageIsDeleted(page))
+ {
+ if (!GistPageIsLeaf(page))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted internal page %u",
+ RelationGetRelationName(rel), blockNo)));
+ if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted page %u with tuples",
+ RelationGetRelationName(rel), blockNo)));
+ }
+ else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has page %u with exceeding count of tuples",
+ RelationGetRelationName(rel), blockNo)));
+}
+
+/*
+ * Try to re-find the downlink pointing to 'blkno', in 'parentblkno'.
+ *
+ * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
+ * returns NULL.
+ */
+static IndexTuple
+gist_refind_parent(Relation rel,
+ BlockNumber parentblkno, BlockNumber childblkno,
+ BufferAccessStrategy strategy)
+{
+ Buffer parentbuf;
+ Page parentpage;
+ OffsetNumber parent_maxoff,
+ off;
+ IndexTuple result = NULL;
+
+ parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
+ strategy);
+
+ LockBuffer(parentbuf, GIST_SHARE);
+ parentpage = BufferGetPage(parentbuf);
+
+ if (GistPageIsLeaf(parentpage))
+ {
+ /*
+ * Currently GiST never deletes internal pages, thus they can never
+ * become leaf pages.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" internal page %u became leaf",
+ RelationGetRelationName(rel), parentblkno)));
+ }
+
+ parent_maxoff = PageGetMaxOffsetNumber(parentpage);
+ for (off = FirstOffsetNumber; off <= parent_maxoff; off = OffsetNumberNext(off))
+ {
+ ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, off);
+ IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid);
+
+ if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
+ {
+ /*
+ * Found it! Make a copy and return it while both parent and child
+ * pages are locked. This guarantees that at this particular
+ * moment the tuples are coherent with each other.
+ */
+ result = CopyIndexTuple(itup);
+ break;
+ }
+ }
+
+ UnlockReleaseBuffer(parentbuf);
+
+ return result;
+}
+
+static ItemId
+PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
+ OffsetNumber offset)
+{
+ ItemId itemid = PageGetItemId(page, offset);
+
+ if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
+ BLCKSZ - MAXALIGN(sizeof(GISTPageOpaqueData)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("line pointer points past end of tuple space in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ /*
+ * Verify that the line pointer isn't LP_REDIRECT or LP_UNUSED, since gist
+ * never uses either. Verify that the line pointer has storage, too, since
+ * even LP_DEAD items should.
+ */
+ if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
+ ItemIdGetLength(itemid) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("invalid line pointer storage in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ return itemid;
+}
diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml
index 08006856579..917a4936b6d 100644
--- a/doc/src/sgml/amcheck.sgml
+++ b/doc/src/sgml/amcheck.sgml
@@ -208,6 +208,25 @@ ORDER BY c.relpages DESC LIMIT 10;
</listitem>
</varlistentry>
+ <varlistentry>
+ <term>
+ <function>gist_index_check(index regclass, heapallindexed boolean) returns void</function>
+ <indexterm>
+ <primary>gist_index_check</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ <function>gist_index_check</function> tests that its target GiST
+ index has consistent parent-child tuple relations (no parent tuple
+ requires adjustment) and that its page graph respects balanced-tree
+ invariants (an internal page references either only leaf pages or
+ only internal pages).
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<tip>
<para>
--
2.43.0
Hi, Roman Khapov
On Mon, 12 Jan 2026 at 01:36, Roman Khapov <rkhapov@yandex-team.ru> wrote:
Hi!
Thanks for your review! We discussed the patch offline, and I decided to review it and make fixes
according to your comments.
Thank you for updating the patches.
v2026-01-10-0001
================
1. I'm pretty sure access/heaptoast.h is not needed by verify_nbtree.c.
In fact this include is necessary because of verify_common.c:236, which
uses TOAST_INDEX_TARGET.
Yeah, verify_common.c does require the header, but what I meant was that
verify_nbtree.c no longer needs it.
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index c1e24338361..426e23d2960 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -23,7 +23,6 @@
*/
#include "postgres.h"
-#include "access/heaptoast.h"
#include "access/htup_details.h"
#include "access/nbtree.h"
#include "access/table.h"
--
Regards,
Japin Li
ChengDu WenWu Information Technology Co., Ltd.
Yeah, verify_common.c does require the header, but what I meant was that
verify_nbtree.c no longer needs it.
Oh, thanks, now I see.
Updated the patches.
--
Best regards,
Roman Khapov
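
For anyone trying the patches out, a minimal usage sketch of the new function (the index name below is only a placeholder; the extension update line matches the new 1.5--1.6 script):

ALTER EXTENSION amcheck UPDATE TO '1.6';
-- structural check only: parent tuples must cover their children's keyspace
SELECT gist_index_check('some_gist_index', false);
-- additionally fingerprint the index and verify every heap tuple is present
SELECT gist_index_check('some_gist_index', true);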
Attachments:
v2026-01-12-v2-0001-Move-normalize-tuple-logic-from-nbtch.patch (application/octet-stream)
From 55d69f702bcdc63dd911bb7d6a9e8e7799c4cac1 Mon Sep 17 00:00:00 2001
From: reshke <reshke@double.cloud>
Date: Wed, 22 Oct 2025 15:29:26 +0000
Subject: [PATCH v2026-01-12-v2 1/2] Move `normalize tuple` logic from nbtcheck
to verify_common
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Preparatory patch to reuse the index tuple normalization logic
in GiST amcheck.
Reviewed-by: Sergey Dudoladov <sergey.dudoladov@gmail.com>
Reviewed-by: Paul A Jungwirth <pj@illuminatedcomputing.com>
Reviewed-by: Andrey Borodin <x4mmm@yandex-team.ru>
Reviewed-by: Miłosz Bieniek <bieniek.milosz@proton.me>
Reviewed-by: Japin Li <japinli@hotmail.com>
Reviewed-by: Roman Khapov <r.khapov@ya.ru>
Discussion: https://postgr.es/m/5FC1B5B6-FB35-44A2-AB62-632F14E958C5@yandex-team.ru
---
contrib/amcheck/verify_common.c | 112 ++++++++++++++++++++++++++++++++
contrib/amcheck/verify_common.h | 4 ++
contrib/amcheck/verify_nbtree.c | 108 +-----------------------------
3 files changed, 117 insertions(+), 107 deletions(-)
diff --git a/contrib/amcheck/verify_common.c b/contrib/amcheck/verify_common.c
index 54ce901716b..9a617ae3bb4 100644
--- a/contrib/amcheck/verify_common.c
+++ b/contrib/amcheck/verify_common.c
@@ -13,6 +13,7 @@
#include "postgres.h"
#include "access/genam.h"
+#include "access/heaptoast.h"
#include "access/table.h"
#include "access/tableam.h"
#include "verify_common.h"
@@ -189,3 +190,114 @@ index_checkable(Relation rel, Oid am_id)
return amcheck_index_mainfork_expected(rel);
}
+
+IndexTuple
+amcheck_normalize_tuple(Relation irel, IndexTuple itup)
+{
+ TupleDesc tupleDescriptor = RelationGetDescr(irel);
+ Datum normalized[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ bool need_free[INDEX_MAX_KEYS];
+ bool formnewtup = false;
+ IndexTuple reformed;
+ int i;
+
+ /* Easy case: It's immediately clear that tuple has no varlena datums */
+ if (!IndexTupleHasVarwidths(itup))
+ return itup;
+
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ {
+ Form_pg_attribute att;
+
+ att = TupleDescAttr(tupleDescriptor, i);
+
+ /* Assume untoasted/already normalized datum initially */
+ need_free[i] = false;
+ normalized[i] = index_getattr(itup, att->attnum,
+ tupleDescriptor,
+ &isnull[i]);
+ if (att->attbyval || att->attlen != -1 || isnull[i])
+ continue;
+
+ /*
+ * Callers always pass a tuple that could safely be inserted into the
+ * index without further processing, so an external varlena header
+ * should never be encountered here
+ */
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
+ ItemPointerGetBlockNumber(&(itup->t_tid)),
+ ItemPointerGetOffsetNumber(&(itup->t_tid)),
+ RelationGetRelationName(irel))));
+ else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
+ VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
+ (att->attstorage == TYPSTORAGE_EXTENDED ||
+ att->attstorage == TYPSTORAGE_MAIN))
+ {
+ /*
+ * This value will be compressed by index_form_tuple() with the
+ * current storage settings. We may be here because this tuple
+ * was formed with different storage settings. So, force forming.
+ */
+ formnewtup = true;
+ }
+ else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
+ {
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
+ need_free[i] = true;
+ }
+
+ /*
+ * Short tuples may have 1B or 4B header. Convert 4B header of short
+ * tuples to 1B
+ */
+ else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
+ {
+ /* convert to short varlena */
+ Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
+ char *data = palloc(len);
+
+ SET_VARSIZE_SHORT(data, len);
+ memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
+
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(data);
+ need_free[i] = true;
+ }
+ }
+
+ /*
+ * Easier case: Tuple has varlena datums, none of which are compressed or
+ * short with 4B header
+ */
+ if (!formnewtup)
+ return itup;
+
+ /*
+ * Hard case: Tuple had compressed varlena datums that necessitate
+ * creating normalized version of the tuple from uncompressed input datums
+ * (normalized input datums). This is rather naive, but shouldn't be
+ * necessary too often.
+ *
+ * In the heap, tuples may contain short varlena datums with both 1B
+ * header and 4B headers. But the corresponding index tuple should always
+ * have such varlena's with 1B headers. So, if there is a short varlena
+ * with 4B header, we need to convert it for fingerprinting.
+ *
+ * Note that we rely on deterministic index_form_tuple() TOAST compression
+ * of normalized input.
+ */
+ reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
+ reformed->t_tid = itup->t_tid;
+
+ /* Cannot leak memory here */
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ if (need_free[i])
+ pfree(DatumGetPointer(normalized[i]));
+
+ return reformed;
+}
\ No newline at end of file
diff --git a/contrib/amcheck/verify_common.h b/contrib/amcheck/verify_common.h
index 4c4ddc01aa7..916bf30d193 100644
--- a/contrib/amcheck/verify_common.h
+++ b/contrib/amcheck/verify_common.h
@@ -10,6 +10,8 @@
*
*-------------------------------------------------------------------------
*/
+
+#include "access/itup.h"
#include "storage/bufpage.h"
#include "storage/lmgr.h"
#include "storage/lockdefs.h"
@@ -26,3 +28,5 @@ extern void amcheck_lock_relation_and_check(Oid indrelid,
Oid am_id,
IndexDoCheckCallback check,
LOCKMODE lockmode, void *state);
+
+extern IndexTuple amcheck_normalize_tuple(Relation irel, IndexTuple itup);
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index 7733ab202e3..426e23d2960 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -23,7 +23,6 @@
*/
#include "postgres.h"
-#include "access/heaptoast.h"
#include "access/htup_details.h"
#include "access/nbtree.h"
#include "access/table.h"
@@ -2845,115 +2844,10 @@ bt_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
static IndexTuple
bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
{
- TupleDesc tupleDescriptor = RelationGetDescr(state->rel);
- Datum normalized[INDEX_MAX_KEYS];
- bool isnull[INDEX_MAX_KEYS];
- bool need_free[INDEX_MAX_KEYS];
- bool formnewtup = false;
- IndexTuple reformed;
- int i;
-
/* Caller should only pass "logical" non-pivot tuples here */
Assert(!BTreeTupleIsPosting(itup) && !BTreeTupleIsPivot(itup));
- /* Easy case: It's immediately clear that tuple has no varlena datums */
- if (!IndexTupleHasVarwidths(itup))
- return itup;
-
- for (i = 0; i < tupleDescriptor->natts; i++)
- {
- Form_pg_attribute att;
-
- att = TupleDescAttr(tupleDescriptor, i);
-
- /* Assume untoasted/already normalized datum initially */
- need_free[i] = false;
- normalized[i] = index_getattr(itup, att->attnum,
- tupleDescriptor,
- &isnull[i]);
- if (att->attbyval || att->attlen != -1 || isnull[i])
- continue;
-
- /*
- * Callers always pass a tuple that could safely be inserted into the
- * index without further processing, so an external varlena header
- * should never be encountered here
- */
- if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
- ItemPointerGetBlockNumber(&(itup->t_tid)),
- ItemPointerGetOffsetNumber(&(itup->t_tid)),
- RelationGetRelationName(state->rel))));
- else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
- VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
- (att->attstorage == TYPSTORAGE_EXTENDED ||
- att->attstorage == TYPSTORAGE_MAIN))
- {
- /*
- * This value will be compressed by index_form_tuple() with the
- * current storage settings. We may be here because this tuple
- * was formed with different storage settings. So, force forming.
- */
- formnewtup = true;
- }
- else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
- {
- formnewtup = true;
- normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
- need_free[i] = true;
- }
-
- /*
- * Short tuples may have 1B or 4B header. Convert 4B header of short
- * tuples to 1B
- */
- else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
- {
- /* convert to short varlena */
- Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
- char *data = palloc(len);
-
- SET_VARSIZE_SHORT(data, len);
- memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
-
- formnewtup = true;
- normalized[i] = PointerGetDatum(data);
- need_free[i] = true;
- }
- }
-
- /*
- * Easier case: Tuple has varlena datums, none of which are compressed or
- * short with 4B header
- */
- if (!formnewtup)
- return itup;
-
- /*
- * Hard case: Tuple had compressed varlena datums that necessitate
- * creating normalized version of the tuple from uncompressed input datums
- * (normalized input datums). This is rather naive, but shouldn't be
- * necessary too often.
- *
- * In the heap, tuples may contain short varlena datums with both 1B
- * header and 4B headers. But the corresponding index tuple should always
- * have such varlena's with 1B headers. So, if there is a short varlena
- * with 4B header, we need to convert it for fingerprinting.
- *
- * Note that we rely on deterministic index_form_tuple() TOAST compression
- * of normalized input.
- */
- reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
- reformed->t_tid = itup->t_tid;
-
- /* Cannot leak memory here */
- for (i = 0; i < tupleDescriptor->natts; i++)
- if (need_free[i])
- pfree(DatumGetPointer(normalized[i]));
-
- return reformed;
+ return amcheck_normalize_tuple(state->rel, itup);
}
/*
--
2.43.0
v2026-01-12-v2-0002-Add-gist_index_check-function-to-veri.patch (application/octet-stream)
From ac17ea40aa866e9114b79c41cb7a5dc3fa455bf5 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Wed, 18 Jun 2025 20:33:28 +0300
Subject: [PATCH v2026-01-12-v2 2/2] Add gist_index_check() function to verify
GiST index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This function traverses the GiST index with a depth-first search and checks
that all downlink tuples are included in the parent tuple's keyspace.
The traversal holds a lock on only one page at a time until some
discrepancy is found. To re-check a suspicious pair of parent and child
tuples it acquires locks on both parent and child pages in the same
order as a page split does.
Author: Andrey Borodin <amborodin@acm.org>
Author: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-By: José Villanova <jose.arthur@gmail.com>
Reviewed-By: Aleksander Alekseev <aleksander@timescale.com>
Reviewed-By: Nikolay Samokhvalov <samokhvalov@gmail.com>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Tomas Vondra <tomas.vondra@enterprisedb.com>
Reviewed-By: Arseniy Mukhin <arseniy(dot)mukhin(dot)dev(at)gmail(dot)com>
Reviewed-By: Miłosz Bieniek <bieniek.milosz@proton.me>
Reviewed-By: Paul A Jungwirth <pj(at)illuminatedcomputing(dot)com>
Reviewed-by: Sergey Dudoladov <sergey.dudoladov@gmail.com>
Reviewed-by: Japin Li <japinli@hotmail.com>
Reviewed-By: Roman Khapov <r.khapov@ya.ru>
Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru
Discussion: https://postgr.es/m/5FC1B5B6-FB35-44A2-AB62-632F14E958C5@yandex-team.ru
---
contrib/amcheck/Makefile | 5 +-
contrib/amcheck/amcheck--1.5--1.6.sql | 14 +
contrib/amcheck/amcheck.control | 2 +-
contrib/amcheck/expected/check_gist.out | 147 ++++++
contrib/amcheck/meson.build | 3 +
contrib/amcheck/sql/check_gist.sql | 65 +++
contrib/amcheck/verify_gist.c | 665 ++++++++++++++++++++++++
doc/src/sgml/amcheck.sgml | 19 +
8 files changed, 917 insertions(+), 3 deletions(-)
create mode 100644 contrib/amcheck/amcheck--1.5--1.6.sql
create mode 100644 contrib/amcheck/expected/check_gist.out
create mode 100644 contrib/amcheck/sql/check_gist.sql
create mode 100644 contrib/amcheck/verify_gist.c
diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index 1b7a63cbaa4..be28f0b8275 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -5,15 +5,16 @@ OBJS = \
$(WIN32RES) \
verify_common.o \
verify_gin.o \
+ verify_gist.o \
verify_heapam.o \
verify_nbtree.o
EXTENSION = amcheck
DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql \
- amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql
+ amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql amcheck--1.5--1.6.sql
PGFILEDESC = "amcheck - function for verifying relation integrity"
-REGRESS = check check_btree check_gin check_heap
+REGRESS = check check_btree check_gin check_gist check_heap
EXTRA_INSTALL = contrib/pg_walinspect
TAP_TESTS = 1
diff --git a/contrib/amcheck/amcheck--1.5--1.6.sql b/contrib/amcheck/amcheck--1.5--1.6.sql
new file mode 100644
index 00000000000..e2d4ca60d5e
--- /dev/null
+++ b/contrib/amcheck/amcheck--1.5--1.6.sql
@@ -0,0 +1,14 @@
+/* contrib/amcheck/amcheck--1.5--1.6.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.6'" to load this file. \quit
+
+
+-- gist_index_check()
+--
+CREATE FUNCTION gist_index_check(index regclass, heapallindexed boolean)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'gist_index_check'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION gist_index_check(regclass, boolean) FROM PUBLIC;
diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control
index c8ba6d7c9bc..2f329ef2cf4 100644
--- a/contrib/amcheck/amcheck.control
+++ b/contrib/amcheck/amcheck.control
@@ -1,5 +1,5 @@
# amcheck extension
comment = 'functions for verifying relation integrity'
-default_version = '1.5'
+default_version = '1.6'
module_pathname = '$libdir/amcheck'
relocatable = true
diff --git a/contrib/amcheck/expected/check_gist.out b/contrib/amcheck/expected/check_gist.out
new file mode 100644
index 00000000000..e229f38aa48
--- /dev/null
+++ b/contrib/amcheck/expected/check_gist.out
@@ -0,0 +1,147 @@
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gist_check;
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+ attstorage
+------------
+ x
+(1 row)
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE toast_bug;
diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build
index d5137ef691d..18a87fa8f8c 100644
--- a/contrib/amcheck/meson.build
+++ b/contrib/amcheck/meson.build
@@ -3,6 +3,7 @@
amcheck_sources = files(
'verify_common.c',
'verify_gin.c',
+ 'verify_gist.c',
'verify_heapam.c',
'verify_nbtree.c',
)
@@ -27,6 +28,7 @@ install_data(
'amcheck--1.2--1.3.sql',
'amcheck--1.3--1.4.sql',
'amcheck--1.4--1.5.sql',
+ 'amcheck--1.5--1.6.sql',
kwargs: contrib_data_args,
)
@@ -39,6 +41,7 @@ tests += {
'check',
'check_btree',
'check_gin',
+ 'check_gist',
'check_heap',
],
},
diff --git a/contrib/amcheck/sql/check_gist.sql b/contrib/amcheck/sql/check_gist.sql
new file mode 100644
index 00000000000..bc5737a7710
--- /dev/null
+++ b/contrib/amcheck/sql/check_gist.sql
@@ -0,0 +1,65 @@
+
+SELECT setseed(1);
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+-- cleanup
+DROP TABLE gist_check;
+
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
+
+-- cleanup
+DROP TABLE toast_bug;
\ No newline at end of file
diff --git a/contrib/amcheck/verify_gist.c b/contrib/amcheck/verify_gist.c
new file mode 100644
index 00000000000..0d9a50c9dd4
--- /dev/null
+++ b/contrib/amcheck/verify_gist.c
@@ -0,0 +1,665 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_gist.c
+ * Verifies the integrity of GiST indexes based on invariants.
+ *
+ * Verification checks that all paths in the GiST graph contain
+ * consistent keys: tuples on parent pages consistently include tuples
+ * from child pages. Verification also checks graph invariants: an
+ * internal page must have at least one downlink, and an internal page
+ * can reference either only leaf pages or only internal pages.
+ *
+ *
+ * Copyright (c) 2017-2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/verify_gist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gist_private.h"
+#include "access/heaptoast.h"
+#include "access/tableam.h"
+#include "catalog/index.h"
+#include "catalog/pg_am.h"
+#include "common/pg_prng.h"
+#include "lib/bloomfilter.h"
+#include "verify_common.h"
+#include "utils/memutils.h"
+
+
+/*
+ * GistScanItem represents one item of depth-first scan of GiST index.
+ */
+typedef struct GistScanItem
+{
+ int depth;
+
+ /* Referenced block number to check next */
+ BlockNumber blkno;
+
+ /*
+ * Correctness of this parent tuple will be checked against the contents
+ * of the referenced page. This tuple is NULL for the root block.
+ */
+ IndexTuple parenttup;
+
+ /*
+ * LSN of the parent page at the time it was scanned, used to detect
+ * concurrent splits of the child page. Without it we could miss subtrees
+ * moved to a right sibling by a split that happened after we scanned the
+ * parent.
+ */
+ XLogRecPtr parentlsn;
+
+ /*
+ * Block number of the parent page, used to re-lock the parent if a
+ * parent-child tuple discrepancy is found.
+ */
+ BlockNumber parentblk;
+
+ /* Pointer to the next stack item. */
+ struct GistScanItem *next;
+} GistScanItem;
+
+typedef struct GistCheckState
+{
+ /* GiST state */
+ GISTSTATE *state;
+ /* Bloom filter fingerprints index tuples */
+ bloom_filter *filter;
+
+ Snapshot snapshot;
+ Relation rel;
+ Relation heaprel;
+
+ /* Debug counter of heap tuples verified to be present in the index */
+ int64 heaptuplespresent;
+
+ /* progress reporting stuff */
+ BlockNumber totalblocks;
+ BlockNumber reportedblocks;
+ BlockNumber scannedblocks;
+ BlockNumber deltablocks;
+
+ int leafdepth;
+} GistCheckState;
+
+PG_FUNCTION_INFO_V1(gist_index_check);
+
+static void giststate_init_heapallindexed(Relation rel, GistCheckState * result);
+static void gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly);
+static void gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed,
+ BufferAccessStrategy strategy);
+static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
+static IndexTuple gist_refind_parent(Relation rel, BlockNumber parentblkno,
+ BlockNumber childblkno,
+ BufferAccessStrategy strategy);
+static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block,
+ Page page, OffsetNumber offset);
+static void gist_tuple_present_callback(Relation index, ItemPointer tid,
+ Datum *values, bool *isnull,
+ bool tupleIsAlive, void *checkstate);
+static IndexTuple gistFormNormalizedTuple(GistCheckState *giststate,
+ IndexTuple itup);
+
+/*
+ * gist_index_check(index regclass, heapallindexed boolean)
+ *
+ * Verify integrity of GiST index.
+ *
+ * Acquires AccessShareLock on heap & index relations.
+ */
+Datum
+gist_index_check(PG_FUNCTION_ARGS)
+{
+ Oid indrelid = PG_GETARG_OID(0);
+ bool heapallindexed = PG_GETARG_BOOL(1);
+
+ amcheck_lock_relation_and_check(indrelid,
+ GIST_AM_OID,
+ gist_check_parent_keys_consistency,
+ AccessShareLock,
+ &heapallindexed);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Initialize the parts of the check state needed for the heapallindexed
+ * check: the Bloom filter and the snapshot.
+ */
+static void
+giststate_init_heapallindexed(Relation rel, GistCheckState * result)
+{
+ int64 total_pages;
+ int64 total_elems;
+ uint64 seed;
+
+ /*
+ * Size the Bloom filter based on the estimated number of tuples in the
+ * index. This logic is similar to the B-tree case; see verify_nbtree.c.
+ */
+ total_pages = result->totalblocks;
+ total_elems = Max(total_pages * (MaxOffsetNumber / 5),
+ (int64) rel->rd_rel->reltuples);
+ seed = pg_prng_uint64(&pg_global_prng_state);
+ result->filter = bloom_create(total_elems, maintenance_work_mem, seed);
+
+ result->snapshot = RegisterSnapshot(GetTransactionSnapshot());
+
+
+ /*
+ * GetTransactionSnapshot() always acquires a new MVCC snapshot in READ
+ * COMMITTED mode. A new snapshot is guaranteed to have all the entries
+ * it requires in the index.
+ *
+ * We must defend against the possibility that an old xact snapshot was
+ * returned at higher isolation levels when that snapshot is not safe for
+ * index scans of the target index. This is possible when the snapshot
+ * sees tuples that are before the index's indcheckxmin horizon. Throwing
+ * an error here should be very rare. It doesn't seem worth using a
+ * secondary snapshot to avoid this.
+ */
+ if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
+ !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
+ result->snapshot->xmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("index \"%s\" cannot be verified using transaction snapshot",
+ RelationGetRelationName(rel))));
+}
+
+/*
+ * Main entry point for GiST check.
+ *
+ * This function verifies that the tuples on internal pages cover all
+ * the key space of the tuples on the leaf pages. To do this we invoke
+ * gist_check_page() for every page.
+ *
+ * The check allocates a memory context and scans through the GiST graph
+ * in a depth-first search, using a stack of GistScanItems. Initially this
+ * stack contains only the root block number. On each iteration the top
+ * block number is replaced by the referenced block numbers.
+ *
+ * gist_check_page() in its turn takes every parent tuple and tries to
+ * adjust it using the tuples on the referenced child page. A parent GiST
+ * tuple should never require any adjustment.
+ */
+static void
+gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly)
+{
+ BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+ GistScanItem *stack;
+ MemoryContext mctx;
+ MemoryContext oldcontext;
+ GISTSTATE *state;
+ bool heapallindexed = *((bool *) callback_state);
+ GistCheckState *check_state = palloc0(sizeof(GistCheckState));
+
+ mctx = AllocSetContextCreate(CurrentMemoryContext,
+ "amcheck context",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(mctx);
+
+ state = initGISTstate(rel);
+
+ check_state->state = state;
+ check_state->rel = rel;
+ check_state->heaprel = heaprel;
+
+ /*
+ * We don't know the height of the tree yet, but as soon as we encounter a
+ * leaf page, we will set 'leafdepth' to its depth.
+ */
+ check_state->leafdepth = -1;
+
+ check_state->totalblocks = RelationGetNumberOfBlocks(rel);
+ /* report every 100 blocks or 5%, whichever is bigger */
+ check_state->deltablocks = Max(check_state->totalblocks / 20, 100);
+
+ if (heapallindexed)
+ giststate_init_heapallindexed(rel, check_state);
+
+ /* Start the scan at the root page */
+ stack = (GistScanItem *) palloc0(sizeof(GistScanItem));
+ stack->depth = 0;
+ stack->parenttup = NULL;
+ stack->parentblk = InvalidBlockNumber;
+ stack->parentlsn = InvalidXLogRecPtr;
+ stack->blkno = GIST_ROOT_BLKNO;
+
+ /*
+ * This GiST scan is effectively the "old" VACUUM scan strategy used
+ * before commit fe280694d, which introduced physical-order scanning.
+ */
+
+ while (stack)
+ {
+ GistScanItem *stack_next;
+ Buffer buffer;
+ Page page;
+ XLogRecPtr lsn;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Report progress */
+ if (check_state->scannedblocks > check_state->reportedblocks +
+ check_state->deltablocks)
+ {
+ elog(DEBUG1, "verified level %u blocks of approximately %u total",
+ check_state->scannedblocks, check_state->totalblocks);
+ check_state->reportedblocks = check_state->scannedblocks;
+ }
+ check_state->scannedblocks++;
+
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+ RBM_NORMAL, strategy);
+ LockBuffer(buffer, GIST_SHARE);
+ page = (Page) BufferGetPage(buffer);
+ lsn = BufferGetLSNAtomic(buffer);
+
+ /* Do basic sanity checks on the page headers */
+ check_index_page(rel, buffer, stack->blkno);
+
+ /*
+ * It's possible that the page was split after we looked at the
+ * parent, in which case the parent we scanned does not contain a
+ * downlink for the new right sibling. To avoid missing that subtree,
+ * add the right sibling to the stack now.
+ */
+ if (GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page))
+ {
+ /* split page detected, install right link to the stack */
+ GistScanItem *ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+
+ ptr->depth = stack->depth;
+ ptr->parenttup = CopyIndexTuple(stack->parenttup);
+ ptr->parentblk = stack->parentblk;
+ ptr->parentlsn = stack->parentlsn;
+ ptr->blkno = GistPageGetOpaque(page)->rightlink;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+
+ gist_check_page(check_state, stack, page, heapallindexed, strategy);
+
+ if (!GistPageIsLeaf(page))
+ {
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ /* Internal page, so recurse to the child */
+ GistScanItem *ptr;
+ ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+ ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+ ptr->depth = stack->depth + 1;
+ ptr->parenttup = CopyIndexTuple(idxtuple);
+ ptr->parentblk = stack->blkno;
+ ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+ ptr->parentlsn = lsn;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+ }
+
+ LockBuffer(buffer, GIST_UNLOCK);
+ ReleaseBuffer(buffer);
+
+ /* Step to the next item on the stack */
+ stack_next = stack->next;
+ if (stack->parenttup)
+ pfree(stack->parenttup);
+ pfree(stack);
+ stack = stack_next;
+ }
+
+ if (heapallindexed)
+ {
+ IndexInfo *indexinfo = BuildIndexInfo(rel);
+ TableScanDesc scan;
+
+ scan = table_beginscan_strat(heaprel, /* relation */
+ check_state->snapshot, /* snapshot */
+ 0, /* number of keys */
+ NULL, /* scan key */
+ true, /* buffer access strategy OK */
+ true); /* syncscan OK? */
+
+ /*
+ * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY.
+ */
+ indexinfo->ii_Concurrent = true;
+
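+ /*
+ * The scan does not need to enforce uniqueness or exclusion
+ * constraints; only the presence of tuples is being verified.
+ */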
+ indexinfo->ii_Unique = false;
+ indexinfo->ii_ExclusionOps = NULL;
+ indexinfo->ii_ExclusionProcs = NULL;
+ indexinfo->ii_ExclusionStrats = NULL;
+
+ elog(DEBUG1, "verifying that tuples from index \"%s\" are present in \"%s\"",
+ RelationGetRelationName(rel),
+ RelationGetRelationName(heaprel));
+
+ table_index_build_scan(heaprel, rel, indexinfo, true, false,
+ gist_tuple_present_callback, (void *) check_state, scan);
+
+ ereport(DEBUG1,
+ (errmsg_internal("finished verifying presence of " INT64_FORMAT " tuples from table \"%s\" with bitset %.2f%% set",
+ check_state->heaptuplespresent,
+ RelationGetRelationName(heaprel),
+ 100.0 * bloom_prop_bits_set(check_state->filter))));
+
+ UnregisterSnapshot(check_state->snapshot);
+ bloom_free(check_state->filter);
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(mctx);
+ pfree(check_state);
+}
+
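+/*
+ * Check a single GiST page: verify that all leaf pages are at the same
+ * depth, that each tuple looks structurally valid, and that each tuple is
+ * consistent with the parent downlink that was followed to reach this page.
+ * On leaf pages, tuples are also fingerprinted into the Bloom filter when
+ * the heapallindexed check is requested.
+ */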
+static void
+gist_check_page(GistCheckState * check_state, GistScanItem * stack,
+ Page page, bool heapallindexed, BufferAccessStrategy strategy)
+{
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ /* Check that the tree has the same height in all branches */
+ if (GistPageIsLeaf(page))
+ {
+ if (check_state->leafdepth == -1)
+ check_state->leafdepth = stack->depth;
+ else if (stack->depth != check_state->leafdepth)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+ RelationGetRelationName(check_state->rel), stack->blkno)));
+ }
+
+ /*
+ * Check that each tuple looks valid, and is consistent with the downlink
+ * we followed when we descended to this page.
+ */
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ ItemId iid = PageGetItemIdCareful(check_state->rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+ IndexTuple tmpTuple = NULL;
+
+ /*
+ * Check that it's not a leftover invalid tuple from pre-9.1. See also
+ * gistdoinsert() and gistbulkdelete() handling of such tuples. We do
+ * consider it an error here.
+ */
+ if (GistTupleIsInvalid(idxtuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("index \"%s\" contains an inner tuple marked as invalid, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i),
+ errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
+ errhint("Please REINDEX it.")));
+
+ if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+
+ /*
+ * Check if this tuple is consistent with the downlink in the parent.
+ */
+ if (stack->parenttup)
+ tmpTuple = gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state);
+
+ if (tmpTuple)
+ {
+ /*
+ * There was a discrepancy between parent and child tuples. We need
+ * to verify that it is not the result of a concurrent call to
+ * gistplacetopage(). So, lock the parent and try to re-find the
+ * downlink for the current page. It may be missing due to a
+ * concurrent page split; that is OK.
+ *
+ * Note that when we re-acquire the parent tuple we hold locks on
+ * both the parent and child buffers, so the parent tuple must
+ * include the keyspace of the child.
+ */
+
+ pfree(tmpTuple);
+ pfree(stack->parenttup);
+ stack->parenttup = gist_refind_parent(check_state->rel, stack->parentblk,
+ stack->blkno, strategy);
+
+ /* Re-check with the refound parent tuple, or accept a missing downlink */
+ if (!stack->parenttup)
+ elog(NOTICE, "unable to find parent tuple for block %u on parent block %u due to concurrent split",
+ stack->blkno, stack->parentblk);
+ else if (gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+ else
+ {
+ /*
+ * But now it is properly adjusted - nothing to do here.
+ */
+ }
+ }
+
+ if (GistPageIsLeaf(page))
+ {
+ if (heapallindexed)
+ {
+ IndexTuple norm;
+
+ norm = gistFormNormalizedTuple(check_state, idxtuple);
+
+ bloom_add_element(check_state->filter,
+ (unsigned char *) norm,
+ IndexTupleSize(norm));
+
+ /* Be tidy */
+ if (norm != idxtuple)
+ pfree(norm);
+ }
+ }
+ else
+ {
+ OffsetNumber off = ItemPointerGetOffsetNumber(&(idxtuple->t_tid));
+
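+ /*
+ * Downlink tuples on internal pages do not point at heap tuples, so
+ * their item pointer offset must hold the magic value TUPLE_IS_VALID
+ * (0xffff).
+ */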
+ if (off != TUPLE_IS_VALID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has on page %u offset %u has item id not pointing to 0xffff, but %hu",
+ RelationGetRelationName(check_state->rel), stack->blkno, i, off)));
+ }
+ }
+}
+
+/*
+ * gistFormNormalizedTuple - analogue of gistFormTuple, but detoasts
+ * all included data (for covering indexes). While we do not expect
+ * toasted attributes in normal indexes, they can appear as a result of
+ * manual changes to the system catalogs. Detoasting of key attributes is expected
+ * to be done by opclass decompression methods, if the indexed type might be
+ * toasted.
+ */
+static IndexTuple
+gistFormNormalizedTuple(GistCheckState *giststate,
+ IndexTuple itup)
+{
+ return amcheck_normalize_tuple(giststate->rel, itup);
+}
+
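+/*
+ * Callback for table_index_build_scan: for each heap tuple, form the
+ * normalized index tuple it should have produced and probe the Bloom
+ * filter populated during the index scan; report corruption if it is
+ * absent.
+ */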
+static void
+gist_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
+ bool *isnull, bool tupleIsAlive, void *checkstate)
+{
+ GistCheckState *state = (GistCheckState *) checkstate;
+ IndexTuple itup, norm;
+ Datum compatt[INDEX_MAX_KEYS];
+
+ /* Generate a normalized index tuple for fingerprinting */
+ gistCompressValues(state->state, index, values, isnull, true, compatt);
+ itup = index_form_tuple(RelationGetDescr(index), compatt, isnull);
+ itup->t_tid = *tid;
+
+ norm = gistFormNormalizedTuple(state, itup);
+
+ /* Probe Bloom filter -- tuple should be present */
+ if (bloom_lacks_element(state->filter, (unsigned char *) norm,
+ IndexTupleSize(norm)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
+ ItemPointerGetBlockNumber(&(norm->t_tid)),
+ ItemPointerGetOffsetNumber(&(norm->t_tid)),
+ RelationGetRelationName(state->heaprel),
+ RelationGetRelationName(state->rel))));
+
+ state->heaptuplespresent++;
+
+ pfree(itup);
+ /* Be tidy */
+ if (norm != itup)
+ pfree(norm);
+}
+
+/*
+ * check_index_page - verification of basic invariants about GiST page data.
+ * This function does not do any tuple analysis.
+ */
+static void
+check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
+{
+ Page page = BufferGetPage(buffer);
+
+ gistcheckpage(rel, buffer);
+
+ if (GistPageGetOpaque(page)->gist_page_id != GIST_PAGE_ID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has corrupted page %u",
+ RelationGetRelationName(rel), blockNo)));
+
+ if (GistPageIsDeleted(page))
+ {
+ if (!GistPageIsLeaf(page))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted internal page %u",
+ RelationGetRelationName(rel), blockNo)));
+ if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted page %u with tuples",
+ RelationGetRelationName(rel), blockNo)));
+ }
+ else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has page %u with exceeding count of tuples",
+ RelationGetRelationName(rel), blockNo)));
+}
+
+/*
+ * Try to re-find the downlink pointing to 'blkno', in 'parentblkno'.
+ *
+ * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
+ * returns NULL.
+ */
+static IndexTuple
+gist_refind_parent(Relation rel,
+ BlockNumber parentblkno, BlockNumber childblkno,
+ BufferAccessStrategy strategy)
+{
+ Buffer parentbuf;
+ Page parentpage;
+ OffsetNumber parent_maxoff,
+ off;
+ IndexTuple result = NULL;
+
+ parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
+ strategy);
+
+ LockBuffer(parentbuf, GIST_SHARE);
+ parentpage = BufferGetPage(parentbuf);
+
+ if (GistPageIsLeaf(parentpage))
+ {
+ /*
+ * Currently GiST never deletes internal pages, so they can never
+ * become leaf pages.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" internal page %u became leaf",
+ RelationGetRelationName(rel), parentblkno)));
+ }
+
+ parent_maxoff = PageGetMaxOffsetNumber(parentpage);
+ for (off = FirstOffsetNumber; off <= parent_maxoff; off = OffsetNumberNext(off))
+ {
+ ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, off);
+ IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid);
+
+ if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
+ {
+ /*
+ * Found it! Make a copy and return it while both parent and child
+ * pages are locked. This guarantees that at this particular
+ * moment the tuples must be consistent with each other.
+ */
+ result = CopyIndexTuple(itup);
+ break;
+ }
+ }
+
+ UnlockReleaseBuffer(parentbuf);
+
+ return result;
+}
+
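+/*
+ * PageGetItemId() wrapper that validates the returned line pointer before
+ * it is used to access tuple data, reporting corruption instead of risking
+ * out-of-bounds access on a damaged page.
+ */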
+static ItemId
+PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
+ OffsetNumber offset)
+{
+ ItemId itemid = PageGetItemId(page, offset);
+
+ if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
+ BLCKSZ - MAXALIGN(sizeof(GISTPageOpaqueData)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("line pointer points past end of tuple space in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ /*
+ * Verify that the line pointer isn't LP_REDIRECT or LP_UNUSED, since GiST
+ * never uses either. Verify that the line pointer has storage, too, since
+ * even LP_DEAD items should.
+ */
+ if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
+ ItemIdGetLength(itemid) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("invalid line pointer storage in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ return itemid;
+}
diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml
index 08006856579..917a4936b6d 100644
--- a/doc/src/sgml/amcheck.sgml
+++ b/doc/src/sgml/amcheck.sgml
@@ -208,6 +208,25 @@ ORDER BY c.relpages DESC LIMIT 10;
</listitem>
</varlistentry>
+ <varlistentry>
+ <term>
+ <function>gist_index_check(index regclass, heapallindexed boolean) returns void</function>
+ <indexterm>
+ <primary>gist_index_check</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ <function>gist_index_check</function> tests that its target GiST
+ has consistent parent-child tuples relations (no parent tuples
+ require tuple adjustment) and page graph respects balanced-tree
+ invariants (internal pages reference only leaf page or only internal
+ pages).
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<tip>
<para>
--
2.43.0