From c82e275a02751bf82d62e6802e750e99a12a4215 Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <andrew@dunslane.net>
Date: Sat, 13 Jan 2024 08:16:11 -0500
Subject: [PATCH v3 2/3] Add support for incrementally parsing backup manifests

This adds the infrastructure for using the new non-recursive JSON parser
in processing manifests. It's important that callers make sure that the
last piece of JSON handed to the incremental manifest parser contains
the entire last few lines of the manifest, including the checksum.
---
 src/common/parse_manifest.c         | 120 ++++++++++++++++++++++++++--
 src/include/common/parse_manifest.h |   5 ++
 2 files changed, 117 insertions(+), 8 deletions(-)

diff --git a/src/common/parse_manifest.c b/src/common/parse_manifest.c
index 92a97714f3..ef68b41726 100644
--- a/src/common/parse_manifest.c
+++ b/src/common/parse_manifest.c
@@ -88,6 +88,13 @@ typedef struct
 	char	   *manifest_checksum;
 } JsonManifestParseState;
 
+typedef struct JsonManifestParseIncrementalState
+{
+	JsonLexContext lex;
+	JsonSemAction sem;
+	pg_cryptohash_ctx *manifest_ctx;
+} JsonManifestParseIncrementalState;
+
 static JsonParseErrorType json_manifest_object_start(void *state);
 static JsonParseErrorType json_manifest_object_end(void *state);
 static JsonParseErrorType json_manifest_array_start(void *state);
@@ -99,7 +106,8 @@ static JsonParseErrorType json_manifest_scalar(void *state, char *token,
 static void json_manifest_finalize_file(JsonManifestParseState *parse);
 static void json_manifest_finalize_wal_range(JsonManifestParseState *parse);
 static void verify_manifest_checksum(JsonManifestParseState *parse,
-									 char *buffer, size_t size);
+									 char *buffer, size_t size,
+									 pg_cryptohash_ctx *incr_ctx);
 static void json_manifest_parse_failure(JsonManifestParseContext *context,
 										char *msg);
 
@@ -107,6 +115,89 @@ static int	hexdecode_char(char c);
 static bool hexdecode_string(uint8 *result, char *input, int nbytes);
 static bool parse_xlogrecptr(XLogRecPtr *result, char *input);
 
+/*
+ * Set up for incremental parsing of the manifest.  This allocates the parse
+ * state and starts the SHA-256 context that later chunks are fed into.
+ */
+
+JsonManifestParseIncrementalState *
+json_parse_manifest_incremental_init(JsonManifestParseContext *context)
+{
+	JsonManifestParseIncrementalState *incstate;
+	JsonManifestParseState *parse;
+	pg_cryptohash_ctx *manifest_ctx;
+
+	incstate = palloc(sizeof(JsonManifestParseIncrementalState));
+	parse = palloc(sizeof(JsonManifestParseState));
+
+	parse->context = context;
+	parse->state = JM_EXPECT_TOPLEVEL_START;
+	parse->saw_version_field = false;
+
+	makeJsonLexContextIncremental(&(incstate->lex), PG_UTF8, true);
+
+	incstate->sem.semstate = parse;
+	incstate->sem.object_start = json_manifest_object_start;
+	incstate->sem.object_end = json_manifest_object_end;
+	incstate->sem.array_start = json_manifest_array_start;
+	incstate->sem.array_end = json_manifest_array_end;
+	incstate->sem.object_field_start = json_manifest_object_field_start;
+	incstate->sem.object_field_end = NULL;
+	incstate->sem.array_element_start = NULL;
+	incstate->sem.array_element_end = NULL;
+	incstate->sem.scalar = json_manifest_scalar;
+
+	manifest_ctx = pg_cryptohash_create(PG_SHA256);
+	if (manifest_ctx == NULL)
+		context->error_cb(context, "out of memory");
+	if (pg_cryptohash_init(manifest_ctx) < 0)
+		context->error_cb(context, "could not initialize checksum of manifest");
+	incstate->manifest_ctx = manifest_ctx;
+
+	return incstate;
+}
+
+/*
+ * Parse the manifest in pieces.
+ *
+ * The caller must ensure that the final piece contains the final lines
+ * with the complete checksum.
+ */
+
+void
+json_parse_manifest_incremental_chunk(
+									  JsonManifestParseIncrementalState *incstate, char *chunk, int size,
+									  bool is_last)
+{
+	JsonParseErrorType res,
+				expected;
+	JsonManifestParseState *parse = incstate->sem.semstate;
+	JsonManifestParseContext *context = parse->context;
+
+	res = pg_parse_json_incremental(&(incstate->lex), &(incstate->sem),
+									chunk, size, is_last);
+
+	expected = is_last ? JSON_SUCCESS : JSON_INCOMPLETE;
+
+	if (res != expected)
+		json_manifest_parse_failure(context, "parsing failed");
+
+	if (is_last && parse->state != JM_EXPECT_EOF)
+		json_manifest_parse_failure(context, "manifest ended unexpectedly");
+
+	if (!is_last)
+	{
+		if (pg_cryptohash_update(incstate->manifest_ctx,
+								 (uint8 *) chunk, size) < 0)
+			context->error_cb(context, "could not update checksum of manifest");
+	}
+	else
+	{
+		verify_manifest_checksum(parse, chunk, size, incstate->manifest_ctx);
+	}
+}
+
+
 /*
  * Main entrypoint to parse a JSON-format backup manifest.
  *
@@ -152,7 +243,7 @@ json_parse_manifest(JsonManifestParseContext *context, char *buffer,
 		json_manifest_parse_failure(context, "manifest ended unexpectedly");
 
 	/* Verify the manifest checksum. */
-	verify_manifest_checksum(&parse, buffer, size);
+	verify_manifest_checksum(&parse, buffer, size, NULL);
 
 	freeJsonLexContext(lex);
 }
@@ -378,6 +469,8 @@ json_manifest_object_field_start(void *state, char *fname, bool isnull)
 			break;
 	}
 
+	pfree(fname);
+
 	return JSON_SUCCESS;
 }
 
@@ -628,10 +721,14 @@ json_manifest_finalize_wal_range(JsonManifestParseState *parse)
  * The last line of the manifest file is excluded from the manifest checksum,
  * because the last line is expected to contain the checksum that covers
  * the rest of the file.
+ *
+ * For an incremental parse, this will just be called on the last chunk of the
+ * manifest, with the cryptohash context passed in. For a non-incremental
+ * parse incr_ctx will be NULL.
  */
 static void
 verify_manifest_checksum(JsonManifestParseState *parse, char *buffer,
-						 size_t size)
+						 size_t size, pg_cryptohash_ctx *incr_ctx)
 {
 	JsonManifestParseContext *context = parse->context;
 	size_t		i;
@@ -666,11 +763,18 @@ verify_manifest_checksum(JsonManifestParseState *parse, char *buffer,
 									"last line not newline-terminated");
 
 	/* Checksum the rest. */
-	manifest_ctx = pg_cryptohash_create(PG_SHA256);
-	if (manifest_ctx == NULL)
-		context->error_cb(context, "out of memory");
-	if (pg_cryptohash_init(manifest_ctx) < 0)
-		context->error_cb(context, "could not initialize checksum of manifest");
+	if (incr_ctx == NULL)
+	{
+		manifest_ctx = pg_cryptohash_create(PG_SHA256);
+		if (manifest_ctx == NULL)
+			context->error_cb(context, "out of memory");
+		if (pg_cryptohash_init(manifest_ctx) < 0)
+			context->error_cb(context, "could not initialize checksum of manifest");
+	}
+	else
+	{
+		manifest_ctx = incr_ctx;
+	}
 	if (pg_cryptohash_update(manifest_ctx, (uint8 *) buffer, penultimate_newline + 1) < 0)
 		context->error_cb(context, "could not update checksum of manifest");
 	if (pg_cryptohash_final(manifest_ctx, manifest_checksum_actual,
diff --git a/src/include/common/parse_manifest.h b/src/include/common/parse_manifest.h
index f74be0db35..73db43662f 100644
--- a/src/include/common/parse_manifest.h
+++ b/src/include/common/parse_manifest.h
@@ -20,6 +20,7 @@
 
 struct JsonManifestParseContext;
 typedef struct JsonManifestParseContext JsonManifestParseContext;
+typedef struct JsonManifestParseIncrementalState JsonManifestParseIncrementalState;
 
 typedef void (*json_manifest_per_file_callback) (JsonManifestParseContext *,
 												 char *pathname,
@@ -42,5 +43,9 @@ struct JsonManifestParseContext
 
 extern void json_parse_manifest(JsonManifestParseContext *context,
 								char *buffer, size_t size);
+extern JsonManifestParseIncrementalState *json_parse_manifest_incremental_init(JsonManifestParseContext *context);
+extern void json_parse_manifest_incremental_chunk(
+												  JsonManifestParseIncrementalState *incstate, char *chunk, int size,
+												  bool is_last);
 
 #endif
-- 
2.34.1

