From 89a7ce5bb2338f65ebd42703eed11033881646cb Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Tue, 18 Oct 2022 14:51:03 +0900
Subject: [PATCH v2] Refactor regex handling for pg_ident.conf in hba.c

---
 src/include/libpq/hba.h          |  28 +++---
 src/backend/libpq/hba.c          | 162 ++++++++++++++++++++-----------
 src/backend/utils/adt/hbafuncs.c |   2 +-
 3 files changed, 121 insertions(+), 71 deletions(-)

diff --git a/src/include/libpq/hba.h b/src/include/libpq/hba.h
index d06da81806..cec2e2665f 100644
--- a/src/include/libpq/hba.h
+++ b/src/include/libpq/hba.h
@@ -77,6 +77,20 @@ typedef enum ClientCertName
 	clientCertDN
 } ClientCertName;
 
+/*
+ * A single string token lexed from an authentication configuration file
+ * (pg_ident.conf or pg_hba.conf), together with whether the token has
+ * been quoted.  If "string" begins with a slash, it may optionally
+ * contain a regular expression (currently used for pg_ident.conf when
+ * building IdentLines).
+ */
+typedef struct AuthToken
+{
+	char	   *string;
+	bool		quoted;
+	regex_t    *regex;
+} AuthToken;
+
 typedef struct HbaLine
 {
 	int			linenumber;
@@ -127,22 +141,10 @@ typedef struct IdentLine
 	int			linenumber;
 
 	char	   *usermap;
-	char	   *ident_user;
 	char	   *pg_role;
-	regex_t		re;
+	AuthToken  *token;
 } IdentLine;
 
-/*
- * A single string token lexed from an authentication configuration file
- * (pg_ident.conf or pg_hba.conf), together with whether the token has
- * been quoted.
- */
-typedef struct AuthToken
-{
-	char	   *string;
-	bool		quoted;
-} AuthToken;
-
 /*
  * TokenizedAuthLine represents one line lexed from an authentication
  * configuration file.  Each item in the "fields" list is a sub-list of
diff --git a/src/backend/libpq/hba.c b/src/backend/libpq/hba.c
index 4637426d62..ea4e955d51 100644
--- a/src/backend/libpq/hba.c
+++ b/src/backend/libpq/hba.c
@@ -66,6 +66,7 @@ typedef struct check_network_data
 } check_network_data;
 
 
+#define token_has_regexp(t)	(t->regex != NULL)
 #define token_is_keyword(t, k)	(!t->quoted && strcmp(t->string, k) == 0)
 #define token_matches(t, k)  (strcmp(t->string, k) == 0)
 
@@ -80,9 +81,10 @@ static MemoryContext parsed_hba_context = NULL;
  * pre-parsed content of ident mapping file: list of IdentLine structs.
  * parsed_ident_context is the memory context where it lives.
  *
- * NOTE: the IdentLine structs can contain pre-compiled regular expressions
- * that live outside the memory context. Before destroying or resetting the
- * memory context, they need to be explicitly free'd.
+ * NOTE: the IdentLine structs can contain AuthTokens with pre-compiled
+ * regular expressions that live outside the memory context. Before
+ * destroying or resetting the memory context, they need to be explicitly
+ * free'd.
  */
 static List *parsed_ident_lines = NIL;
 static MemoryContext parsed_ident_context = NULL;
@@ -117,6 +119,9 @@ static List *tokenize_inc_file(List *tokens, const char *outer_filename,
 							   const char *inc_filename, int elevel, char **err_msg);
 static bool parse_hba_auth_opt(char *name, char *val, HbaLine *hbaline,
 							   int elevel, char **err_msg);
+static int regcomp_auth_token(AuthToken *token);
+static int regexec_auth_token(const char *match, AuthToken *token,
+							  size_t nmatch, regmatch_t pmatch[]);
 
 
 /*
@@ -267,14 +272,27 @@ make_auth_token(const char *token, bool quoted)
 
 	toklen = strlen(token);
 	/* we copy string into same palloc block as the struct */
-	authtoken = (AuthToken *) palloc(sizeof(AuthToken) + toklen + 1);
+	authtoken = (AuthToken *) palloc0(sizeof(AuthToken) + toklen + 1);
 	authtoken->string = (char *) authtoken + sizeof(AuthToken);
 	authtoken->quoted = quoted;
+	authtoken->regex = NULL;
 	memcpy(authtoken->string, token, toklen + 1);
 
 	return authtoken;
 }
 
+/*
+ * Free an AuthToken, that may include a regular expression that needs
+ * to be cleaned up explicitly.
+ */
+static void
+free_auth_token(AuthToken *token)
+{
+	if (token_has_regexp(token))
+		pg_regfree(token->regex);
+	pfree(token);
+}
+
 /*
  * Copy a AuthToken struct into freshly palloc'd memory.
  */
@@ -286,6 +304,57 @@ copy_auth_token(AuthToken *in)
 	return out;
 }
 
+/*
+ * Compile the regular expression "re" and store it in the AuthToken
+ * given in input.  Returns the error code of pg_regcomp, to be used
+ * by the caller.
+ */
+static int
+regcomp_auth_token(AuthToken *token)
+{
+	pg_wchar   *wstr;
+	int			wlen;
+	int			rc;
+
+	Assert(token->regex == NULL);
+
+	if (token->string[0] != '/')
+		return 0;	/* nothing to compile */
+
+	token->regex = (regex_t *) palloc0(sizeof(regex_t));
+	wstr = palloc((strlen(token->string + 1) + 1) * sizeof(pg_wchar));
+	wlen = pg_mb2wchar_with_len(token->string + 1,
+								wstr, strlen(token->string + 1));
+
+	rc = pg_regcomp(token->regex, wstr, wlen, REG_ADVANCED, C_COLLATION_OID);
+
+	pfree(wstr);
+	return rc;
+}
+
+/*
+ * Execute a regular expression computed in an AuthToken, checking for a match
+ * with the string specified in "match".  The caller may optionally give an
+ * array to store the matches.  Return the result of pg_regexec().
+ */
+static int
+regexec_auth_token(const char *match, AuthToken *token, size_t nmatch,
+				   regmatch_t pmatch[])
+{
+	pg_wchar   *wmatchstr;
+	int			wmatchlen;
+	int			r;
+
+	Assert(token->string[0] == '/' && token->regex);
+
+	wmatchstr = palloc((strlen(match) + 1) * sizeof(pg_wchar));
+	wmatchlen = pg_mb2wchar_with_len(match, wmatchstr, strlen(match));
+
+	r = pg_regexec(token->regex, wmatchstr, wmatchlen, 0, NULL, nmatch, pmatch, 0);
+
+	pfree(wmatchstr);
+	return r;
+}
 
 /*
  * Tokenize one HBA field from a line, handling file inclusion and comma lists.
@@ -2307,6 +2376,7 @@ parse_ident_line(TokenizedAuthLine *tok_line, int elevel)
 	List	   *tokens;
 	AuthToken  *token;
 	IdentLine  *parsedline;
+	int			rc;
 
 	Assert(tok_line->fields != NIL);
 	field = list_head(tok_line->fields);
@@ -2326,7 +2396,9 @@ parse_ident_line(TokenizedAuthLine *tok_line, int elevel)
 	tokens = lfirst(field);
 	IDENT_MULTI_VALUE(tokens);
 	token = linitial(tokens);
-	parsedline->ident_user = pstrdup(token->string);
+
+	/* Copy the ident user token */
+	parsedline->token = copy_auth_token(token);
 
 	/* Get the PG rolename token */
 	field = lnext(tok_line->fields, field);
@@ -2336,40 +2408,27 @@ parse_ident_line(TokenizedAuthLine *tok_line, int elevel)
 	token = linitial(tokens);
 	parsedline->pg_role = pstrdup(token->string);
 
-	if (parsedline->ident_user[0] == '/')
+	/*
+	 * Now that the field validation is done, compile a regex from the user
+	 * token, if necessary.
+	 */
+	rc = regcomp_auth_token(parsedline->token);
+	if (rc)
 	{
-		/*
-		 * When system username starts with a slash, treat it as a regular
-		 * expression. Pre-compile it.
-		 */
-		int			r;
-		pg_wchar   *wstr;
-		int			wlen;
+		char		errstr[100];
 
-		wstr = palloc((strlen(parsedline->ident_user + 1) + 1) * sizeof(pg_wchar));
-		wlen = pg_mb2wchar_with_len(parsedline->ident_user + 1,
-									wstr, strlen(parsedline->ident_user + 1));
-
-		r = pg_regcomp(&parsedline->re, wstr, wlen, REG_ADVANCED, C_COLLATION_OID);
-		if (r)
-		{
-			char		errstr[100];
-
-			pg_regerror(r, &parsedline->re, errstr, sizeof(errstr));
-			ereport(elevel,
-					(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
-					 errmsg("invalid regular expression \"%s\": %s",
-							parsedline->ident_user + 1, errstr),
-					 errcontext("line %d of configuration file \"%s\"",
+		pg_regerror(rc, parsedline->token->regex, errstr, sizeof(errstr));
+		ereport(elevel,
+				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+				 errmsg("invalid regular expression \"%s\": %s",
+						parsedline->token->string + 1, errstr),
+				 errcontext("line %d of configuration file \"%s\"",
 							line_num, IdentFileName)));
 
-			*err_msg = psprintf("invalid regular expression \"%s\": %s",
-								parsedline->ident_user + 1, errstr);
+		*err_msg = psprintf("invalid regular expression \"%s\": %s",
+							parsedline->token->string + 1, errstr);
 
-			pfree(wstr);
-			return NULL;
-		}
-		pfree(wstr);
+		return NULL;
 	}
 
 	return parsedline;
@@ -2394,25 +2453,19 @@ check_ident_usermap(IdentLine *identLine, const char *usermap_name,
 		return;
 
 	/* Match? */
-	if (identLine->ident_user[0] == '/')
+	if (token_has_regexp(identLine->token))
 	{
 		/*
-		 * When system username starts with a slash, treat it as a regular
-		 * expression. In this case, we process the system username as a
-		 * regular expression that returns exactly one match. This is replaced
-		 * for \1 in the database username string, if present.
+		 * Process the system username as a regular expression that returns
+		 * exactly one match. This is replaced for \1 in the database username
+		 * string, if present.
 		 */
 		int			r;
 		regmatch_t	matches[2];
-		pg_wchar   *wstr;
-		int			wlen;
 		char	   *ofs;
 		char	   *regexp_pgrole;
 
-		wstr = palloc((strlen(ident_user) + 1) * sizeof(pg_wchar));
-		wlen = pg_mb2wchar_with_len(ident_user, wstr, strlen(ident_user));
-
-		r = pg_regexec(&identLine->re, wstr, wlen, 0, NULL, 2, matches, 0);
+		r = regexec_auth_token(ident_user, identLine->token, 2, matches);
 		if (r)
 		{
 			char		errstr[100];
@@ -2420,18 +2473,15 @@ check_ident_usermap(IdentLine *identLine, const char *usermap_name,
 			if (r != REG_NOMATCH)
 			{
 				/* REG_NOMATCH is not an error, everything else is */
-				pg_regerror(r, &identLine->re, errstr, sizeof(errstr));
+				pg_regerror(r, identLine->token->regex, errstr, sizeof(errstr));
 				ereport(LOG,
 						(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
 						 errmsg("regular expression match for \"%s\" failed: %s",
-								identLine->ident_user + 1, errstr)));
+								identLine->token->string + 1, errstr)));
 				*error_p = true;
 			}
-
-			pfree(wstr);
 			return;
 		}
-		pfree(wstr);
 
 		if ((ofs = strstr(identLine->pg_role, "\\1")) != NULL)
 		{
@@ -2443,7 +2493,7 @@ check_ident_usermap(IdentLine *identLine, const char *usermap_name,
 				ereport(LOG,
 						(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
 						 errmsg("regular expression \"%s\" has no subexpressions as requested by backreference in \"%s\"",
-								identLine->ident_user + 1, identLine->pg_role)));
+								identLine->token->string + 1, identLine->pg_role)));
 				*error_p = true;
 				return;
 			}
@@ -2490,13 +2540,13 @@ check_ident_usermap(IdentLine *identLine, const char *usermap_name,
 		if (case_insensitive)
 		{
 			if (pg_strcasecmp(identLine->pg_role, pg_role) == 0 &&
-				pg_strcasecmp(identLine->ident_user, ident_user) == 0)
+				pg_strcasecmp(identLine->token->string, ident_user) == 0)
 				*found_p = true;
 		}
 		else
 		{
 			if (strcmp(identLine->pg_role, pg_role) == 0 &&
-				strcmp(identLine->ident_user, ident_user) == 0)
+				strcmp(identLine->token->string, ident_user) == 0)
 				*found_p = true;
 		}
 	}
@@ -2646,8 +2696,7 @@ load_ident(void)
 		foreach(parsed_line_cell, new_parsed_lines)
 		{
 			newline = (IdentLine *) lfirst(parsed_line_cell);
-			if (newline->ident_user[0] == '/')
-				pg_regfree(&newline->re);
+			free_auth_token(newline->token);
 		}
 		MemoryContextDelete(ident_context);
 		return false;
@@ -2659,8 +2708,7 @@ load_ident(void)
 		foreach(parsed_line_cell, parsed_ident_lines)
 		{
 			newline = (IdentLine *) lfirst(parsed_line_cell);
-			if (newline->ident_user[0] == '/')
-				pg_regfree(&newline->re);
+			free_auth_token(newline->token);
 		}
 	}
 	if (parsed_ident_context != NULL)
diff --git a/src/backend/utils/adt/hbafuncs.c b/src/backend/utils/adt/hbafuncs.c
index cbbe44ff13..cfdc4d8b39 100644
--- a/src/backend/utils/adt/hbafuncs.c
+++ b/src/backend/utils/adt/hbafuncs.c
@@ -467,7 +467,7 @@ fill_ident_line(Tuplestorestate *tuple_store, TupleDesc tupdesc,
 	if (ident != NULL)
 	{
 		values[index++] = CStringGetTextDatum(ident->usermap);
-		values[index++] = CStringGetTextDatum(ident->ident_user);
+		values[index++] = CStringGetTextDatum(ident->token->string);
 		values[index++] = CStringGetTextDatum(ident->pg_role);
 	}
 	else
-- 
2.37.2

