From fa55bf28a2a45ae91a3363792b39e77d7c055cd2 Mon Sep 17 00:00:00 2001
From: Robin Haberkorn <haberkorn@b1-systems.de>
Date: Thu, 17 Apr 2025 13:15:48 +0300
Subject: [PATCH v3 1/2] contrib/xml2: xslt_process() now reports XSLT-related
 error details

* It sets (and restores) the libxslt error handler (xsltSetGenericErrorFunc()).
  Since it only supports old-school "generic" error handlers, which are no longer
  used in PG's libxml code, we reintroduced a "generic" error handler
  xml_generic_error_handler() in xml.c.
* The alternative would have been to expose PgXmlErrorContext in xml.h,
  so we could implement a generic handler in xslt_proc.c.
  This is obviously not desirable, as it would depend on USE_LIBXML.
* No longer use the PG_XML_STRICTNESS_LEGACY for error handling,
  but query the error_occurred-flag via pg_xml_error_occurred().
  The existance of this getter is another hint, that we are not supposed
  to expose PgXmlErrorContext in xml.h.
* This change means that xslt_process() now reports not only details
  about XML parsing errors, but XSLT-schema deviations and missing
  stylesheet parameters as well.
* The XSLT error messages now also contain line numbers.
  For that to work, we had to set a dummy "SQL" URL when parsing XML strings.
  This is important, since xsltPrintErrorContext() includes
  line numbers only if an URL is set.
* The xsltSaveResultToString() error handling has been removed.
  It can practically only fail in OOM situations and there is no reason
  to handle them any different than with the other libxslt functions.
* Updated test suite and added test case for detecting missing
  stylesheet parameters.
  This was initially reported here but has obviously been fixed in the
  meantime:
  https://www.postgresql.org/message-id/4C5ECEF9.3030806%40mlfowler.com
---
 contrib/xml2/expected/xml2.out | 17 +++++++++++++++
 contrib/xml2/sql/xml2.sql      |  8 +++++++
 contrib/xml2/xslt_proc.c       | 39 +++++++++++++++++++++++-----------
 src/backend/utils/adt/xml.c    | 29 +++++++++++++++++++++++++
 src/include/utils/xml.h        |  2 ++
 5 files changed, 83 insertions(+), 12 deletions(-)

diff --git a/contrib/xml2/expected/xml2.out b/contrib/xml2/expected/xml2.out
index 3d97b14c3a..157d584e63 100644
--- a/contrib/xml2/expected/xml2.out
+++ b/contrib/xml2/expected/xml2.out
@@ -261,3 +261,20 @@ $$<xsl:stylesheet version="1.0"
   </xsl:template>
 </xsl:stylesheet>$$);
 ERROR:  failed to apply stylesheet
+DETAIL:  runtime error: file SQL line 7 element output
+File write for 0wn3d.txt refused
+runtime error: file SQL line 7 element output
+xsltDocumentElem: write rights for 0wn3d.txt denied
+-- detecting missing stylesheet parameter
+SELECT xslt_process('<xml/>',
+$$<stylesheet version="1.0" xmlns="http://www.w3.org/1999/XSL/Transform">
+  <template match="/">
+    <value-of select="$n1"/>
+  </template>
+</stylesheet>$$)::xml;
+ERROR:  failed to apply stylesheet
+DETAIL:  runtime error: file SQL line 3 element value-of
+Variable 'n1' has not been declared.
+Undefined variable
+runtime error: file SQL line 3 element value-of
+XPath evaluation returned no result.
diff --git a/contrib/xml2/sql/xml2.sql b/contrib/xml2/sql/xml2.sql
index ef99d164f2..9d42ac8a0b 100644
--- a/contrib/xml2/sql/xml2.sql
+++ b/contrib/xml2/sql/xml2.sql
@@ -153,3 +153,11 @@ $$<xsl:stylesheet version="1.0"
     </sax:output>
   </xsl:template>
 </xsl:stylesheet>$$);
+
+-- detecting missing stylesheet parameter
+SELECT xslt_process('<xml/>',
+$$<stylesheet version="1.0" xmlns="http://www.w3.org/1999/XSL/Transform">
+  <template match="/">
+    <value-of select="$n1"/>
+  </template>
+</stylesheet>$$)::xml;
diff --git a/contrib/xml2/xslt_proc.c b/contrib/xml2/xslt_proc.c
index b720d89f75..2aa44d39e7 100644
--- a/contrib/xml2/xslt_proc.c
+++ b/contrib/xml2/xslt_proc.c
@@ -61,6 +61,10 @@ xslt_process(PG_FUNCTION_ARGS)
 	xmlChar    *resstr = NULL;
 	int			reslen = 0;
 
+	/* the previous libxslt error context */
+	xmlGenericErrorFunc saved_errfunc;
+	void	   *saved_errcxt;
+
 	if (fcinfo->nargs == 3)
 	{
 		paramstr = PG_GETARG_TEXT_PP(2);
@@ -74,35 +78,45 @@ xslt_process(PG_FUNCTION_ARGS)
 	}
 
 	/* Setup parser */
-	xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
+	xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_ALL);
+
+	/*
+	 * Save the previous libxslt error context.
+	 */
+	saved_errfunc = xsltGenericError;
+	saved_errcxt = xsltGenericErrorContext;
+	xsltSetGenericErrorFunc(xmlerrcxt, xml_generic_error_handler);
 
 	PG_TRY();
 	{
 		xmlDocPtr	ssdoc;
 		bool		xslt_sec_prefs_error;
 
-		/* Parse document */
+		/*
+		 * Parse document. It's important to set an "URL", so libxslt includes
+		 * line numbers in error messages (cf. xsltPrintErrorContext()).
+		 */
 		doctree = xmlReadMemory((char *) VARDATA_ANY(doct),
-								VARSIZE_ANY_EXHDR(doct), NULL, NULL,
+								VARSIZE_ANY_EXHDR(doct), "SQL", NULL,
 								XML_PARSE_NOENT);
 
-		if (doctree == NULL)
+		if (doctree == NULL || pg_xml_error_occurred(xmlerrcxt))
 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
 						"error parsing XML document");
 
 		/* Same for stylesheet */
 		ssdoc = xmlReadMemory((char *) VARDATA_ANY(ssheet),
-							  VARSIZE_ANY_EXHDR(ssheet), NULL, NULL,
+							  VARSIZE_ANY_EXHDR(ssheet), "SQL", NULL,
 							  XML_PARSE_NOENT);
 
-		if (ssdoc == NULL)
+		if (ssdoc == NULL || pg_xml_error_occurred(xmlerrcxt))
 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
 						"error parsing stylesheet as XML document");
 
 		/* After this call we need not free ssdoc separately */
 		stylesheet = xsltParseStylesheetDoc(ssdoc);
 
-		if (stylesheet == NULL)
+		if (stylesheet == NULL || pg_xml_error_occurred(xmlerrcxt))
 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
 						"failed to parse stylesheet");
 
@@ -137,11 +151,14 @@ xslt_process(PG_FUNCTION_ARGS)
 		restree = xsltApplyStylesheetUser(stylesheet, doctree, params,
 										  NULL, NULL, xslt_ctxt);
 
-		if (restree == NULL)
+		if (restree == NULL || pg_xml_error_occurred(xmlerrcxt))
 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
 						"failed to apply stylesheet");
 
 		resstat = xsltSaveResultToString(&resstr, &reslen, restree, stylesheet);
+		if (resstat < 0 || pg_xml_error_occurred(xmlerrcxt))
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
+						"failed to save result to string");
 	}
 	PG_CATCH();
 	{
@@ -157,6 +174,7 @@ xslt_process(PG_FUNCTION_ARGS)
 			xmlFreeDoc(doctree);
 		xsltCleanupGlobals();
 
+		xsltSetGenericErrorFunc(saved_errcxt, saved_errfunc);
 		pg_xml_done(xmlerrcxt, true);
 
 		PG_RE_THROW();
@@ -170,12 +188,9 @@ xslt_process(PG_FUNCTION_ARGS)
 	xmlFreeDoc(doctree);
 	xsltCleanupGlobals();
 
+	xsltSetGenericErrorFunc(saved_errcxt, saved_errfunc);
 	pg_xml_done(xmlerrcxt, false);
 
-	/* XXX this is pretty dubious, really ought to throw error instead */
-	if (resstat < 0)
-		PG_RETURN_NULL();
-
 	result = cstring_to_text_with_len((char *) resstr, reslen);
 
 	if (resstr)
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index db8d0d6a7e..61b0e2e117 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -2078,6 +2078,35 @@ xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
 			 detail ? errdetail_internal("%s", detail) : 0));
 }
 
+/*
+ * Generic error handler for libxml errors and warnings.
+ * This is not used by this module, but may be useful for
+ * libxml-based libraries like libxslt, which do not support
+ * structured error handlers.
+ */
+void
+xml_generic_error_handler(void *data, const char *msg,...)
+{
+	PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
+	va_list		ap;
+
+	/*
+	 * Defend against someone passing us a bogus context struct.
+	 *
+	 * We force a backend exit if this check fails because longjmp'ing out of
+	 * libxslt would likely render it unsafe to use further.
+	 */
+	if (xmlerrcxt->magic != ERRCXT_MAGIC)
+		elog(FATAL, "xml_generic_error_handler called with invalid PgXmlErrorContext");
+
+	appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
+	va_start(ap, msg);
+	appendStringInfoVA(&xmlerrcxt->err_buf, msg, ap);
+	va_end(ap);
+
+	/* Get rid of any trailing newlines in errorBuf */
+	chopStringInfoNewlines(&xmlerrcxt->err_buf);
+}
 
 /*
  * Error handler for libxml errors and warnings
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index 0d7a816b9f..7cb101e81d 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -66,6 +66,8 @@ extern void pg_xml_init_library(void);
 extern PgXmlErrorContext *pg_xml_init(PgXmlStrictness strictness);
 extern void pg_xml_done(PgXmlErrorContext *errcxt, bool isError);
 extern bool pg_xml_error_occurred(PgXmlErrorContext *errcxt);
+extern void xml_generic_error_handler(void *data, const char *msg,...)
+			pg_attribute_printf(2, 3);
 extern void xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode,
 						const char *msg);
 
-- 
2.49.0

