From f4996f6278fc575ad67e16bf21d00f0be7907457 Mon Sep 17 00:00:00 2001
From: Robin Haberkorn <haberkorn@b1-systems.de>
Date: Wed, 30 Apr 2025 13:19:42 +0300
Subject: [PATCH v4 2/2] contrib/xml2: overloaded xslt_process() to provide
 variants for xmltype and specifying parameters in arrays

* There are apparently no functions that accept XML as text, except for xmlparse().
  xslt_process() should therefore also accept xmltype.
* A version accepting text is still kept for backwards compatibility, but is considered
  deprecated.
* The new xmltype-based version expects an array of stylesheet parameter-value pairs,
  which is less limited than the now deprecated way of encoding all stylesheet parameters into
  a single text argument.
  We can now accept an arbitrary number of parameters and you can include `=` and `,` signs
  in both the key and value strings.
  hstore was not used for the parameters because it lives in a separate contrib module
  and xml2 should not have to depend on any additional module.
* The new implementation respects the database's encoding - text strings are always converted to UTF8
  before passing them into libxml2.
* On the downside, xml_parse() had to be made an external function.
  Since a declaration cannot be added to xml.h without drawing in libxml2 headers,
  the declaration is repeated in xslt_proc.c.
  Perhaps xml_parse() should be declared in a separate internal header?
* xmlCtxtReadDoc() now sets a dummy "SQL" URL to preserve line numbers in XSLT stylesheet errors.
  This change at least does not break the test suite.
---
 contrib/xml2/expected/xml2.out |  13 +++
 contrib/xml2/sql/xml2.sql      |   8 ++
 contrib/xml2/xml2--1.1.sql     |  11 +++
 contrib/xml2/xslt_proc.c       | 146 +++++++++++++++++++++++++--------
 doc/src/sgml/xml2.sgml         |  19 +++--
 src/backend/utils/adt/xml.c    |  19 +++--
 6 files changed, 171 insertions(+), 45 deletions(-)

diff --git a/contrib/xml2/expected/xml2.out b/contrib/xml2/expected/xml2.out
index 157d584e63..0a8a628020 100644
--- a/contrib/xml2/expected/xml2.out
+++ b/contrib/xml2/expected/xml2.out
@@ -278,3 +278,16 @@ Variable 'n1' has not been declared.
 Undefined variable
 runtime error: file SQL line 3 element value-of
 XPath evaluation returned no result.
+-- xmltype and Array-based signature
+SELECT xslt_process(xmlelement(name xml),
+$$<stylesheet version="1.0" xmlns="http://www.w3.org/1999/XSL/Transform">
+  <template match="/">
+    <value-of select="$n1"/>
+  </template>
+</stylesheet>$$::xml, ARRAY['n1','"foo"']);
+ xslt_process 
+--------------
+ foo         +
+ 
+(1 row)
+
diff --git a/contrib/xml2/sql/xml2.sql b/contrib/xml2/sql/xml2.sql
index 9d42ac8a0b..7555854d49 100644
--- a/contrib/xml2/sql/xml2.sql
+++ b/contrib/xml2/sql/xml2.sql
@@ -161,3 +161,11 @@ $$<stylesheet version="1.0" xmlns="http://www.w3.org/1999/XSL/Transform">
     <value-of select="$n1"/>
   </template>
 </stylesheet>$$)::xml;
+
+-- xmltype and Array-based signature
+SELECT xslt_process(xmlelement(name xml),
+$$<stylesheet version="1.0" xmlns="http://www.w3.org/1999/XSL/Transform">
+  <template match="/">
+    <value-of select="$n1"/>
+  </template>
+</stylesheet>$$::xml, ARRAY['n1','"foo"']);
diff --git a/contrib/xml2/xml2--1.1.sql b/contrib/xml2/xml2--1.1.sql
index 671372cb27..a579a1e5e1 100644
--- a/contrib/xml2/xml2--1.1.sql
+++ b/contrib/xml2/xml2--1.1.sql
@@ -71,3 +71,14 @@ CREATE FUNCTION xslt_process(text,text)
 RETURNS text
 AS 'MODULE_PATHNAME'
 LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE FUNCTION xslt_process(xml,xml,text[])
+RETURNS xml
+AS 'MODULE_PATHNAME','xslt_process_xmltype'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+-- same C entry point: it checks the argument count and applies no parameters when only two are given
+CREATE FUNCTION xslt_process(xml,xml)
+RETURNS xml
+AS 'MODULE_PATHNAME','xslt_process_xmltype'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
diff --git a/contrib/xml2/xslt_proc.c b/contrib/xml2/xslt_proc.c
index 5efb659439..c31ed9d039 100644
--- a/contrib/xml2/xslt_proc.c
+++ b/contrib/xml2/xslt_proc.c
@@ -10,6 +10,9 @@
 #include "fmgr.h"
 #include "utils/builtins.h"
 #include "utils/xml.h"
+#include "utils/array.h"
+#include "utils/memutils.h"
+#include "mb/pg_wchar.h"
 
 #ifdef USE_LIBXSLT
 
@@ -35,9 +38,18 @@
 extern PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness);
 
 /* local defs */
+static xmltype *xslt_process_internal(xmltype *doct, xmltype *ssheet, const char **params);
 static const char **parse_params(text *paramstr);
 #endif							/* USE_LIBXSLT */
 
+/*
+ * FIXME: This cannot easily be exposed in xml.h.
+ * Perhaps there should be an xml-internal.h?
+ */
+xmlDocPtr	xml_parse(text *data, XmlOptionType xmloption_arg,
+					  bool preserve_whitespace, int encoding,
+					  XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
+					  Node *escontext);
 
 PG_FUNCTION_INFO_V1(xslt_process);
 
@@ -48,9 +60,103 @@ xslt_process(PG_FUNCTION_ARGS)
 
 	text	   *doct = PG_GETARG_TEXT_PP(0);
 	text	   *ssheet = PG_GETARG_TEXT_PP(1);
+	const char **params = NULL;
+	text	   *result;
+
+	if (fcinfo->nargs == 3)
+	{
+		text	   *paramstr = PG_GETARG_TEXT_PP(2);
+
+		params = parse_params(paramstr);
+	}
+
+	result = xslt_process_internal(doct, ssheet, params);
+
+	PG_RETURN_TEXT_P(result);
+
+#else							/* !USE_LIBXSLT */
+
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("xslt_process() is not available without libxslt")));
+	PG_RETURN_NULL();
+
+#endif							/* USE_LIBXSLT */
+}
+
+PG_FUNCTION_INFO_V1(xslt_process_xmltype);
+
+Datum
+xslt_process_xmltype(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXSLT
+
+	xmltype    *doct = PG_GETARG_XML_P(0);
+	xmltype    *ssheet = PG_GETARG_XML_P(1);
+	const char **params = NULL;
+	xmltype    *result;
+
+	/*
+	 * Parameters are key-value pairs whose values are XPath expressions, so
+	 * string values must be quoted, as `xsltproc --stringparam` does. NULL
+	 * elements are rejected rather than skipped: dropping one would shift
+	 * later pairs and leave libxslt with an odd-length, misaligned list.
+	 */
+	if (fcinfo->nargs == 3)
+	{
+		ArrayType  *paramarray = PG_GETARG_ARRAYTYPE_P(2);
+		Datum	   *arr_datums;
+		bool	   *arr_nulls;
+		int			arr_count;
+		char	   *cstr;
+		int			i;
+
+		deconstruct_array_builtin(paramarray, TEXTOID, &arr_datums, &arr_nulls, &arr_count);
+
+		if ((arr_count % 2) != 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_ARRAY_ELEMENT_ERROR),
+					 errmsg("number of stylesheet parameters (%d) must be a multiple of 2",
+							arr_count)));
+
+		params = palloc_array(const char *, arr_count + 1);
+
+		for (i = 0; i < arr_count; i++)
+		{
+			if (arr_nulls[i])
+				ereport(ERROR,
+						(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+						 errmsg("stylesheet parameters must not be null")));
+
+			cstr = TextDatumGetCString(arr_datums[i]);
+			params[i] = (char *) pg_do_encoding_conversion((unsigned char *) cstr,
+														   strlen(cstr),
+														   GetDatabaseEncoding(),
+														   PG_UTF8);
+		}
+		params[arr_count] = NULL;
+	}
+
+	result = xslt_process_internal(doct, ssheet, params);
+
+	PG_RETURN_XML_P(result);
+
+#else							/* !USE_LIBXSLT */
+
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("xslt_process() is not available without libxslt")));
+	PG_RETURN_NULL();
+
+#endif							/* USE_LIBXSLT */
+}
+
+#ifdef USE_LIBXSLT
+
+static xmltype *
+xslt_process_internal(xmltype *doct, xmltype *ssheet, const char **params)
+{
 	text	   *result;
-	text	   *paramstr;
-	const char **params;
 	PgXmlErrorContext *xmlerrcxt;
 	volatile xsltStylesheetPtr stylesheet = NULL;
 	volatile xmlDocPtr doctree = NULL;
@@ -65,18 +171,6 @@ xslt_process(PG_FUNCTION_ARGS)
 	xmlGenericErrorFunc saved_errfunc;
 	void	   *saved_errcxt;
 
-	if (fcinfo->nargs == 3)
-	{
-		paramstr = PG_GETARG_TEXT_PP(2);
-		params = parse_params(paramstr);
-	}
-	else
-	{
-		/* No parameters */
-		params = (const char **) palloc(sizeof(char *));
-		params[0] = NULL;
-	}
-
 	/* Setup parser */
 	xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_ALL);
 
@@ -93,21 +187,18 @@ xslt_process(PG_FUNCTION_ARGS)
 		bool		xslt_sec_prefs_error;
 
 		/*
-		 * Parse document. It's important to set an "URL", so libxslt includes
-		 * line numbers in error messages (cf. xsltPrintErrorContext()).
+		 * Parse document.
 		 */
-		doctree = xmlReadMemory((char *) VARDATA_ANY(doct),
-								VARSIZE_ANY_EXHDR(doct), "SQL", NULL,
-								XML_PARSE_NOENT);
+		doctree = xml_parse(doct, XMLOPTION_DOCUMENT, true,
+							GetDatabaseEncoding(), NULL, NULL, NULL);
 
 		if (doctree == NULL || pg_xml_error_occurred(xmlerrcxt))
 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
 						"error parsing XML document");
 
 		/* Same for stylesheet */
-		ssdoc = xmlReadMemory((char *) VARDATA_ANY(ssheet),
-							  VARSIZE_ANY_EXHDR(ssheet), "SQL", NULL,
-							  XML_PARSE_NOENT);
+		ssdoc = xml_parse(ssheet, XMLOPTION_DOCUMENT, true,
+						  GetDatabaseEncoding(), NULL, NULL, NULL);
 
 		if (ssdoc == NULL || pg_xml_error_occurred(xmlerrcxt))
 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
@@ -198,18 +289,9 @@ xslt_process(PG_FUNCTION_ARGS)
 	xsltSetGenericErrorFunc(saved_errcxt, saved_errfunc);
 	pg_xml_done(xmlerrcxt, false);
 
-	PG_RETURN_TEXT_P(result);
-#else							/* !USE_LIBXSLT */
-
-	ereport(ERROR,
-			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("xslt_process() is not available without libxslt")));
-	PG_RETURN_NULL();
-#endif							/* USE_LIBXSLT */
+	return result;
 }
 
-#ifdef USE_LIBXSLT
-
 static const char **
 parse_params(text *paramstr)
 {
diff --git a/doc/src/sgml/xml2.sgml b/doc/src/sgml/xml2.sgml
index 9fd613f967..dc6fb40121 100644
--- a/doc/src/sgml/xml2.sgml
+++ b/doc/src/sgml/xml2.sgml
@@ -408,22 +408,29 @@ ORDER BY doc_num, line_num;
   </indexterm>
 
 <synopsis>
-xslt_process(text document, text stylesheet, text paramlist) returns text
+xslt_process(xml document, xml stylesheet, text[] paramlist) returns xml
 </synopsis>
 
    <para>
     This function applies the XSL stylesheet to the document and returns
-    the transformed result. The <literal>paramlist</literal> is a list of parameter
-    assignments to be used in the transformation, specified in the form
-    <literal>a=1,b=2</literal>. Note that the
-    parameter parsing is very simple-minded: parameter values cannot
-    contain commas!
+    the transformed result. The <literal>paramlist</literal> is an array of parameter
+    assignments to be used in the transformation, specified in pairs of
+    key and value strings (e.g. <literal>ARRAY['a','1', 'b','2']</literal>).
+    The length of the array must be even.
+    Note that the values are still interpreted as XPath expressions, so string values need to
+    be quoted in single or double quotes (e.g. <literal>ARRAY['a','"string"']</literal>).
    </para>
 
    <para>
     There is also a two-parameter version of <function>xslt_process</function> which
     does not pass any parameters to the transformation.
    </para>
+
+   <para>
+    <emphasis>Deprecated</emphasis> variants of <function>xslt_process</function> accepting
+    text arguments and parameters encoded into single text strings
+    (e.g. <literal>a=1,b=2</literal>) are also still available.
+   </para>
   </sect3>
  </sect2>
 
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 080d9d2ad9..c14352cb11 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -155,11 +155,11 @@ static int	parse_xml_decl(const xmlChar *str, size_t *lenp,
 static bool print_xml_decl(StringInfo buf, const xmlChar *version,
 						   pg_enc encoding, int standalone);
 static bool xml_doctype_in_content(const xmlChar *str);
-static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
-						   bool preserve_whitespace, int encoding,
-						   XmlOptionType *parsed_xmloptiontype,
-						   xmlNodePtr *parsed_nodes,
-						   Node *escontext);
+xmlDocPtr	xml_parse(text *data, XmlOptionType xmloption_arg,
+					  bool preserve_whitespace, int encoding,
+					  XmlOptionType *parsed_xmloptiontype,
+					  xmlNodePtr *parsed_nodes,
+					  Node *escontext);
 static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
 static int	xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
 								   ArrayBuildState *astate,
@@ -1778,7 +1778,7 @@ xml_doctype_in_content(const xmlChar *str)
  * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
  * yet do not use SAX - see xmlreader.c)
  */
-static xmlDocPtr
+xmlDocPtr
 xml_parse(text *data, XmlOptionType xmloption_arg,
 		  bool preserve_whitespace, int encoding,
 		  XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
@@ -1874,8 +1874,13 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
 							"could not allocate parser context");
 
+			/*
+			 * Setting a dummy "SQL" URL is important for the
+			 * xsltPrintErrorContext() when using the legacy text-based
+			 * xslt_process() variant.
+			 */
 			doc = xmlCtxtReadDoc(ctxt, utf8string,
-								 NULL,	/* no URL */
+								 "SQL",
 								 "UTF-8",
 								 options);
 
-- 
2.49.0

