[PoC] Add CANONICAL option to xmlserialize

Started by Jim Jonesalmost 3 years ago46 messages

jim.jones@uni-muenster.de

almost 3 years ago

1 attachment(s)

Hi,

In order to compare pairs of XML documents for equivalence it is
necessary to convert them first to their canonical form, as described at
W3C Canonical XML 1.1.[1] This spec basically defines a standard
physical representation of xml documents that have more then one
possible representation, so that it is possible to compare them, e.g.
forcing UTF-8 encoding, entity reference replacement, attributes
normalization, etc.

Although it is not part of the XML/SQL standard, it would be nice to
have the option CANONICAL in xmlserialize. Additionally, we could also
add the attribute WITH [NO] COMMENTS to keep or remove xml comments from
the documents.

Something like this:

WITH t(col) AS (
VALUES
('<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE doc SYSTEM "doc.dtd" [
<!ENTITY val "42">
<!ATTLIST xyz attr CDATA "default">
]>

<foo ns:c = "3" ns:b = "2" ns:a = "1"
xmlns:ns="http://postgresql.org">

<bar     xmlns:ns="http://postgresql.org" >&val;</bar     >

<empty/>

<iso8859>©</iso8859>

<xyz> 321 </xyz>
</foo>
'::xml)
)
SELECT xmlserialize(DOCUMENT col AS text CANONICAL) FROM t;
xmlserialize
--------------------------------------------------------------------------------------------------------------------------------------------------------
<foo xmlns:ns="http://postgresql.org" ns:a="1" ns:b="2"
ns:c="3"><bar>42</bar><empty></empty><iso8859>©</iso8859><xyz
attr="default"> 321 </xyz></foo>
(1 row)

-- using WITH COMMENTS

WITH t(col) AS (
VALUES
(' <foo ns:c = "3" ns:b = "2" ns:a = "1"
    xmlns:ns="http://postgresql.org">
    
    <xyz> 321 </xyz>
</foo>'::xml)
)
SELECT xmlserialize(DOCUMENT col AS text CANONICAL WITH COMMENTS) FROM t;
xmlserialize
------------------------------------------------------------------------------------------------------------------------
<foo xmlns:ns="http://postgresql.org" ns:a="1" ns:b="2" ns:c="3"><xyz> 321 </xyz></foo>
(1 row)

Another option would be to simply create a new function, e.g.
xmlcanonical(doc xml, keep_comments boolean), but I'm not sure if this
would be the right approach.

Attached a very short draft. What do you think?

Best, Jim

1- https://www.w3.org/TR/xml-c14n11/

Attachments:

Add-CANONICAL-Option-to-xmlserialize-draft.patchtext/x-patch; charset=UTF-8; name=Add-CANONICAL-Option-to-xmlserialize-draft.patchDownload

diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 19351fe34b..f8f10f0ed9 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -3829,6 +3829,8 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 			{
 				Datum	   *argvalue = op->d.xmlexpr.argvalue;
 				bool	   *argnull = op->d.xmlexpr.argnull;
+				XmlSerializeFormat	format = op->d.xmlexpr.xexpr->format;
+				text	   *data;
 
 				/* argument type is known to be xml */
 				Assert(list_length(xexpr->args) == 1);
@@ -3837,9 +3839,15 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 					return;
 				value = argvalue[0];
 
-				*op->resvalue = PointerGetDatum(xmltotext_with_xmloption(DatumGetXmlP(value),
-																		 xexpr->xmloption));
 				*op->resnull = false;
+
+				data = xmltotext_with_xmloption(DatumGetXmlP(value),
+												xexpr->xmloption);
+
+				if (format == XMLDEFAULT_FORMAT)
+					*op->resvalue = PointerGetDatum(data);
+				else if (format == XMLCANONICAL || format == XMLCANONICAL_WITH_COMMENTS)
+					*op->resvalue = PointerGetDatum(xmlserialize_canonical(data,format));
 			}
 			break;
 
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index a0138382a1..af5f3dfdfd 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -619,6 +619,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <defelt>	xmltable_column_option_el
 %type <list>	xml_namespace_list
 %type <target>	xml_namespace_el
+%type <ival> 	opt_xml_serialize_format
 
 %type <node>	func_application func_expr_common_subexpr
 %type <node>	func_expr func_expr_windowless
@@ -676,7 +677,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
 	BOOLEAN_P BOTH BREADTH BY
 
-	CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
+	CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT
 	COMMITTED COMPRESSION CONCURRENTLY CONFIGURATION CONFLICT
@@ -15532,13 +15533,14 @@ func_expr_common_subexpr:
 					$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
 									 list_make3($3, $5, $6), @1);
 				}
-			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename ')'
+			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename opt_xml_serialize_format ')'
 				{
 					XmlSerialize *n = makeNode(XmlSerialize);
 
 					n->xmloption = $3;
 					n->expr = $4;
 					n->typeName = $6;
+					n->format = $7;
 					n->location = @1;
 					$$ = (Node *) n;
 				}
@@ -15622,6 +15624,12 @@ xml_passing_mech:
 			| BY VALUE_P
 		;
 
+opt_xml_serialize_format:
+			CANONICAL								{ $$ = XMLCANONICAL; }
+			| CANONICAL WITH NO COMMENTS			{ $$ = XMLCANONICAL; }
+			| CANONICAL WITH COMMENTS				{ $$ = XMLCANONICAL_WITH_COMMENTS; }
+			| /*EMPTY*/								{ $$ = XMLDEFAULT_FORMAT; }
+		;
 
 /*
  * Aggregate decoration clauses
@@ -16737,6 +16745,7 @@ unreserved_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CATALOG_P
@@ -17259,6 +17268,7 @@ bare_label_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CASE
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 7ff41acb84..ddfbfe259d 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -2332,6 +2332,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
 
 	xexpr->xmloption = xs->xmloption;
 	xexpr->location = xs->location;
+	xexpr->format = xs->format;
 	/* We actually only need these to be able to parse back the expression. */
 	xexpr->type = targetType;
 	xexpr->typmod = targetTypmod;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 079bcb1208..f5c4ee520c 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -56,6 +56,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -4818,3 +4819,59 @@ XmlTableDestroyOpaque(TableFuncScanState *state)
 	NO_XML_SUPPORT();
 #endif							/* not USE_LIBXML */
 }
+
+xmltype *
+xmlserialize_canonical(text *data, XmlSerializeFormat format)
+{
+#ifdef USE_LIBXML
+
+	xmlDocPtr   doc;
+	xmlChar    *xmlbuf = NULL;
+	int         nbytes;
+	StringInfoData buf;
+
+	if (format != XMLCANONICAL && format != XMLCANONICAL_WITH_COMMENTS)
+		elog(ERROR,"invalid canonical xml option");
+
+	doc = xml_parse(data, XMLOPTION_DOCUMENT, false, GetDatabaseEncoding(), NULL);
+
+	if(!doc)
+		elog(ERROR, "could not parse the given XML document");
+
+	/*
+	* int
+	* xmlC14NDocDumpMemory (
+	*   xmlDocPtr doc,                    # the XML document for canonization
+	*   xmlNodeSetPtr nodes,              # the nodes set to be included in the canonized image
+	*                                       or NULL if all document nodes should be included
+	*	 int mode,                        # 0 = Original C14N 1.0  (Outdated)
+	*	                                    1 = Exclusive C14N 1.0 (Outdated)
+	*                                       2 = C14N 1.1
+	*	 xmlChar **inclusive_ns_prefixes, # the list of inclusive namespace prefixes ended with
+	*	                                     a NULL or NULL if there is no inclusive namespaces
+	*	                                     (only for exclusive canonicalization, ignored otherwise)
+	*	 int with_comments,               # include comments in the result (!=0) or not (==0)
+	*	 xmlChar **xmlbuf                 # the memory pointer for allocated canonical XML text;
+	*	)
+	* Returns: the number of bytes written on success or a negative value on fail.
+	*/
+
+	nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, format, &xmlbuf);
+
+	xmlFreeDoc(doc);
+
+	if(nbytes < 0)
+		elog(ERROR,"could not canonicalize the given XML document");
+
+	initStringInfo(&buf);
+	appendStringInfoString(&buf, (const char *) xmlbuf);
+
+	xmlFree(xmlbuf);
+
+	return stringinfo_to_xmltype(&buf);
+
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif
+}
\ No newline at end of file
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index f7d7f10f7d..c75fec17ba 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -842,6 +842,7 @@ typedef struct XmlSerialize
 	Node	   *expr;
 	TypeName   *typeName;
 	int			location;		/* token location, or -1 if unknown */
+	XmlSerializeFormat	format;
 } XmlSerialize;
 
 /* Partitioning related definitions */
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index b4292253cc..f36d79fb14 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1471,6 +1471,13 @@ typedef enum XmlOptionType
 	XMLOPTION_CONTENT
 } XmlOptionType;
 
+typedef enum XmlSerializeFormat
+{
+	XMLCANONICAL,
+	XMLCANONICAL_WITH_COMMENTS,
+	XMLDEFAULT_FORMAT
+} XmlSerializeFormat;
+
 typedef struct XmlExpr
 {
 	Expr		xpr;
@@ -1491,6 +1498,8 @@ typedef struct XmlExpr
 	int32		typmod pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	int			location;
+	/* serialization format: XMLCANONICAL, XMLCANONICAL_WITH_COMMENTS, XMLINDENT */
+	XmlSerializeFormat format pg_node_attr(query_jumble_ignore);
 } XmlExpr;
 
 /* ----------------
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index bb36213e6f..c1b1a720fe 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -67,6 +67,7 @@ PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("canonical", CANONICAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index 311da06cd6..745ebefe24 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -90,4 +90,5 @@ extern PGDLLIMPORT int xmloption;	/* XmlOptionType, but int for guc enum */
 
 extern PGDLLIMPORT const TableFuncRoutine XmlTableRoutine;
 
+xmltype *xmlserialize_canonical(text *data, XmlSerializeFormat format);
 #endif							/* XML_H */

Jim Jones

jim.jones@uni-muenster.de

almost 3 years ago

In reply to: Jim Jones (#1)

1 attachment(s)

[PATCH] Add CANONICAL option to xmlserialize

On 27.02.23 14:16, I wrote:

Hi,

In order to compare pairs of XML documents for equivalence it is
necessary to convert them first to their canonical form, as described
at W3C Canonical XML 1.1.[1] This spec basically defines a standard
physical representation of xml documents that have more then one
possible representation, so that it is possible to compare them, e.g.
forcing UTF-8 encoding, entity reference replacement, attributes
normalization, etc.

Although it is not part of the XML/SQL standard, it would be nice to
have the option CANONICAL in xmlserialize. Additionally, we could also
add the attribute WITH [NO] COMMENTS to keep or remove xml comments
from the documents.

Something like this:

WITH t(col) AS (
VALUES
('<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE doc SYSTEM "doc.dtd" [
<!ENTITY val "42">
<!ATTLIST xyz attr CDATA "default">
]>


<foo ns:c = "3" ns:b = "2" ns:a = "1"
    xmlns:ns="http://postgresql.org">

    
    
<bar     xmlns:ns="http://postgresql.org" >&val;</bar     >

    
    <empty/>

    
    <iso8859>©</iso8859>

    
    
    <xyz> 321 </xyz>
</foo>
'::xml)
)
SELECT xmlserialize(DOCUMENT col AS text CANONICAL) FROM t;
xmlserialize
--------------------------------------------------------------------------------------------------------------------------------------------------------

<foo xmlns:ns="http://postgresql.org" ns:a="1" ns:b="2"
ns:c="3"><bar>42</bar><empty></empty><iso8859>©</iso8859><xyz
attr="default"> 321 </xyz></foo>
(1 row)

-- using WITH COMMENTS

WITH t(col) AS (
VALUES
(' <foo ns:c = "3" ns:b = "2" ns:a = "1"
    xmlns:ns="http://postgresql.org">
    
    <xyz> 321 </xyz>
</foo>'::xml)
)
SELECT xmlserialize(DOCUMENT col AS text CANONICAL WITH COMMENTS) FROM t;
xmlserialize
------------------------------------------------------------------------------------------------------------------------

<foo xmlns:ns="http://postgresql.org" ns:a="1" ns:b="2" ns:c="3"><xyz> 321 </xyz></foo>
(1 row)

Another option would be to simply create a new function, e.g.
xmlcanonical(doc xml, keep_comments boolean), but I'm not sure if this
would be the right approach.

Attached a very short draft. What do you think?

Best, Jim

1- https://www.w3.org/TR/xml-c14n11/

The attached version includes documentation and tests to the patch.

I hope things are clearer now :)

Best, Jim

Attachments:

v1-0001-Add-CANONICAL-format-to-xmlserialize.patchtext/x-patch; charset=UTF-8; name=v1-0001-Add-CANONICAL-format-to-xmlserialize.patchDownload

From 1a3b8bc66c451863ace0488d32f1c0a876ab8f04 Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Tue, 28 Feb 2023 23:06:30 +0100
Subject: [PATCH v1] Add CANONICAL format to xmlserialize

This patch introduces the CANONICAL option to xmlserialize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification. This option can
be used with the additional parameter WITH [NO] COMMENTS to keep
or remove xml comments from the canonical xml output. This feature
is based on the function xmlC14NDocDumpMemory from the C14N module
of libxml2.

This patch also includes regression tests and documentation.
---
 doc/src/sgml/datatype.sgml            |  41 +++++++++-
 src/backend/executor/execExprInterp.c |  12 ++-
 src/backend/parser/gram.y             |  14 +++-
 src/backend/parser/parse_expr.c       |   1 +
 src/backend/utils/adt/xml.c           |  60 ++++++++++++++
 src/include/nodes/parsenodes.h        |   1 +
 src/include/nodes/primnodes.h         |   9 +++
 src/include/parser/kwlist.h           |   1 +
 src/include/utils/xml.h               |   1 +
 src/test/regress/expected/xml.out     | 108 ++++++++++++++++++++++++++
 src/test/regress/expected/xml_1.out   | 104 +++++++++++++++++++++++++
 src/test/regress/expected/xml_2.out   | 108 ++++++++++++++++++++++++++
 src/test/regress/sql/xml.sql          |  61 +++++++++++++++
 13 files changed, 516 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 467b49b199..46ec95dbb8 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4460,7 +4460,7 @@ xml '<foo>bar</foo>'
     <type>xml</type>, uses the function
     <function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm>
 <synopsis>
-XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> )
+XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ CANONICAL [ WITH [NO] COMMENTS ] ])
 </synopsis>
     <replaceable>type</replaceable> can be
     <type>character</type>, <type>character varying</type>, or
@@ -4470,6 +4470,45 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <repla
     you to simply cast the value.
    </para>
 
+   <para>
+    The option <type>CANONICAL</type> converts a given
+     XML document to its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>
+     based on the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>.
+     It is basically designed to provide applications the ability to compare xml documents or test if they
+     have been changed. The optional parameter <type>WITH [NO] COMMENTS</type> removes or keeps XML comments
+     from the given document.
+    </para>
+
+    <para>
+     Example:
+
+<screen><![CDATA[
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL);
+                       xmlserialize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL WITH COMMENTS);
+                                xmlserialize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+]]></screen>
+   </para>
    <para>
     When a character string value is cast to or from type
     <type>xml</type> without going through <type>XMLPARSE</type> or
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 19351fe34b..f8f10f0ed9 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -3829,6 +3829,8 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 			{
 				Datum	   *argvalue = op->d.xmlexpr.argvalue;
 				bool	   *argnull = op->d.xmlexpr.argnull;
+				XmlSerializeFormat	format = op->d.xmlexpr.xexpr->format;
+				text	   *data;
 
 				/* argument type is known to be xml */
 				Assert(list_length(xexpr->args) == 1);
@@ -3837,9 +3839,15 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 					return;
 				value = argvalue[0];
 
-				*op->resvalue = PointerGetDatum(xmltotext_with_xmloption(DatumGetXmlP(value),
-																		 xexpr->xmloption));
 				*op->resnull = false;
+
+				data = xmltotext_with_xmloption(DatumGetXmlP(value),
+												xexpr->xmloption);
+
+				if (format == XMLDEFAULT_FORMAT)
+					*op->resvalue = PointerGetDatum(data);
+				else if (format == XMLCANONICAL || format == XMLCANONICAL_WITH_COMMENTS)
+					*op->resvalue = PointerGetDatum(xmlserialize_canonical(data,format));
 			}
 			break;
 
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index a0138382a1..af5f3dfdfd 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -619,6 +619,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <defelt>	xmltable_column_option_el
 %type <list>	xml_namespace_list
 %type <target>	xml_namespace_el
+%type <ival> 	opt_xml_serialize_format
 
 %type <node>	func_application func_expr_common_subexpr
 %type <node>	func_expr func_expr_windowless
@@ -676,7 +677,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
 	BOOLEAN_P BOTH BREADTH BY
 
-	CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
+	CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT
 	COMMITTED COMPRESSION CONCURRENTLY CONFIGURATION CONFLICT
@@ -15532,13 +15533,14 @@ func_expr_common_subexpr:
 					$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
 									 list_make3($3, $5, $6), @1);
 				}
-			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename ')'
+			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename opt_xml_serialize_format ')'
 				{
 					XmlSerialize *n = makeNode(XmlSerialize);
 
 					n->xmloption = $3;
 					n->expr = $4;
 					n->typeName = $6;
+					n->format = $7;
 					n->location = @1;
 					$$ = (Node *) n;
 				}
@@ -15622,6 +15624,12 @@ xml_passing_mech:
 			| BY VALUE_P
 		;
 
+opt_xml_serialize_format:
+			CANONICAL								{ $$ = XMLCANONICAL; }
+			| CANONICAL WITH NO COMMENTS			{ $$ = XMLCANONICAL; }
+			| CANONICAL WITH COMMENTS				{ $$ = XMLCANONICAL_WITH_COMMENTS; }
+			| /*EMPTY*/								{ $$ = XMLDEFAULT_FORMAT; }
+		;
 
 /*
  * Aggregate decoration clauses
@@ -16737,6 +16745,7 @@ unreserved_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CATALOG_P
@@ -17259,6 +17268,7 @@ bare_label_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CASE
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 7ff41acb84..ddfbfe259d 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -2332,6 +2332,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
 
 	xexpr->xmloption = xs->xmloption;
 	xexpr->location = xs->location;
+	xexpr->format = xs->format;
 	/* We actually only need these to be able to parse back the expression. */
 	xexpr->type = targetType;
 	xexpr->typmod = targetTypmod;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 079bcb1208..e0119d5ce6 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -56,6 +56,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -4818,3 +4819,62 @@ XmlTableDestroyOpaque(TableFuncScanState *state)
 	NO_XML_SUPPORT();
 #endif							/* not USE_LIBXML */
 }
+
+xmltype *
+xmlserialize_canonical(text *data, XmlSerializeFormat format)
+{
+#ifdef USE_LIBXML
+
+	xmlDocPtr   doc;
+	xmlChar    *xmlbuf = NULL;
+	int         nbytes;
+	int         with_comments = 0; /* 0 = remove xml comments (default) */
+	StringInfoData buf;
+
+	if (format != XMLCANONICAL && format != XMLCANONICAL_WITH_COMMENTS)
+		elog(ERROR,"invalid canonical xml option");
+	else if (format == XMLCANONICAL_WITH_COMMENTS)
+		with_comments = 1;
+
+	doc = xml_parse(data, XMLOPTION_DOCUMENT, false, GetDatabaseEncoding(), NULL);
+
+	if(!doc)
+		elog(ERROR, "could not parse the given XML document");
+
+	/*
+	* int
+	* xmlC14NDocDumpMemory (
+	*   xmlDocPtr doc,                   # the XML document for canonization
+	*   xmlNodeSetPtr nodes,             # the nodes set to be included in the canonized image
+	*                                      or NULL if all document nodes should be included
+	*   int mode,                        # 0 = Original C14N 1.0  (Outdated)
+	*                                      1 = Exclusive C14N 1.0 (Outdated)
+	*                                      2 = C14N 1.1
+	*   xmlChar **inclusive_ns_prefixes, # the list of inclusive namespace prefixes ended with
+	*                                      a NULL or NULL if there is no inclusive namespaces
+	*                                      (only for exclusive canonicalization, ignored otherwise)
+	*   int with_comments,               # include comments in the result (!=0) or not (==0)
+	*   xmlChar **xmlbuf                 # the memory pointer for allocated canonical XML text;
+	* )
+	* Returns: the number of bytes written on success or a negative value on fail.
+	*/
+
+	nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, with_comments, &xmlbuf);
+
+	xmlFreeDoc(doc);
+
+	if(nbytes < 0)
+		elog(ERROR,"could not canonicalize the given XML document");
+
+	initStringInfo(&buf);
+	appendStringInfoString(&buf, (const char *) xmlbuf);
+
+	xmlFree(xmlbuf);
+
+	return stringinfo_to_xmltype(&buf);
+
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif
+}
\ No newline at end of file
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index f7d7f10f7d..8ba0984266 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -842,6 +842,7 @@ typedef struct XmlSerialize
 	Node	   *expr;
 	TypeName   *typeName;
 	int			location;		/* token location, or -1 if unknown */
+	XmlSerializeFormat	format;	/* serialization format */
 } XmlSerialize;
 
 /* Partitioning related definitions */
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index b4292253cc..79fdcc2650 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1471,6 +1471,13 @@ typedef enum XmlOptionType
 	XMLOPTION_CONTENT
 } XmlOptionType;
 
+typedef enum XmlSerializeFormat
+{
+	XMLCANONICAL,				/* canonical form without xml comments */
+	XMLCANONICAL_WITH_COMMENTS,	/* canonical form with xml comments */
+	XMLDEFAULT_FORMAT			/* unformatted xml representation */
+} XmlSerializeFormat;
+
 typedef struct XmlExpr
 {
 	Expr		xpr;
@@ -1491,6 +1498,8 @@ typedef struct XmlExpr
 	int32		typmod pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	int			location;
+	/* serialization format: XMLCANONICAL, XMLCANONICAL_WITH_COMMENTS, XMLINDENT */
+	XmlSerializeFormat format pg_node_attr(query_jumble_ignore);
 } XmlExpr;
 
 /* ----------------
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index bb36213e6f..c1b1a720fe 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -67,6 +67,7 @@ PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("canonical", CANONICAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index 311da06cd6..745ebefe24 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -90,4 +90,5 @@ extern PGDLLIMPORT int xmloption;	/* XmlOptionType, but int for guc enum */
 
 extern PGDLLIMPORT const TableFuncRoutine XmlTableRoutine;
 
+xmltype *xmlserialize_canonical(text *data, XmlSerializeFormat format);
 #endif							/* XML_H */
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 3c357a9c7e..de3bfabcef 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -486,6 +486,114 @@ SELECT xmlserialize(content 'good' as char(10));
 
 SELECT xmlserialize(document 'bad' as text);
 ERROR:  not an XML document
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#163;&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                      
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>£1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                      
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>£1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 378b412db0..8b5be34c50 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -309,6 +309,110 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmlserialize(document 'bad' as text);
                                      ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#163;&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+ERROR:  unsupported XML feature
+LINE 2:   (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+             ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 ERROR:  unsupported XML feature
 LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index 42055c5003..9feeb9301e 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -466,6 +466,114 @@ SELECT xmlserialize(content 'good' as char(10));
 
 SELECT xmlserialize(document 'bad' as text);
 ERROR:  not an XML document
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#163;&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                      
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>£1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                      
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>£1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index ddff459297..32bc650a9d 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -132,6 +132,67 @@ SELECT xmlserialize(content data as character varying(20)) FROM xmltest;
 SELECT xmlserialize(content 'good' as char(10));
 SELECT xmlserialize(document 'bad' as text);
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#163;&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+\set VERBOSITY default
 
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
-- 
2.25.1

Thomas Munro

thomas.munro@gmail.com

almost 3 years ago

In reply to: Jim Jones (#2)

Re: [PATCH] Add CANONICAL option to xmlserialize

On Mon, Mar 6, 2023 at 7:44 AM Jim Jones <jim.jones@uni-muenster.de> wrote:

The attached version includes documentation and tests to the patch.

The CI run for that failed in an interesting way, only on Debian +
Meson, 32 bit. The diffs appear to show that psql has a different
opinion of the column width, while building its header (the "------"
you get at the top of psql's output), even though the actual column
contents was the same. regression.diff[2]https://api.cirrus-ci.com/v1/artifact/task/5800598633709568/testrun/build-32/testrun/regress/regress/regression.diffs shows that there is a "Â£1"
in the output, which is how UTF-8 "£1" looks if you view it with
Latin1 glasses on. Clearly this patch involves transcoding, Latin1
and UTF-8 and I haven't studied it, but it's pretty weird for the 32
bit build to give a different result... could be something to do with
our environment, since .cirrus.yml sets LANG=C in the 32 bit test run
-- maybe try that locally?

That run also generated a core dump, but I think that's just our open
SIGQUIT problem[3]/messages/by-id/20230214202927.xgb2w6b7gnhq6tvv@awork3.anarazel.de and not relevant here.

[1]: https://cirrus-ci.com/build/6319462375227392
[2]: https://api.cirrus-ci.com/v1/artifact/task/5800598633709568/testrun/build-32/testrun/regress/regress/regression.diffs
[3]: /messages/by-id/20230214202927.xgb2w6b7gnhq6tvv@awork3.anarazel.de

Jim Jones

jim.jones@uni-muenster.de

almost 3 years ago

In reply to: Thomas Munro (#3)

Re: [PATCH] Add CANONICAL option to xmlserialize

On 05.03.23 22:00, Thomas Munro wrote:

The CI run for that failed in an interesting way, only on Debian +
Meson, 32 bit. The diffs appear to show that psql has a different
opinion of the column width, while building its header (the "------"
you get at the top of psql's output), even though the actual column
contents was the same. regression.diff[2] shows that there is a "Â£1"
in the output, which is how UTF-8 "£1" looks if you view it with
Latin1 glasses on. Clearly this patch involves transcoding, Latin1
and UTF-8

One of the use cases of this patch is exactly the transcoding of a non
utf-8 document to utf-8 - as described in the XML canonical spec.

and I haven't studied it, but it's pretty weird for the 32
bit build to give a different result...

Yeah, it's pretty weird indeed. I'll try to reproduce this environment
in a container to see if I get the same diff. Although I'm not sure that
by "fixing" the result set for this environment it won't break all the
others.

could be something to do with
our environment, since .cirrus.yml sets LANG=C in the 32 bit test run
-- maybe try that locally?

Also using LANGUAGE=C the result is the same for me - all tests pass
just fine.

That run also generated a core dump, but I think that's just our open
SIGQUIT problem[3] and not relevant here.

[1] https://cirrus-ci.com/build/6319462375227392
[2] https://api.cirrus-ci.com/v1/artifact/task/5800598633709568/testrun/build-32/testrun/regress/regress/regression.diffs
[3] /messages/by-id/20230214202927.xgb2w6b7gnhq6tvv@awork3.anarazel.de

Thanks for the quick reply. Much appreciated!

Thomas Munro

thomas.munro@gmail.com

almost 3 years ago

In reply to: Jim Jones (#4)

Re: [PATCH] Add CANONICAL option to xmlserialize

On Mon, Mar 6, 2023 at 11:20 AM Jim Jones <jim.jones@uni-muenster.de> wrote:

On 05.03.23 22:00, Thomas Munro wrote:

could be something to do with
our environment, since .cirrus.yml sets LANG=C in the 32 bit test run
-- maybe try that locally?

Also using LANGUAGE=C the result is the same for me - all tests pass
just fine.

I couldn't reproduce that locally either, but I just tested on CI with
your patch applied saw the failure, and then removed
"PYTHONCOERCECLOCALE=0 LANG=C" and it's all green:

https://github.com/macdice/postgres/commit/91999f5d13ac2df6f7237a301ed6cf73f2bb5b6d

Without looking too closely, my first guess would have been that this
just isn't going to work without UTF-8 database encoding, so you might
need to skip the test (see for example
src/test/regress/expected/unicode_1.out). It's annoying that "xml"
already has 3 expected variants... hmm. BTW shouldn't it be failing
in a more explicit way somewhere sooner if the database encoding is
not UTF-8, rather than getting confused?

Jim Jones

jim.jones@uni-muenster.de

almost 3 years ago

In reply to: Thomas Munro (#5)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

On 06.03.23 00:32, Thomas Munro wrote:

I couldn't reproduce that locally either, but I just tested on CI with
your patch applied saw the failure, and then removed
"PYTHONCOERCECLOCALE=0 LANG=C" and it's all green:

https://github.com/macdice/postgres/commit/91999f5d13ac2df6f7237a301ed6cf73f2bb5b6d

Without looking too closely, my first guess would have been that this
just isn't going to work without UTF-8 database encoding, so you might
need to skip the test (see for example
src/test/regress/expected/unicode_1.out). It's annoying that "xml"
already has 3 expected variants... hmm. BTW shouldn't it be failing
in a more explicit way somewhere sooner if the database encoding is
not UTF-8, rather than getting confused?

I guess this confusion is happening because xml_parse() was being called
with the database encoding from GetDatabaseEncoding().

I added a condition before calling xml_parse() to check if the xml
document has a different encoding than UTF-8

parse_xml_decl(xml_text2xmlChar(data), NULL, NULL, &encodingStr, NULL);
encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;

doc = xml_parse(data, XMLOPTION_DOCUMENT, false, encoding, NULL);

v2 attached.

Thanks!

Best, Jim

Attachments:

v2-0001-Add-CANONICAL-format-to-xmlserialize.patchtext/x-patch; charset=UTF-8; name=v2-0001-Add-CANONICAL-format-to-xmlserialize.patchDownload

From 23ed616809b319bb175473bd210d81ad95af21ff Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Tue, 28 Feb 2023 23:06:30 +0100
Subject: [PATCH v2] Add CANONICAL format to xmlserialize

This patch introduces the CANONICAL option to xmlserialize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification. This option can
be used with the additional parameter WITH [NO] COMMENTS to keep
or remove xml comments from the canonical xml output. This feature
is based on the function xmlC14NDocDumpMemory from the C14N module
of libxml2.

This patch also includes regression tests and documentation.
---
 doc/src/sgml/datatype.sgml            |  41 +++++++++-
 src/backend/executor/execExprInterp.c |  12 ++-
 src/backend/parser/gram.y             |  14 +++-
 src/backend/parser/parse_expr.c       |   1 +
 src/backend/utils/adt/xml.c           |  70 +++++++++++++++++
 src/include/nodes/parsenodes.h        |   1 +
 src/include/nodes/primnodes.h         |   9 +++
 src/include/parser/kwlist.h           |   1 +
 src/include/utils/xml.h               |   1 +
 src/test/regress/expected/xml.out     | 108 ++++++++++++++++++++++++++
 src/test/regress/expected/xml_1.out   | 104 +++++++++++++++++++++++++
 src/test/regress/expected/xml_2.out   | 108 ++++++++++++++++++++++++++
 src/test/regress/sql/xml.sql          |  61 +++++++++++++++
 13 files changed, 526 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 467b49b199..46ec95dbb8 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4460,7 +4460,7 @@ xml '<foo>bar</foo>'
     <type>xml</type>, uses the function
     <function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm>
 <synopsis>
-XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> )
+XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ CANONICAL [ WITH [NO] COMMENTS ] ])
 </synopsis>
     <replaceable>type</replaceable> can be
     <type>character</type>, <type>character varying</type>, or
@@ -4470,6 +4470,45 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <repla
     you to simply cast the value.
    </para>
 
+   <para>
+    The option <type>CANONICAL</type> converts a given
+     XML document to its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>
+     based on the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>.
+     It is basically designed to provide applications the ability to compare xml documents or test if they
+     have been changed. The optional parameter <type>WITH [NO] COMMENTS</type> removes or keeps XML comments
+     from the given document.
+    </para>
+
+    <para>
+     Example:
+
+<screen><![CDATA[
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL);
+                       xmlserialize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL WITH COMMENTS);
+                                xmlserialize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+]]></screen>
+   </para>
    <para>
     When a character string value is cast to or from type
     <type>xml</type> without going through <type>XMLPARSE</type> or
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 19351fe34b..f8f10f0ed9 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -3829,6 +3829,8 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 			{
 				Datum	   *argvalue = op->d.xmlexpr.argvalue;
 				bool	   *argnull = op->d.xmlexpr.argnull;
+				XmlSerializeFormat	format = op->d.xmlexpr.xexpr->format;
+				text	   *data;
 
 				/* argument type is known to be xml */
 				Assert(list_length(xexpr->args) == 1);
@@ -3837,9 +3839,15 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 					return;
 				value = argvalue[0];
 
-				*op->resvalue = PointerGetDatum(xmltotext_with_xmloption(DatumGetXmlP(value),
-																		 xexpr->xmloption));
 				*op->resnull = false;
+
+				data = xmltotext_with_xmloption(DatumGetXmlP(value),
+												xexpr->xmloption);
+
+				if (format == XMLDEFAULT_FORMAT)
+					*op->resvalue = PointerGetDatum(data);
+				else if (format == XMLCANONICAL || format == XMLCANONICAL_WITH_COMMENTS)
+					*op->resvalue = PointerGetDatum(xmlserialize_canonical(data,format));
 			}
 			break;
 
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index a0138382a1..af5f3dfdfd 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -619,6 +619,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <defelt>	xmltable_column_option_el
 %type <list>	xml_namespace_list
 %type <target>	xml_namespace_el
+%type <ival> 	opt_xml_serialize_format
 
 %type <node>	func_application func_expr_common_subexpr
 %type <node>	func_expr func_expr_windowless
@@ -676,7 +677,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
 	BOOLEAN_P BOTH BREADTH BY
 
-	CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
+	CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT
 	COMMITTED COMPRESSION CONCURRENTLY CONFIGURATION CONFLICT
@@ -15532,13 +15533,14 @@ func_expr_common_subexpr:
 					$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
 									 list_make3($3, $5, $6), @1);
 				}
-			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename ')'
+			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename opt_xml_serialize_format ')'
 				{
 					XmlSerialize *n = makeNode(XmlSerialize);
 
 					n->xmloption = $3;
 					n->expr = $4;
 					n->typeName = $6;
+					n->format = $7;
 					n->location = @1;
 					$$ = (Node *) n;
 				}
@@ -15622,6 +15624,12 @@ xml_passing_mech:
 			| BY VALUE_P
 		;
 
+opt_xml_serialize_format:
+			CANONICAL								{ $$ = XMLCANONICAL; }
+			| CANONICAL WITH NO COMMENTS			{ $$ = XMLCANONICAL; }
+			| CANONICAL WITH COMMENTS				{ $$ = XMLCANONICAL_WITH_COMMENTS; }
+			| /*EMPTY*/								{ $$ = XMLDEFAULT_FORMAT; }
+		;
 
 /*
  * Aggregate decoration clauses
@@ -16737,6 +16745,7 @@ unreserved_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CATALOG_P
@@ -17259,6 +17268,7 @@ bare_label_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CASE
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 7ff41acb84..ddfbfe259d 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -2332,6 +2332,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
 
 	xexpr->xmloption = xs->xmloption;
 	xexpr->location = xs->location;
+	xexpr->format = xs->format;
 	/* We actually only need these to be able to parse back the expression. */
 	xexpr->type = targetType;
 	xexpr->typmod = targetTypmod;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 079bcb1208..f618be524b 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -56,6 +56,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -4818,3 +4819,72 @@ XmlTableDestroyOpaque(TableFuncScanState *state)
 	NO_XML_SUPPORT();
 #endif							/* not USE_LIBXML */
 }
+
+xmltype *
+xmlserialize_canonical(text *data, XmlSerializeFormat format)
+{
+#ifdef USE_LIBXML
+
+	xmlDocPtr   doc;
+	xmlChar    *xmlbuf = NULL;
+	xmlChar    *encodingStr = NULL;
+	int         nbytes;
+	int         with_comments = 0; /* 0 = remove xml comments (default) */
+	int         encoding;
+	StringInfoData buf;
+
+
+	if (format != XMLCANONICAL && format != XMLCANONICAL_WITH_COMMENTS)
+		elog(ERROR,"invalid canonical xml option");
+	else if (format == XMLCANONICAL_WITH_COMMENTS)
+		with_comments = 1;
+
+	/*
+	 * Parses the encoding from the header, so that xml_parse() knows what
+	 * it is dealing with. If not explicitly declared, we assume UTF-8.
+	 */
+	parse_xml_decl(xml_text2xmlChar(data), NULL, NULL, &encodingStr, NULL);
+	encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
+
+	doc = xml_parse(data, XMLOPTION_DOCUMENT, false, encoding, NULL);
+
+	if(!doc)
+		elog(ERROR, "could not parse the given XML document");
+
+	/*
+	* int
+	* xmlC14NDocDumpMemory (
+	*   xmlDocPtr doc,                   # the XML document for canonization
+	*   xmlNodeSetPtr nodes,             # the nodes set to be included in the canonized image
+	*                                      or NULL if all document nodes should be included
+	*   int mode,                        # 0 = Original C14N 1.0  (Outdated)
+	*                                      1 = Exclusive C14N 1.0 (Outdated)
+	*                                      2 = C14N 1.1
+	*   xmlChar **inclusive_ns_prefixes, # the list of inclusive namespace prefixes ended with
+	*                                      a NULL or NULL if there is no inclusive namespaces
+	*                                      (only for exclusive canonicalization, ignored otherwise)
+	*   int with_comments,               # include comments in the result (!=0) or not (==0)
+	*   xmlChar **xmlbuf                 # the memory pointer for allocated canonical XML text;
+	* )
+	* Returns: the number of bytes written on success or a negative value on fail.
+	*/
+
+	nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, with_comments, &xmlbuf);
+
+	xmlFreeDoc(doc);
+
+	if(nbytes < 0)
+		elog(ERROR,"could not canonicalize the given XML document");
+
+	initStringInfo(&buf);
+	appendStringInfoString(&buf, (const char *) xmlbuf);
+
+	xmlFree(xmlbuf);
+
+	return stringinfo_to_xmltype(&buf);
+
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif
+}
\ No newline at end of file
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index f7d7f10f7d..8ba0984266 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -842,6 +842,7 @@ typedef struct XmlSerialize
 	Node	   *expr;
 	TypeName   *typeName;
 	int			location;		/* token location, or -1 if unknown */
+	XmlSerializeFormat	format;	/* serialization format */
 } XmlSerialize;
 
 /* Partitioning related definitions */
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index b4292253cc..79fdcc2650 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1471,6 +1471,13 @@ typedef enum XmlOptionType
 	XMLOPTION_CONTENT
 } XmlOptionType;
 
+typedef enum XmlSerializeFormat
+{
+	XMLCANONICAL,				/* canonical form without xml comments */
+	XMLCANONICAL_WITH_COMMENTS,	/* canonical form with xml comments */
+	XMLDEFAULT_FORMAT			/* unformatted xml representation */
+} XmlSerializeFormat;
+
 typedef struct XmlExpr
 {
 	Expr		xpr;
@@ -1491,6 +1498,8 @@ typedef struct XmlExpr
 	int32		typmod pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	int			location;
+	/* serialization format: XMLCANONICAL, XMLCANONICAL_WITH_COMMENTS, XMLINDENT */
+	XmlSerializeFormat format pg_node_attr(query_jumble_ignore);
 } XmlExpr;
 
 /* ----------------
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index bb36213e6f..c1b1a720fe 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -67,6 +67,7 @@ PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("canonical", CANONICAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index 311da06cd6..745ebefe24 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -90,4 +90,5 @@ extern PGDLLIMPORT int xmloption;	/* XmlOptionType, but int for guc enum */
 
 extern PGDLLIMPORT const TableFuncRoutine XmlTableRoutine;
 
+xmltype *xmlserialize_canonical(text *data, XmlSerializeFormat format);
 #endif							/* XML_H */
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 3c357a9c7e..de3bfabcef 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -486,6 +486,114 @@ SELECT xmlserialize(content 'good' as char(10));
 
 SELECT xmlserialize(document 'bad' as text);
 ERROR:  not an XML document
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#163;&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                      
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>£1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                      
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>£1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 378b412db0..8b5be34c50 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -309,6 +309,110 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmlserialize(document 'bad' as text);
                                      ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#163;&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+ERROR:  unsupported XML feature
+LINE 2:   (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+             ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 ERROR:  unsupported XML feature
 LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index 42055c5003..9feeb9301e 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -466,6 +466,114 @@ SELECT xmlserialize(content 'good' as char(10));
 
 SELECT xmlserialize(document 'bad' as text);
 ERROR:  not an XML document
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#163;&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                      
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>£1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                      
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>£1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index ddff459297..32bc650a9d 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -132,6 +132,67 @@ SELECT xmlserialize(content data as character varying(20)) FROM xmltest;
 SELECT xmlserialize(content 'good' as char(10));
 SELECT xmlserialize(document 'bad' as text);
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#163;&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+\set VERBOSITY default
 
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
-- 
2.25.1

Jim Jones

jim.jones@uni-muenster.de

almost 3 years ago

In reply to: Jim Jones (#6)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

On 06.03.23 11:50, I wrote:

I guess this confusion is happening because xml_parse() was being
called with the database encoding from GetDatabaseEncoding().

I added a condition before calling xml_parse() to check if the xml
document has a different encoding than UTF-8

parse_xml_decl(xml_text2xmlChar(data), NULL, NULL, &encodingStr, NULL);
encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;

doc = xml_parse(data, XMLOPTION_DOCUMENT, false, encoding, NULL);

It seems that this bug fix didn't change the output of the CI on Debian
+ Meson, 32bit.

I slightly changed the test case to a character that both encodings can
deal with.

v3 attached.

Attachments:

v3-0001-Add-CANONICAL-format-to-xmlserialize.patchtext/x-patch; charset=UTF-8; name=v3-0001-Add-CANONICAL-format-to-xmlserialize.patchDownload

From 759bacecb7e9355f41ebd02cbca38cf893e3cfc0 Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Mon, 6 Mar 2023 14:08:54 +0100
Subject: [PATCH v3] Add CANONICAL format to xmlserialize

This patch introduces the CANONICAL option to xmlserialize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification. This option can
be used with the additional parameter WITH [NO] COMMENTS to keep
or remove xml comments from the canonical xml output. This feature
is based on the function xmlC14NDocDumpMemory from the C14N module
of libxml2.

This patch also includes regression tests and documentation.
---
 doc/src/sgml/datatype.sgml            |  41 +++++++++-
 src/backend/executor/execExprInterp.c |  12 ++-
 src/backend/parser/gram.y             |  14 +++-
 src/backend/parser/parse_expr.c       |   1 +
 src/backend/utils/adt/xml.c           |  70 +++++++++++++++++
 src/include/nodes/parsenodes.h        |   1 +
 src/include/nodes/primnodes.h         |   9 +++
 src/include/parser/kwlist.h           |   1 +
 src/include/utils/xml.h               |   1 +
 src/test/regress/expected/xml.out     | 108 ++++++++++++++++++++++++++
 src/test/regress/expected/xml_1.out   | 104 +++++++++++++++++++++++++
 src/test/regress/expected/xml_2.out   | 108 ++++++++++++++++++++++++++
 src/test/regress/sql/xml.sql          |  61 +++++++++++++++
 13 files changed, 526 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 467b49b199..46ec95dbb8 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4460,7 +4460,7 @@ xml '<foo>bar</foo>'
     <type>xml</type>, uses the function
     <function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm>
 <synopsis>
-XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> )
+XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ CANONICAL [ WITH [NO] COMMENTS ] ])
 </synopsis>
     <replaceable>type</replaceable> can be
     <type>character</type>, <type>character varying</type>, or
@@ -4470,6 +4470,45 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <repla
     you to simply cast the value.
    </para>
 
+   <para>
+    The option <type>CANONICAL</type> converts a given
+     XML document to its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>
+     based on the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>.
+     It is basically designed to provide applications the ability to compare xml documents or test if they
+     have been changed. The optional parameter <type>WITH [NO] COMMENTS</type> removes or keeps XML comments
+     from the given document.
+    </para>
+
+    <para>
+     Example:
+
+<screen><![CDATA[
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL);
+                       xmlserialize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL WITH COMMENTS);
+                                xmlserialize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+]]></screen>
+   </para>
    <para>
     When a character string value is cast to or from type
     <type>xml</type> without going through <type>XMLPARSE</type> or
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 19351fe34b..f8f10f0ed9 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -3829,6 +3829,8 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 			{
 				Datum	   *argvalue = op->d.xmlexpr.argvalue;
 				bool	   *argnull = op->d.xmlexpr.argnull;
+				XmlSerializeFormat	format = op->d.xmlexpr.xexpr->format;
+				text	   *data;
 
 				/* argument type is known to be xml */
 				Assert(list_length(xexpr->args) == 1);
@@ -3837,9 +3839,15 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 					return;
 				value = argvalue[0];
 
-				*op->resvalue = PointerGetDatum(xmltotext_with_xmloption(DatumGetXmlP(value),
-																		 xexpr->xmloption));
 				*op->resnull = false;
+
+				data = xmltotext_with_xmloption(DatumGetXmlP(value),
+												xexpr->xmloption);
+
+				if (format == XMLDEFAULT_FORMAT)
+					*op->resvalue = PointerGetDatum(data);
+				else if (format == XMLCANONICAL || format == XMLCANONICAL_WITH_COMMENTS)
+					*op->resvalue = PointerGetDatum(xmlserialize_canonical(data,format));
 			}
 			break;
 
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index a0138382a1..af5f3dfdfd 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -619,6 +619,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <defelt>	xmltable_column_option_el
 %type <list>	xml_namespace_list
 %type <target>	xml_namespace_el
+%type <ival> 	opt_xml_serialize_format
 
 %type <node>	func_application func_expr_common_subexpr
 %type <node>	func_expr func_expr_windowless
@@ -676,7 +677,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
 	BOOLEAN_P BOTH BREADTH BY
 
-	CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
+	CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT
 	COMMITTED COMPRESSION CONCURRENTLY CONFIGURATION CONFLICT
@@ -15532,13 +15533,14 @@ func_expr_common_subexpr:
 					$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
 									 list_make3($3, $5, $6), @1);
 				}
-			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename ')'
+			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename opt_xml_serialize_format ')'
 				{
 					XmlSerialize *n = makeNode(XmlSerialize);
 
 					n->xmloption = $3;
 					n->expr = $4;
 					n->typeName = $6;
+					n->format = $7;
 					n->location = @1;
 					$$ = (Node *) n;
 				}
@@ -15622,6 +15624,12 @@ xml_passing_mech:
 			| BY VALUE_P
 		;
 
+opt_xml_serialize_format:
+			CANONICAL								{ $$ = XMLCANONICAL; }
+			| CANONICAL WITH NO COMMENTS			{ $$ = XMLCANONICAL; }
+			| CANONICAL WITH COMMENTS				{ $$ = XMLCANONICAL_WITH_COMMENTS; }
+			| /*EMPTY*/								{ $$ = XMLDEFAULT_FORMAT; }
+		;
 
 /*
  * Aggregate decoration clauses
@@ -16737,6 +16745,7 @@ unreserved_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CATALOG_P
@@ -17259,6 +17268,7 @@ bare_label_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CASE
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 78221d2e0f..799a345067 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -2332,6 +2332,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
 
 	xexpr->xmloption = xs->xmloption;
 	xexpr->location = xs->location;
+	xexpr->format = xs->format;
 	/* We actually only need these to be able to parse back the expression. */
 	xexpr->type = targetType;
 	xexpr->typmod = targetTypmod;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 079bcb1208..f618be524b 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -56,6 +56,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -4818,3 +4819,72 @@ XmlTableDestroyOpaque(TableFuncScanState *state)
 	NO_XML_SUPPORT();
 #endif							/* not USE_LIBXML */
 }
+
+xmltype *
+xmlserialize_canonical(text *data, XmlSerializeFormat format)
+{
+#ifdef USE_LIBXML
+
+	xmlDocPtr   doc;
+	xmlChar    *xmlbuf = NULL;
+	xmlChar    *encodingStr = NULL;
+	int         nbytes;
+	int         with_comments = 0; /* 0 = remove xml comments (default) */
+	int         encoding;
+	StringInfoData buf;
+
+
+	if (format != XMLCANONICAL && format != XMLCANONICAL_WITH_COMMENTS)
+		elog(ERROR,"invalid canonical xml option");
+	else if (format == XMLCANONICAL_WITH_COMMENTS)
+		with_comments = 1;
+
+	/*
+	 * Parses the encoding from the header, so that xml_parse() knows what
+	 * it is dealing with. If not explicitly declared, we assume UTF-8.
+	 */
+	parse_xml_decl(xml_text2xmlChar(data), NULL, NULL, &encodingStr, NULL);
+	encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
+
+	doc = xml_parse(data, XMLOPTION_DOCUMENT, false, encoding, NULL);
+
+	if(!doc)
+		elog(ERROR, "could not parse the given XML document");
+
+	/*
+	* int
+	* xmlC14NDocDumpMemory (
+	*   xmlDocPtr doc,                   # the XML document for canonization
+	*   xmlNodeSetPtr nodes,             # the nodes set to be included in the canonized image
+	*                                      or NULL if all document nodes should be included
+	*   int mode,                        # 0 = Original C14N 1.0  (Outdated)
+	*                                      1 = Exclusive C14N 1.0 (Outdated)
+	*                                      2 = C14N 1.1
+	*   xmlChar **inclusive_ns_prefixes, # the list of inclusive namespace prefixes ended with
+	*                                      a NULL or NULL if there is no inclusive namespaces
+	*                                      (only for exclusive canonicalization, ignored otherwise)
+	*   int with_comments,               # include comments in the result (!=0) or not (==0)
+	*   xmlChar **xmlbuf                 # the memory pointer for allocated canonical XML text;
+	* )
+	* Returns: the number of bytes written on success or a negative value on fail.
+	*/
+
+	nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, with_comments, &xmlbuf);
+
+	xmlFreeDoc(doc);
+
+	if(nbytes < 0)
+		elog(ERROR,"could not canonicalize the given XML document");
+
+	initStringInfo(&buf);
+	appendStringInfoString(&buf, (const char *) xmlbuf);
+
+	xmlFree(xmlbuf);
+
+	return stringinfo_to_xmltype(&buf);
+
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif
+}
\ No newline at end of file
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index f7d7f10f7d..8ba0984266 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -842,6 +842,7 @@ typedef struct XmlSerialize
 	Node	   *expr;
 	TypeName   *typeName;
 	int			location;		/* token location, or -1 if unknown */
+	XmlSerializeFormat	format;	/* serialization format */
 } XmlSerialize;
 
 /* Partitioning related definitions */
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index b4292253cc..79fdcc2650 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1471,6 +1471,13 @@ typedef enum XmlOptionType
 	XMLOPTION_CONTENT
 } XmlOptionType;
 
+typedef enum XmlSerializeFormat
+{
+	XMLCANONICAL,				/* canonical form without xml comments */
+	XMLCANONICAL_WITH_COMMENTS,	/* canonical form with xml comments */
+	XMLDEFAULT_FORMAT			/* unformatted xml representation */
+} XmlSerializeFormat;
+
 typedef struct XmlExpr
 {
 	Expr		xpr;
@@ -1491,6 +1498,8 @@ typedef struct XmlExpr
 	int32		typmod pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	int			location;
+	/* serialization format: XMLCANONICAL, XMLCANONICAL_WITH_COMMENTS, XMLINDENT */
+	XmlSerializeFormat format pg_node_attr(query_jumble_ignore);
 } XmlExpr;
 
 /* ----------------
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index bb36213e6f..c1b1a720fe 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -67,6 +67,7 @@ PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("canonical", CANONICAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index 311da06cd6..745ebefe24 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -90,4 +90,5 @@ extern PGDLLIMPORT int xmloption;	/* XmlOptionType, but int for guc enum */
 
 extern PGDLLIMPORT const TableFuncRoutine XmlTableRoutine;
 
+xmltype *xmlserialize_canonical(text *data, XmlSerializeFormat format);
 #endif							/* XML_H */
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index ad852dc2f7..5c205a0d88 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -486,6 +486,114 @@ SELECT xmlserialize(content 'good' as char(10));
 
 SELECT xmlserialize(document 'bad' as text);
 ERROR:  not an XML document
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 70fe34a04f..cb372c39e2 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -309,6 +309,110 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmlserialize(document 'bad' as text);
                                      ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+ERROR:  unsupported XML feature
+LINE 2:   (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+             ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 ERROR:  unsupported XML feature
 LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index 4f029d0072..5a05aad34c 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -466,6 +466,114 @@ SELECT xmlserialize(content 'good' as char(10));
 
 SELECT xmlserialize(document 'bad' as text);
 ERROR:  not an XML document
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index 24e40d2653..90ede4b799 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -132,6 +132,67 @@ SELECT xmlserialize(content data as character varying(20)) FROM xmltest;
 SELECT xmlserialize(content 'good' as char(10));
 SELECT xmlserialize(document 'bad' as text);
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+\set VERBOSITY default
 
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
-- 
2.25.1

Jim Jones

jim.jones@uni-muenster.de

almost 3 years ago

In reply to: Jim Jones (#7)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

v4 attached fixes an encoding issue at the xml_parse call. It now uses
GetDatabaseEncoding().

Best, Jim

Attachments:

v4-0001-Add-CANONICAL-format-to-xmlserialize.patchtext/x-patch; charset=UTF-8; name=v4-0001-Add-CANONICAL-format-to-xmlserialize.patchDownload

From 3ff8e7bd9a9e43194d834ba6b125841539d5df1c Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Mon, 6 Mar 2023 14:08:54 +0100
Subject: [PATCH v4] Add CANONICAL format to xmlserialize

This patch introduces the CANONICAL option to xmlserialize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification. This option can
be used with the additional parameter WITH [NO] COMMENTS to keep
or remove xml comments from the canonical xml output. This feature
is based on the function xmlC14NDocDumpMemory from the C14N module
of libxml2.

This patch also includes regression tests and documentation.
---
 doc/src/sgml/datatype.sgml            |  41 +++++++++-
 src/backend/executor/execExprInterp.c |  13 +++-
 src/backend/parser/gram.y             |  14 +++-
 src/backend/parser/parse_expr.c       |   1 +
 src/backend/utils/adt/xml.c           |  57 ++++++++++++++
 src/include/nodes/parsenodes.h        |   1 +
 src/include/nodes/primnodes.h         |   9 +++
 src/include/parser/kwlist.h           |   1 +
 src/include/utils/xml.h               |   1 +
 src/test/regress/expected/xml.out     | 108 ++++++++++++++++++++++++++
 src/test/regress/expected/xml_1.out   | 104 +++++++++++++++++++++++++
 src/test/regress/expected/xml_2.out   | 108 ++++++++++++++++++++++++++
 src/test/regress/sql/xml.sql          |  61 +++++++++++++++
 13 files changed, 514 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 467b49b199..46ec95dbb8 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4460,7 +4460,7 @@ xml '<foo>bar</foo>'
     <type>xml</type>, uses the function
     <function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm>
 <synopsis>
-XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> )
+XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ CANONICAL [ WITH [NO] COMMENTS ] ])
 </synopsis>
     <replaceable>type</replaceable> can be
     <type>character</type>, <type>character varying</type>, or
@@ -4470,6 +4470,45 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <repla
     you to simply cast the value.
    </para>
 
+   <para>
+    The option <type>CANONICAL</type> converts a given
+     XML document to its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>
+     based on the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>.
+     It is basically designed to provide applications the ability to compare xml documents or test if they
+     have been changed. The optional parameter <type>WITH [NO] COMMENTS</type> removes or keeps XML comments
+     from the given document.
+    </para>
+
+    <para>
+     Example:
+
+<screen><![CDATA[
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL);
+                       xmlserialize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL WITH COMMENTS);
+                                xmlserialize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+]]></screen>
+   </para>
    <para>
     When a character string value is cast to or from type
     <type>xml</type> without going through <type>XMLPARSE</type> or
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 19351fe34b..ce376a6fcd 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -3827,18 +3827,27 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 
 		case IS_XMLSERIALIZE:
 			{
+				text	   *result;
 				Datum	   *argvalue = op->d.xmlexpr.argvalue;
 				bool	   *argnull = op->d.xmlexpr.argnull;
+				XmlSerializeFormat	format = op->d.xmlexpr.xexpr->format;
 
 				/* argument type is known to be xml */
 				Assert(list_length(xexpr->args) == 1);
 
 				if (argnull[0])
 					return;
+
 				value = argvalue[0];
 
-				*op->resvalue = PointerGetDatum(xmltotext_with_xmloption(DatumGetXmlP(value),
-																		 xexpr->xmloption));
+				if (format == XMLCANONICAL || format == XMLCANONICAL_WITH_COMMENTS)
+					result = xmlserialize_canonical(DatumGetXmlP(value),
+													format);
+				else
+					result = xmltotext_with_xmloption(DatumGetXmlP(value),
+													  xexpr->xmloption);
+
+				*op->resvalue = PointerGetDatum(result);
 				*op->resnull = false;
 			}
 			break;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index a0138382a1..af5f3dfdfd 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -619,6 +619,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <defelt>	xmltable_column_option_el
 %type <list>	xml_namespace_list
 %type <target>	xml_namespace_el
+%type <ival> 	opt_xml_serialize_format
 
 %type <node>	func_application func_expr_common_subexpr
 %type <node>	func_expr func_expr_windowless
@@ -676,7 +677,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
 	BOOLEAN_P BOTH BREADTH BY
 
-	CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
+	CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT
 	COMMITTED COMPRESSION CONCURRENTLY CONFIGURATION CONFLICT
@@ -15532,13 +15533,14 @@ func_expr_common_subexpr:
 					$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
 									 list_make3($3, $5, $6), @1);
 				}
-			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename ')'
+			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename opt_xml_serialize_format ')'
 				{
 					XmlSerialize *n = makeNode(XmlSerialize);
 
 					n->xmloption = $3;
 					n->expr = $4;
 					n->typeName = $6;
+					n->format = $7;
 					n->location = @1;
 					$$ = (Node *) n;
 				}
@@ -15622,6 +15624,12 @@ xml_passing_mech:
 			| BY VALUE_P
 		;
 
+opt_xml_serialize_format:
+			CANONICAL								{ $$ = XMLCANONICAL; }
+			| CANONICAL WITH NO COMMENTS			{ $$ = XMLCANONICAL; }
+			| CANONICAL WITH COMMENTS				{ $$ = XMLCANONICAL_WITH_COMMENTS; }
+			| /*EMPTY*/								{ $$ = XMLDEFAULT_FORMAT; }
+		;
 
 /*
  * Aggregate decoration clauses
@@ -16737,6 +16745,7 @@ unreserved_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CATALOG_P
@@ -17259,6 +17268,7 @@ bare_label_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CASE
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 78221d2e0f..799a345067 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -2332,6 +2332,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
 
 	xexpr->xmloption = xs->xmloption;
 	xexpr->location = xs->location;
+	xexpr->format = xs->format;
 	/* We actually only need these to be able to parse back the expression. */
 	xexpr->type = targetType;
 	xexpr->typmod = targetTypmod;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 079bcb1208..37e2df968a 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -56,6 +56,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -4818,3 +4819,59 @@ XmlTableDestroyOpaque(TableFuncScanState *state)
 	NO_XML_SUPPORT();
 #endif							/* not USE_LIBXML */
 }
+
+text *
+xmlserialize_canonical(text *data, XmlSerializeFormat format)
+{
+#ifdef USE_LIBXML
+
+	xmlDocPtr   doc;
+	xmlChar    *xmlbuf = NULL;
+	text	   *result;
+	int         nbytes;
+	int         with_comments = 0; /* 0 = remove xml comments (default) */
+
+	if (format != XMLCANONICAL && format != XMLCANONICAL_WITH_COMMENTS)
+		elog(ERROR,"invalid canonical xml option");
+	else if (format == XMLCANONICAL_WITH_COMMENTS)
+		with_comments = 1;
+
+	doc = xml_parse(data, XMLOPTION_DOCUMENT, false, GetDatabaseEncoding(), NULL);
+
+	Assert(doc);
+
+	/*
+	* xmlC14NDocDumpMemory (
+	*   xmlDocPtr doc,                   # the XML document for canonization
+	*   xmlNodeSetPtr nodes,             # the nodes set to be included in the canonized image
+	*                                      or NULL if all document nodes should be included
+	*   int mode,                        # 0 = Original C14N 1.0  (Outdated)
+	*                                      1 = Exclusive C14N 1.0 (Outdated)
+	*                                      2 = C14N 1.1
+	*   xmlChar **inclusive_ns_prefixes, # the list of inclusive namespace prefixes ended with
+	*                                      a NULL or NULL if there is no inclusive namespaces
+	*                                      (only for exclusive canonicalization, ignored otherwise)
+	*   int with_comments,               # include comments in the result (!=0) or not (==0)
+	*   xmlChar **xmlbuf                 # the memory pointer for allocated canonical XML text;
+	* )
+	* Returns: the number of bytes written on success or a negative value on fail.
+	*/
+
+	nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, with_comments, &xmlbuf);
+
+	xmlFreeDoc(doc);
+
+	if(nbytes < 0)
+		elog(ERROR,"could not canonicalize the given XML document");
+
+	result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+
+	xmlFree(xmlbuf);
+
+	return result;
+
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif
+}
\ No newline at end of file
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index f7d7f10f7d..8ba0984266 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -842,6 +842,7 @@ typedef struct XmlSerialize
 	Node	   *expr;
 	TypeName   *typeName;
 	int			location;		/* token location, or -1 if unknown */
+	XmlSerializeFormat	format;	/* serialization format */
 } XmlSerialize;
 
 /* Partitioning related definitions */
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index b4292253cc..79fdcc2650 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1471,6 +1471,13 @@ typedef enum XmlOptionType
 	XMLOPTION_CONTENT
 } XmlOptionType;
 
+typedef enum XmlSerializeFormat
+{
+	XMLCANONICAL,				/* canonical form without xml comments */
+	XMLCANONICAL_WITH_COMMENTS,	/* canonical form with xml comments */
+	XMLDEFAULT_FORMAT			/* unformatted xml representation */
+} XmlSerializeFormat;
+
 typedef struct XmlExpr
 {
 	Expr		xpr;
@@ -1491,6 +1498,8 @@ typedef struct XmlExpr
 	int32		typmod pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	int			location;
+	/* serialization format: XMLCANONICAL, XMLCANONICAL_WITH_COMMENTS, XMLINDENT */
+	XmlSerializeFormat format pg_node_attr(query_jumble_ignore);
 } XmlExpr;
 
 /* ----------------
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index bb36213e6f..c1b1a720fe 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -67,6 +67,7 @@ PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("canonical", CANONICAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index 311da06cd6..c894310e18 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -90,4 +90,5 @@ extern PGDLLIMPORT int xmloption;	/* XmlOptionType, but int for guc enum */
 
 extern PGDLLIMPORT const TableFuncRoutine XmlTableRoutine;
 
+extern text *xmlserialize_canonical(text *data, XmlSerializeFormat format);
 #endif							/* XML_H */
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index ad852dc2f7..c17cc266c7 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -486,6 +486,114 @@ SELECT xmlserialize(content 'good' as char(10));
 
 SELECT xmlserialize(document 'bad' as text);
 ERROR:  not an XML document
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 70fe34a04f..cb372c39e2 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -309,6 +309,110 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmlserialize(document 'bad' as text);
                                      ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+ERROR:  unsupported XML feature
+LINE 2:   (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+             ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 ERROR:  unsupported XML feature
 LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index 4f029d0072..4c51962c18 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -466,6 +466,114 @@ SELECT xmlserialize(content 'good' as char(10));
 
 SELECT xmlserialize(document 'bad' as text);
 ERROR:  not an XML document
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index 24e40d2653..90ede4b799 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -132,6 +132,67 @@ SELECT xmlserialize(content data as character varying(20)) FROM xmltest;
 SELECT xmlserialize(content 'good' as char(10));
 SELECT xmlserialize(document 'bad' as text);
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+\set VERBOSITY default
 
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
-- 
2.25.1

Jim Jones

jim.jones@uni-muenster.de

almost 3 years ago

In reply to: Jim Jones (#8)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

v5 attached is a rebase over the latest changes in xmlserialize (INDENT
output).

Attachments:

v5-0001-Add-CANONICAL-output-format-to-xmlserialize.patchtext/x-patch; charset=UTF-8; name=v5-0001-Add-CANONICAL-output-format-to-xmlserialize.patchDownload

From 24f045ccf7ac000a509910cb32c54ce4c91e2c33 Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Fri, 17 Mar 2023 10:23:48 +0100
Subject: [PATCH v5] Add CANONICAL output format to xmlserialize

This patch introduces the CANONICAL option to xmlserialize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification. This option can
be used with the additional parameter WITH [NO] COMMENTS to keep
or remove xml comments from the canonical xml output. This feature
is based on the function xmlC14NDocDumpMemory from the C14N module
of libxml2.

This patch also includes regression tests and documentation.
---
 doc/src/sgml/datatype.sgml            |  41 +++-
 src/backend/executor/execExprInterp.c |   2 +-
 src/backend/parser/gram.y             |  21 ++-
 src/backend/parser/parse_expr.c       |   2 +-
 src/backend/utils/adt/xml.c           | 258 ++++++++++++++++----------
 src/include/nodes/parsenodes.h        |   1 +
 src/include/nodes/primnodes.h         |  12 +-
 src/include/parser/kwlist.h           |   1 +
 src/include/utils/xml.h               |   2 +-
 src/test/regress/expected/xml.out     | 112 +++++++++++
 src/test/regress/expected/xml_1.out   | 108 +++++++++++
 src/test/regress/expected/xml_2.out   | 112 +++++++++++
 src/test/regress/sql/xml.sql          |  63 +++++++
 13 files changed, 626 insertions(+), 109 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 4df8bd1b64..90415265d4 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4460,7 +4460,7 @@ xml '<foo>bar</foo>'
     <type>xml</type>, uses the function
     <function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm>
 <synopsis>
-XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ [ NO ] INDENT ] )
+XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ { [ NO ] INDENT ] | CANONICAL [ WITH [NO] COMMENTS ]})
 </synopsis>
     <replaceable>type</replaceable> can be
     <type>character</type>, <type>character varying</type>, or
@@ -4477,6 +4477,45 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <repla
     type likewise produces the original string.
    </para>
 
+   <para>
+    The option <type>CANONICAL</type> converts a given
+    XML document to its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>
+    based on the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>.
+    It is basically designed to provide applications the ability to compare xml documents or test if they
+    have been changed. The optional parameter <type>WITH [NO] COMMENTS</type> removes or keeps XML comments
+    from the given document.
+    </para>
+
+     <para>
+     Example:
+
+<screen><![CDATA[
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL);
+                       xmlserialize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL WITH COMMENTS);
+                                xmlserialize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+]]></screen>
+   </para>
    <para>
     When a character string value is cast to or from type
     <type>xml</type> without going through <type>XMLPARSE</type> or
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 9cb9625ce9..d3dfc9ecfc 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -3840,7 +3840,7 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 				*op->resvalue =
 					PointerGetDatum(xmltotext_with_options(DatumGetXmlP(value),
 														   xexpr->xmloption,
-														   xexpr->indent));
+														   xexpr->format));
 				*op->resnull = false;
 			}
 			break;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index efe88ccf9d..fd9315feb8 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -613,12 +613,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <node>	xml_root_version opt_xml_root_standalone
 %type <node>	xmlexists_argument
 %type <ival>	document_or_content
-%type <boolean>	xml_indent_option xml_whitespace_option
+%type <boolean>	xml_whitespace_option
 %type <list>	xmltable_column_list xmltable_column_option_list
 %type <node>	xmltable_column_el
 %type <defelt>	xmltable_column_option_el
 %type <list>	xml_namespace_list
 %type <target>	xml_namespace_el
+%type <ival> 	opt_xml_serialize_format
 
 %type <node>	func_application func_expr_common_subexpr
 %type <node>	func_expr func_expr_windowless
@@ -676,7 +677,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
 	BOOLEAN_P BOTH BREADTH BY
 
-	CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
+	CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT
 	COMMITTED COMPRESSION CONCURRENTLY CONFIGURATION CONFLICT
@@ -15532,14 +15533,14 @@ func_expr_common_subexpr:
 					$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
 									 list_make3($3, $5, $6), @1);
 				}
-			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename xml_indent_option ')'
+			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename opt_xml_serialize_format ')'
 				{
 					XmlSerialize *n = makeNode(XmlSerialize);
 
 					n->xmloption = $3;
 					n->expr = $4;
 					n->typeName = $6;
-					n->indent = $7;
+					n->format = $7;
 					n->location = @1;
 					$$ = (Node *) n;
 				}
@@ -15593,9 +15594,13 @@ document_or_content: DOCUMENT_P						{ $$ = XMLOPTION_DOCUMENT; }
 			| CONTENT_P								{ $$ = XMLOPTION_CONTENT; }
 		;
 
-xml_indent_option: INDENT							{ $$ = true; }
-			| NO INDENT								{ $$ = false; }
-			| /*EMPTY*/								{ $$ = false; }
+opt_xml_serialize_format:
+			INDENT									{ $$ = XMLSERIALIZE_INDENT; }
+			| NO INDENT								{ $$ = XMLSERIALIZE_NO_FORMAT; }
+			| CANONICAL								{ $$ = XMLSERIALIZE_CANONICAL; }
+			| CANONICAL WITH NO COMMENTS			{ $$ = XMLSERIALIZE_CANONICAL; }
+			| CANONICAL WITH COMMENTS				{ $$ = XMLSERIALIZE_CANONICAL_WITH_COMMENTS; }
+			| /*EMPTY*/								{ $$ = XMLSERIALIZE_NO_FORMAT; }
 		;
 
 xml_whitespace_option: PRESERVE WHITESPACE_P		{ $$ = true; }
@@ -16743,6 +16748,7 @@ unreserved_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CATALOG_P
@@ -17266,6 +17272,7 @@ bare_label_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CASE
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 2331417552..7e198fc985 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -2331,7 +2331,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
 	typenameTypeIdAndMod(pstate, xs->typeName, &targetType, &targetTypmod);
 
 	xexpr->xmloption = xs->xmloption;
-	xexpr->indent = xs->indent;
+	xexpr->format = xs->format;
 	xexpr->location = xs->location;
 	/* We actually only need these to be able to parse back the expression. */
 	xexpr->type = targetType;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 15adbd6a01..576b76d4f7 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -57,6 +57,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -622,7 +623,7 @@ xmltotext(PG_FUNCTION_ARGS)
 
 
 text *
-xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
+xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, XmlSerializeFormat format)
 {
 #ifdef USE_LIBXML
 	text	   *volatile result;
@@ -635,7 +636,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	PgXmlErrorContext *xmlerrcxt;
 #endif
 
-	if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
+	if (xmloption_arg != XMLOPTION_DOCUMENT && format == XMLSERIALIZE_NO_FORMAT)
 	{
 		/*
 		 * We don't actually need to do anything, so just return the
@@ -661,7 +662,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 	/* If we weren't asked to indent, we're done. */
-	if (!indent)
+	if (format == XMLSERIALIZE_NO_FORMAT)
 	{
 		xmlFreeDoc(doc);
 		return (text *) data;
@@ -670,130 +671,195 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	/* Otherwise, we gotta spin up some error handling. */
 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
-	PG_TRY();
+	if(format == XMLSERIALIZE_INDENT)
 	{
-		size_t		decl_len = 0;
-
-		/* The serialized data will go into this buffer. */
-		buf = xmlBufferCreate();
-
-		if (buf == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlBuffer");
-
-		/* Detect whether there's an XML declaration */
-		parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
-
-		/*
-		 * Emit declaration only if the input had one.  Note: some versions of
-		 * xmlSaveToBuffer leak memory if a non-null encoding argument is
-		 * passed, so don't do that.  We don't want any encoding conversion
-		 * anyway.
-		 */
-		if (decl_len == 0)
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
-		else
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_FORMAT);
-
-		if (ctxt == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlSaveCtxt");
-
-		if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
-		{
-			/* If it's a document, saving is easy. */
-			if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
-				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-							"could not save document to xmlBuffer");
-		}
-		else if (content_nodes != NULL)
+		PG_TRY();
 		{
-			/*
-			 * Deal with the case where we have non-singly-rooted XML.
-			 * libxml's dump functions don't work well for that without help.
-			 * We build a fake root node that serves as a container for the
-			 * content nodes, and then iterate over the nodes.
-			 */
-			xmlNodePtr	root;
-			xmlNodePtr	newline;
+			size_t		decl_len = 0;
+
+			/* The serialized data will go into this buffer. */
+			buf = xmlBufferCreate();
 
-			root = xmlNewNode(NULL, (const xmlChar *) "content-root");
-			if (root == NULL || xmlerrcxt->err_occurred)
+			if (buf == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlBuffer");
 
-			/* This attaches root to doc, so we need not free it separately. */
-			xmlDocSetRootElement(doc, root);
-			xmlAddChild(root, content_nodes);
+			/* Detect whether there's an XML declaration */
+			parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
 
 			/*
-			 * We use this node to insert newlines in the dump.  Note: in at
-			 * least some libxml versions, xmlNewDocText would not attach the
-			 * node to the document even if we passed it.  Therefore, manage
-			 * freeing of this node manually, and pass NULL here to make sure
-			 * there's not a dangling link.
-			 */
-			newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
-			if (newline == NULL || xmlerrcxt->err_occurred)
+			* Emit declaration only if the input had one.  Note: some versions of
+			* xmlSaveToBuffer leak memory if a non-null encoding argument is
+			* passed, so don't do that.  We don't want any encoding conversion
+			* anyway.
+			*/
+			if (decl_len == 0)
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
+			else
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									XML_SAVE_FORMAT);
+
+			if (ctxt == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlSaveCtxt");
 
-			for (xmlNodePtr node = root->children; node; node = node->next)
+			if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
+			{
+				/* If it's a document, saving is easy. */
+				if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+								"could not save document to xmlBuffer");
+			}
+			else if (content_nodes != NULL)
 			{
-				/* insert newlines between nodes */
-				if (node->type != XML_TEXT_NODE && node->prev != NULL)
+				/*
+				* Deal with the case where we have non-singly-rooted XML.
+				* libxml's dump functions don't work well for that without help.
+				* We build a fake root node that serves as a container for the
+				* content nodes, and then iterate over the nodes.
+				*/
+				xmlNodePtr	root;
+				xmlNodePtr	newline;
+
+				root = xmlNewNode(NULL, (const xmlChar *) "content-root");
+				if (root == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				/* This attaches root to doc, so we need not free it separately. */
+				xmlDocSetRootElement(doc, root);
+				xmlAddChild(root, content_nodes);
+
+				/*
+				* We use this node to insert newlines in the dump.  Note: in at
+				* least some libxml versions, xmlNewDocText would not attach the
+				* node to the document even if we passed it.  Therefore, manage
+				* freeing of this node manually, and pass NULL here to make sure
+				* there's not a dangling link.
+				*/
+				newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
+				if (newline == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				for (xmlNodePtr node = root->children; node; node = node->next)
 				{
-					if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+					/* insert newlines between nodes */
+					if (node->type != XML_TEXT_NODE && node->prev != NULL)
+					{
+						if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+						{
+							xmlFreeNode(newline);
+							xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+										"could not save newline to xmlBuffer");
+						}
+					}
+
+					if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
 					{
 						xmlFreeNode(newline);
 						xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-									"could not save newline to xmlBuffer");
+									"could not save content to xmlBuffer");
 					}
 				}
 
-				if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
-				{
-					xmlFreeNode(newline);
-					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-								"could not save content to xmlBuffer");
-				}
+				xmlFreeNode(newline);
 			}
 
-			xmlFreeNode(newline);
-		}
+			if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			{
+				ctxt = NULL;		/* don't try to close it again */
+				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+							"could not close xmlSaveCtxtPtr");
+			}
 
-		if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			result = (text *) xmlBuffer_to_xmltype(buf);
+		}
+		PG_CATCH();
 		{
-			ctxt = NULL;		/* don't try to close it again */
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-						"could not close xmlSaveCtxtPtr");
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (buf)
+				xmlBufferFree(buf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
 		}
+		PG_END_TRY();
+
+		xmlBufferFree(buf);
+		xmlFreeDoc(doc);
 
-		result = (text *) xmlBuffer_to_xmltype(buf);
+		pg_xml_done(xmlerrcxt, false);
 	}
-	PG_CATCH();
+	else if (format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_COMMENTS)
 	{
-		if (ctxt)
-			xmlSaveClose(ctxt);
-		if (buf)
-			xmlBufferFree(buf);
-		if (doc)
-			xmlFreeDoc(doc);
+		xmlChar    *xmlbuf = NULL;
+		int         nbytes;
+		int         with_comments = 0; /* 0 = no xml comments (default) */
 
-		pg_xml_done(xmlerrcxt, true);
+		PG_TRY();
+		{
+			/* 1 = keeps xml comments */
+			if (format == XMLSERIALIZE_CANONICAL_WITH_COMMENTS)
+				with_comments = 1;
 
-		PG_RE_THROW();
-	}
-	PG_END_TRY();
+			/* Only singly-rooted XML are supported */
+			doc = xml_parse(data, XMLOPTION_DOCUMENT, false,
+							GetDatabaseEncoding(), NULL, NULL, NULL);
 
-	xmlBufferFree(buf);
-	xmlFreeDoc(doc);
+			if (doc == NULL || escontext.error_occurred)
+			{
+				if (doc)
+					xmlFreeDoc(doc);
+				/* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
+				ereport(ERROR,
+						(errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
+						errmsg("not an XML document")));
+			}
 
-	pg_xml_done(xmlerrcxt, false);
+			/*
+			 * This dumps the canonicalized XML doc into the xmlChar* buffer.
+			 * mode = 2 means the doc will be canonicalized using the C14N 1.1 standard.
+			 */
+			nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, with_comments, &xmlbuf);
+
+			if(nbytes < 0 || escontext.error_occurred)
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						errmsg("could not canonicalize the given XML document")));
+
+			result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+		}
+		PG_CATCH();
+		{
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (xmlbuf)
+				xmlFree(xmlbuf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
+		}
+		PG_END_TRY();
+
+		xmlFreeDoc(doc);
+		xmlFree(xmlbuf);
+
+		pg_xml_done(xmlerrcxt, false);
+	}
+	else
+		elog(ERROR,"invalid xmlserialize option");
 
 	return result;
+
 #else
 	NO_XML_SUPPORT();
 	return NULL;
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 028588fb33..919d7158f4 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -841,6 +841,7 @@ typedef struct XmlSerialize
 	Node	   *expr;
 	TypeName   *typeName;
 	bool		indent;			/* [NO] INDENT */
+	XmlSerializeFormat	format;	/* serialization format */
 	int			location;		/* token location, or -1 if unknown */
 } XmlSerialize;
 
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 8fb5b4b919..2cdb1d970e 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1474,6 +1474,14 @@ typedef enum XmlOptionType
 	XMLOPTION_CONTENT
 } XmlOptionType;
 
+typedef enum XmlSerializeFormat
+{
+	XMLSERIALIZE_INDENT,					/* pretty-printed xml serialization  */
+	XMLSERIALIZE_CANONICAL,					/* canonical form without xml comments */
+	XMLSERIALIZE_CANONICAL_WITH_COMMENTS,	/* canonical form with xml comments */
+	XMLSERIALIZE_NO_FORMAT					/* unformatted xml representation */
+} XmlSerializeFormat;
+
 typedef struct XmlExpr
 {
 	Expr		xpr;
@@ -1489,13 +1497,13 @@ typedef struct XmlExpr
 	List	   *args;
 	/* DOCUMENT or CONTENT */
 	XmlOptionType xmloption pg_node_attr(query_jumble_ignore);
-	/* INDENT option for XMLSERIALIZE */
-	bool		indent;
 	/* target type/typmod for XMLSERIALIZE */
 	Oid			type pg_node_attr(query_jumble_ignore);
 	int32		typmod pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	int			location;
+	/* serialization format: XMLCANONICAL, XMLCANONICAL_WITH_COMMENTS, XMLINDENT */
+	XmlSerializeFormat format pg_node_attr(query_jumble_ignore);
 } XmlExpr;
 
 /* ----------------
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 753e9ee174..8280f605d1 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -67,6 +67,7 @@ PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("canonical", CANONICAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index 224f6d75ff..b74f216148 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -78,7 +78,7 @@ extern xmltype *xmlpi(const char *target, text *arg, bool arg_is_null, bool *res
 extern xmltype *xmlroot(xmltype *data, text *version, int standalone);
 extern bool xml_is_document(xmltype *arg);
 extern text *xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg,
-									bool indent);
+									XmlSerializeFormat format);
 extern char *escape_xml(const char *str);
 
 extern char *map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, bool escape_period);
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 398345ca67..a73ce5a0cf 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -672,6 +672,118 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 63b779470f..481badaa84 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -443,6 +443,114 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val><...
                                      ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+ERROR:  unsupported XML feature
+LINE 2:   (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+             ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 ERROR:  unsupported XML feature
 LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index 43c2558352..384c3ef946 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -652,6 +652,118 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  not an XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index a591eea2e5..a3eda9e84f 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -168,6 +168,69 @@ SELECT xmlserialize(CONTENT  '<foo><bar></bar></foo>' AS text INDENT);
 -- 'no indent' = not using 'no indent'
 SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
 SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+\set VERBOSITY default
 
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
-- 
2.25.1

#10

Jim Jones

jim.jones@uni-muenster.de

almost 3 years ago

In reply to: Jim Jones (#9)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

After some more testing I realized that v5 was leaking the xmlDocPtr.

Now fixed in v6.

Attachments:

v6-0001-Add-CANONICAL-output-format-to-xmlserialize.patchtext/x-patch; charset=UTF-8; name=v6-0001-Add-CANONICAL-output-format-to-xmlserialize.patchDownload

From d04d8fdcbedbd5ed88469bd22e079467c26ab7a4 Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Fri, 17 Mar 2023 10:23:48 +0100
Subject: [PATCH v6] Add CANONICAL output format to xmlserialize

This patch introduces the CANONICAL option to xmlserialize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification. This option can
be used with the additional parameter WITH [NO] COMMENTS to keep
or remove xml comments from the canonical xml output. This feature
is based on the function xmlC14NDocDumpMemory from the C14N module
of libxml2.

This patch also includes regression tests and documentation.
---
 doc/src/sgml/datatype.sgml            |  41 +++-
 src/backend/executor/execExprInterp.c |   2 +-
 src/backend/parser/gram.y             |  21 +-
 src/backend/parser/parse_expr.c       |   2 +-
 src/backend/utils/adt/xml.c           | 275 ++++++++++++++++----------
 src/include/nodes/parsenodes.h        |   1 +
 src/include/nodes/primnodes.h         |  12 +-
 src/include/parser/kwlist.h           |   1 +
 src/include/utils/xml.h               |   2 +-
 src/test/regress/expected/xml.out     | 112 +++++++++++
 src/test/regress/expected/xml_1.out   | 108 ++++++++++
 src/test/regress/expected/xml_2.out   | 112 +++++++++++
 src/test/regress/sql/xml.sql          |  63 ++++++
 13 files changed, 639 insertions(+), 113 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 4df8bd1b64..90415265d4 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4460,7 +4460,7 @@ xml '<foo>bar</foo>'
     <type>xml</type>, uses the function
     <function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm>
 <synopsis>
-XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ [ NO ] INDENT ] )
+XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ { [ NO ] INDENT ] | CANONICAL [ WITH [NO] COMMENTS ]})
 </synopsis>
     <replaceable>type</replaceable> can be
     <type>character</type>, <type>character varying</type>, or
@@ -4477,6 +4477,45 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <repla
     type likewise produces the original string.
    </para>
 
+   <para>
+    The option <type>CANONICAL</type> converts a given
+    XML document to its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>
+    based on the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>.
+    It is basically designed to provide applications the ability to compare xml documents or test if they
+    have been changed. The optional parameter <type>WITH [NO] COMMENTS</type> removes or keeps XML comments
+    from the given document.
+    </para>
+
+     <para>
+     Example:
+
+<screen><![CDATA[
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL);
+                       xmlserialize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL WITH COMMENTS);
+                                xmlserialize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+]]></screen>
+   </para>
    <para>
     When a character string value is cast to or from type
     <type>xml</type> without going through <type>XMLPARSE</type> or
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 9cb9625ce9..d3dfc9ecfc 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -3840,7 +3840,7 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 				*op->resvalue =
 					PointerGetDatum(xmltotext_with_options(DatumGetXmlP(value),
 														   xexpr->xmloption,
-														   xexpr->indent));
+														   xexpr->format));
 				*op->resnull = false;
 			}
 			break;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index efe88ccf9d..fd9315feb8 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -613,12 +613,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <node>	xml_root_version opt_xml_root_standalone
 %type <node>	xmlexists_argument
 %type <ival>	document_or_content
-%type <boolean>	xml_indent_option xml_whitespace_option
+%type <boolean>	xml_whitespace_option
 %type <list>	xmltable_column_list xmltable_column_option_list
 %type <node>	xmltable_column_el
 %type <defelt>	xmltable_column_option_el
 %type <list>	xml_namespace_list
 %type <target>	xml_namespace_el
+%type <ival> 	opt_xml_serialize_format
 
 %type <node>	func_application func_expr_common_subexpr
 %type <node>	func_expr func_expr_windowless
@@ -676,7 +677,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
 	BOOLEAN_P BOTH BREADTH BY
 
-	CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
+	CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT
 	COMMITTED COMPRESSION CONCURRENTLY CONFIGURATION CONFLICT
@@ -15532,14 +15533,14 @@ func_expr_common_subexpr:
 					$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
 									 list_make3($3, $5, $6), @1);
 				}
-			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename xml_indent_option ')'
+			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename opt_xml_serialize_format ')'
 				{
 					XmlSerialize *n = makeNode(XmlSerialize);
 
 					n->xmloption = $3;
 					n->expr = $4;
 					n->typeName = $6;
-					n->indent = $7;
+					n->format = $7;
 					n->location = @1;
 					$$ = (Node *) n;
 				}
@@ -15593,9 +15594,13 @@ document_or_content: DOCUMENT_P						{ $$ = XMLOPTION_DOCUMENT; }
 			| CONTENT_P								{ $$ = XMLOPTION_CONTENT; }
 		;
 
-xml_indent_option: INDENT							{ $$ = true; }
-			| NO INDENT								{ $$ = false; }
-			| /*EMPTY*/								{ $$ = false; }
+opt_xml_serialize_format:
+			INDENT									{ $$ = XMLSERIALIZE_INDENT; }
+			| NO INDENT								{ $$ = XMLSERIALIZE_NO_FORMAT; }
+			| CANONICAL								{ $$ = XMLSERIALIZE_CANONICAL; }
+			| CANONICAL WITH NO COMMENTS			{ $$ = XMLSERIALIZE_CANONICAL; }
+			| CANONICAL WITH COMMENTS				{ $$ = XMLSERIALIZE_CANONICAL_WITH_COMMENTS; }
+			| /*EMPTY*/								{ $$ = XMLSERIALIZE_NO_FORMAT; }
 		;
 
 xml_whitespace_option: PRESERVE WHITESPACE_P		{ $$ = true; }
@@ -16743,6 +16748,7 @@ unreserved_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CATALOG_P
@@ -17266,6 +17272,7 @@ bare_label_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CASE
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 2331417552..7e198fc985 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -2331,7 +2331,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
 	typenameTypeIdAndMod(pstate, xs->typeName, &targetType, &targetTypmod);
 
 	xexpr->xmloption = xs->xmloption;
-	xexpr->indent = xs->indent;
+	xexpr->format = xs->format;
 	xexpr->location = xs->location;
 	/* We actually only need these to be able to parse back the expression. */
 	xexpr->type = targetType;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 15adbd6a01..fbce16464a 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -57,6 +57,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -622,7 +623,7 @@ xmltotext(PG_FUNCTION_ARGS)
 
 
 text *
-xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
+xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, XmlSerializeFormat format)
 {
 #ifdef USE_LIBXML
 	text	   *volatile result;
@@ -635,7 +636,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	PgXmlErrorContext *xmlerrcxt;
 #endif
 
-	if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
+	if (xmloption_arg != XMLOPTION_DOCUMENT && format == XMLSERIALIZE_NO_FORMAT)
 	{
 		/*
 		 * We don't actually need to do anything, so just return the
@@ -646,10 +647,23 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 #ifdef USE_LIBXML
-	/* Parse the input according to the xmloption */
-	doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
-					&parsed_xmloptiontype, &content_nodes,
-					(Node *) &escontext);
+	/*
+	* Parse the input according to the xmloption.
+	* XML canonical expects a well-formed XML input, so here in case of
+	* XMLSERIALIZE_CANONICAL or XMLSERIALIZE_CANONICAL_WITH_COMMENTS we
+	* force xml_parse() to parse 'data' as XMLOPTION_DOCUMENT despite
+	* of the XmlOptionType given in 'xmloption_arg'. This enables the
+	* canonicalization of CONTENT fragments if they contain a singly-rooted
+	* XML - xml_parse() will thrown an error otherwise.
+	*/
+	if(format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_COMMENTS)
+		doc = xml_parse(data, XMLOPTION_DOCUMENT, false,
+						GetDatabaseEncoding(), NULL, NULL, NULL);
+	else
+		doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
+						&parsed_xmloptiontype, &content_nodes,
+						(Node *) &escontext);
+
 	if (doc == NULL || escontext.error_occurred)
 	{
 		if (doc)
@@ -661,7 +675,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 	/* If we weren't asked to indent, we're done. */
-	if (!indent)
+	if (format == XMLSERIALIZE_NO_FORMAT)
 	{
 		xmlFreeDoc(doc);
 		return (text *) data;
@@ -670,130 +684,191 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	/* Otherwise, we gotta spin up some error handling. */
 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
-	PG_TRY();
+	if(format == XMLSERIALIZE_INDENT)
 	{
-		size_t		decl_len = 0;
-
-		/* The serialized data will go into this buffer. */
-		buf = xmlBufferCreate();
-
-		if (buf == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlBuffer");
-
-		/* Detect whether there's an XML declaration */
-		parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
-
-		/*
-		 * Emit declaration only if the input had one.  Note: some versions of
-		 * xmlSaveToBuffer leak memory if a non-null encoding argument is
-		 * passed, so don't do that.  We don't want any encoding conversion
-		 * anyway.
-		 */
-		if (decl_len == 0)
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
-		else
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_FORMAT);
-
-		if (ctxt == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlSaveCtxt");
-
-		if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
-		{
-			/* If it's a document, saving is easy. */
-			if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
-				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-							"could not save document to xmlBuffer");
-		}
-		else if (content_nodes != NULL)
+		PG_TRY();
 		{
-			/*
-			 * Deal with the case where we have non-singly-rooted XML.
-			 * libxml's dump functions don't work well for that without help.
-			 * We build a fake root node that serves as a container for the
-			 * content nodes, and then iterate over the nodes.
-			 */
-			xmlNodePtr	root;
-			xmlNodePtr	newline;
+			size_t		decl_len = 0;
 
-			root = xmlNewNode(NULL, (const xmlChar *) "content-root");
-			if (root == NULL || xmlerrcxt->err_occurred)
+			/* The serialized data will go into this buffer. */
+			buf = xmlBufferCreate();
+
+			if (buf == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlBuffer");
 
-			/* This attaches root to doc, so we need not free it separately. */
-			xmlDocSetRootElement(doc, root);
-			xmlAddChild(root, content_nodes);
+			/* Detect whether there's an XML declaration */
+			parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
 
 			/*
-			 * We use this node to insert newlines in the dump.  Note: in at
-			 * least some libxml versions, xmlNewDocText would not attach the
-			 * node to the document even if we passed it.  Therefore, manage
-			 * freeing of this node manually, and pass NULL here to make sure
-			 * there's not a dangling link.
-			 */
-			newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
-			if (newline == NULL || xmlerrcxt->err_occurred)
+			* Emit declaration only if the input had one.  Note: some versions of
+			* xmlSaveToBuffer leak memory if a non-null encoding argument is
+			* passed, so don't do that.  We don't want any encoding conversion
+			* anyway.
+			*/
+			if (decl_len == 0)
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
+			else
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									XML_SAVE_FORMAT);
+
+			if (ctxt == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlSaveCtxt");
 
-			for (xmlNodePtr node = root->children; node; node = node->next)
+			if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
 			{
-				/* insert newlines between nodes */
-				if (node->type != XML_TEXT_NODE && node->prev != NULL)
+				/* If it's a document, saving is easy. */
+				if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+								"could not save document to xmlBuffer");
+			}
+			else if (content_nodes != NULL)
+			{
+				/*
+				* Deal with the case where we have non-singly-rooted XML.
+				* libxml's dump functions don't work well for that without help.
+				* We build a fake root node that serves as a container for the
+				* content nodes, and then iterate over the nodes.
+				*/
+				xmlNodePtr	root;
+				xmlNodePtr	newline;
+
+				root = xmlNewNode(NULL, (const xmlChar *) "content-root");
+				if (root == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				/* This attaches root to doc, so we need not free it separately. */
+				xmlDocSetRootElement(doc, root);
+				xmlAddChild(root, content_nodes);
+
+				/*
+				* We use this node to insert newlines in the dump.  Note: in at
+				* least some libxml versions, xmlNewDocText would not attach the
+				* node to the document even if we passed it.  Therefore, manage
+				* freeing of this node manually, and pass NULL here to make sure
+				* there's not a dangling link.
+				*/
+				newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
+				if (newline == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				for (xmlNodePtr node = root->children; node; node = node->next)
 				{
-					if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+					/* insert newlines between nodes */
+					if (node->type != XML_TEXT_NODE && node->prev != NULL)
+					{
+						if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+						{
+							xmlFreeNode(newline);
+							xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+										"could not save newline to xmlBuffer");
+						}
+					}
+
+					if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
 					{
 						xmlFreeNode(newline);
 						xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-									"could not save newline to xmlBuffer");
+									"could not save content to xmlBuffer");
 					}
 				}
 
-				if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
-				{
-					xmlFreeNode(newline);
-					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-								"could not save content to xmlBuffer");
-				}
+				xmlFreeNode(newline);
 			}
 
-			xmlFreeNode(newline);
-		}
+			if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			{
+				ctxt = NULL;		/* don't try to close it again */
+				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+							"could not close xmlSaveCtxtPtr");
+			}
 
-		if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			result = (text *) xmlBuffer_to_xmltype(buf);
+		}
+		PG_CATCH();
 		{
-			ctxt = NULL;		/* don't try to close it again */
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-						"could not close xmlSaveCtxtPtr");
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (buf)
+				xmlBufferFree(buf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
 		}
+		PG_END_TRY();
+
+		xmlBufferFree(buf);
+		xmlFreeDoc(doc);
 
-		result = (text *) xmlBuffer_to_xmltype(buf);
+		pg_xml_done(xmlerrcxt, false);
 	}
-	PG_CATCH();
+	else if (format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_COMMENTS)
 	{
-		if (ctxt)
-			xmlSaveClose(ctxt);
-		if (buf)
-			xmlBufferFree(buf);
-		if (doc)
-			xmlFreeDoc(doc);
+		xmlChar    *xmlbuf = NULL;
+		int         nbytes;
+		int         with_comments = 0; /* 0 = no xml comments (default) */
 
-		pg_xml_done(xmlerrcxt, true);
+		PG_TRY();
+		{
+			/* 1 = keeps xml comments */
+			if (format == XMLSERIALIZE_CANONICAL_WITH_COMMENTS)
+				with_comments = 1;
 
-		PG_RE_THROW();
-	}
-	PG_END_TRY();
+			if (doc == NULL || escontext.error_occurred)
+			{
+				if (doc)
+					xmlFreeDoc(doc);
+				/* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
+				ereport(ERROR,
+						(errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
+						errmsg("not an XML document")));
+			}
 
-	xmlBufferFree(buf);
-	xmlFreeDoc(doc);
+			/*
+			 * This dumps the canonicalized XML doc into the xmlChar* buffer.
+			 * mode = 2 means the doc will be canonicalized using the C14N 1.1 standard.
+			 */
+			nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, with_comments, &xmlbuf);
 
-	pg_xml_done(xmlerrcxt, false);
+			if(nbytes < 0 || escontext.error_occurred)
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						errmsg("could not canonicalize the given XML document")));
+
+			result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+		}
+		PG_CATCH();
+		{
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (xmlbuf)
+				xmlFree(xmlbuf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
+		}
+		PG_END_TRY();
+
+		xmlFreeDoc(doc);
+		xmlFree(xmlbuf);
+
+		pg_xml_done(xmlerrcxt, false);
+	}
+	else
+		elog(ERROR,"invalid xmlserialize option");
 
 	return result;
+
 #else
 	NO_XML_SUPPORT();
 	return NULL;
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 028588fb33..919d7158f4 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -841,6 +841,7 @@ typedef struct XmlSerialize
 	Node	   *expr;
 	TypeName   *typeName;
 	bool		indent;			/* [NO] INDENT */
+	XmlSerializeFormat	format;	/* serialization format */
 	int			location;		/* token location, or -1 if unknown */
 } XmlSerialize;
 
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 8fb5b4b919..2cdb1d970e 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1474,6 +1474,14 @@ typedef enum XmlOptionType
 	XMLOPTION_CONTENT
 } XmlOptionType;
 
+typedef enum XmlSerializeFormat
+{
+	XMLSERIALIZE_INDENT,					/* pretty-printed xml serialization  */
+	XMLSERIALIZE_CANONICAL,					/* canonical form without xml comments */
+	XMLSERIALIZE_CANONICAL_WITH_COMMENTS,	/* canonical form with xml comments */
+	XMLSERIALIZE_NO_FORMAT					/* unformatted xml representation */
+} XmlSerializeFormat;
+
 typedef struct XmlExpr
 {
 	Expr		xpr;
@@ -1489,13 +1497,13 @@ typedef struct XmlExpr
 	List	   *args;
 	/* DOCUMENT or CONTENT */
 	XmlOptionType xmloption pg_node_attr(query_jumble_ignore);
-	/* INDENT option for XMLSERIALIZE */
-	bool		indent;
 	/* target type/typmod for XMLSERIALIZE */
 	Oid			type pg_node_attr(query_jumble_ignore);
 	int32		typmod pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	int			location;
+	/* serialization format: XMLCANONICAL, XMLCANONICAL_WITH_COMMENTS, XMLINDENT */
+	XmlSerializeFormat format pg_node_attr(query_jumble_ignore);
 } XmlExpr;
 
 /* ----------------
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 753e9ee174..8280f605d1 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -67,6 +67,7 @@ PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("canonical", CANONICAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index 224f6d75ff..b74f216148 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -78,7 +78,7 @@ extern xmltype *xmlpi(const char *target, text *arg, bool arg_is_null, bool *res
 extern xmltype *xmlroot(xmltype *data, text *version, int standalone);
 extern bool xml_is_document(xmltype *arg);
 extern text *xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg,
-									bool indent);
+									XmlSerializeFormat format);
 extern char *escape_xml(const char *str);
 
 extern char *map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, bool escape_period);
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 398345ca67..83fbdc5223 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -672,6 +672,118 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 63b779470f..481badaa84 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -443,6 +443,114 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val><...
                                      ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+ERROR:  unsupported XML feature
+LINE 2:   (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+             ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 ERROR:  unsupported XML feature
 LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index 43c2558352..6dea6ca38d 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -652,6 +652,118 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index a591eea2e5..a3eda9e84f 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -168,6 +168,69 @@ SELECT xmlserialize(CONTENT  '<foo><bar></bar></foo>' AS text INDENT);
 -- 'no indent' = not using 'no indent'
 SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
 SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+\set VERBOSITY default
 
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
-- 
2.25.1

#11

Jim Jones

jim.jones@uni-muenster.de

over 2 years ago

In reply to: Jim Jones (#10)

Re: [PATCH] Add CANONICAL option to xmlserialize

The cfbot started complaining about this patch on "macOS - Ventura - Meson"

'Persistent worker failed to start the task: tart isolation failed:
failed to create VM cloned from
"ghcr.io/cirruslabs/macos-ventura-base:latest": tart command returned
non-zero exit code: ""'

Is this a problem in my code or in the CI itself?

Thanks!

Jim

#12

Thomas Munro

thomas.munro@gmail.com

over 2 years ago

In reply to: Jim Jones (#11)

Re: [PATCH] Add CANONICAL option to xmlserialize

On Thu, Sep 14, 2023 at 11:54 PM Jim Jones <jim.jones@uni-muenster.de> wrote:

The cfbot started complaining about this patch on "macOS - Ventura - Meson"

'Persistent worker failed to start the task: tart isolation failed: failed to create VM cloned from "ghcr.io/cirruslabs/macos-ventura-base:latest": tart command returned non-zero exit code: ""'

Is this a problem in my code or in the CI itself?

There was a temporary glitch on one of the new Mac CI runner machines
that caused a few tests to fail like that, but it was fixed so that
should turn red again later today.

#13

vignesh C

vignesh21@gmail.com

over 2 years ago

In reply to: Jim Jones (#10)

Re: [PATCH] Add CANONICAL option to xmlserialize

On Fri, 17 Mar 2023 at 18:01, Jim Jones <jim.jones@uni-muenster.de> wrote:

After some more testing I realized that v5 was leaking the xmlDocPtr.

Now fixed in v6.

Few comments:
1) Why the default option was chosen without comments shouldn't it be
the other way round?
+opt_xml_serialize_format:
+                       INDENT
                         { $$ = XMLSERIALIZE_INDENT; }
+                       | NO INDENT
                         { $$ = XMLSERIALIZE_NO_FORMAT; }
+                       | CANONICAL
                         { $$ = XMLSERIALIZE_CANONICAL; }
+                       | CANONICAL WITH NO COMMENTS
 { $$ = XMLSERIALIZE_CANONICAL; }
+                       | CANONICAL WITH COMMENTS
         { $$ = XMLSERIALIZE_CANONICAL_WITH_COMMENTS; }
+                       | /*EMPTY*/
                         { $$ = XMLSERIALIZE_NO_FORMAT; }

2) This should be added to typedefs.list:
+typedef enum XmlSerializeFormat
+{
+       XMLSERIALIZE_INDENT,                                    /*
pretty-printed xml serialization  */
+       XMLSERIALIZE_CANONICAL,                                 /*
canonical form without xml comments */
+       XMLSERIALIZE_CANONICAL_WITH_COMMENTS,   /* canonical form with
xml comments */
+       XMLSERIALIZE_NO_FORMAT                                  /*
unformatted xml representation */
+} XmlSerializeFormat;

3) This change is not required:
return result;
+
#else
NO_XML_SUPPORT();
return NULL;

4) This comment body needs slight reformatting:
+       /*
+       * Parse the input according to the xmloption.
+       * XML canonical expects a well-formed XML input, so here in case of
+       * XMLSERIALIZE_CANONICAL or XMLSERIALIZE_CANONICAL_WITH_COMMENTS we
+       * force xml_parse() to parse 'data' as XMLOPTION_DOCUMENT despite
+       * of the XmlOptionType given in 'xmloption_arg'. This enables the
+       * canonicalization of CONTENT fragments if they contain a singly-rooted
+       * XML - xml_parse() will thrown an error otherwise.
+       */

5) Similarly here too:
-                       if (newline == NULL || xmlerrcxt->err_occurred)
+                       * Emit declaration only if the input had one.
Note: some versions of
+                       * xmlSaveToBuffer leak memory if a non-null
encoding argument is
+                       * passed, so don't do that.  We don't want any
encoding conversion
+                       * anyway.
+                       */
+                       if (decl_len == 0)

6) Similarly here too:
+                               /*
+                               * Deal with the case where we have
non-singly-rooted XML.
+                               * libxml's dump functions don't work
well for that without help.
+                               * We build a fake root node that
serves as a container for the
+                               * content nodes, and then iterate over
the nodes.
+                               */

7) Similarly here too:
+                               /*
+                               * We use this node to insert newlines
in the dump.  Note: in at
+                               * least some libxml versions,
xmlNewDocText would not attach the
+                               * node to the document even if we
passed it.  Therefore, manage
+                               * freeing of this node manually, and
pass NULL here to make sure
+                               * there's not a dangling link.
+                               */

8) Should this:
+       * of the XmlOptionType given in 'xmloption_arg'. This enables the
+       * canonicalization of CONTENT fragments if they contain a singly-rooted
+       * XML - xml_parse() will thrown an error otherwise.
Be:
+       * of the XmlOptionType given in 'xmloption_arg'. This enables the
+       * canonicalization of CONTENT fragments if they contain a singly-rooted
+       * XML - xml_parse() will throw an error otherwise.

Regards,
Vignesh

#14

Jim Jones

jim.jones@uni-muenster.de

over 2 years ago

In reply to: vignesh C (#13)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

Hi Vignesh

Thanks for the thorough review!

On 04.10.23 11:39, vignesh C wrote:

Few comments:
1) Why the default option was chosen without comments shouldn't it be
the other way round?
+opt_xml_serialize_format:
+                       INDENT
{ $$ = XMLSERIALIZE_INDENT; }
+                       | NO INDENT
{ $$ = XMLSERIALIZE_NO_FORMAT; }
+                       | CANONICAL
{ $$ = XMLSERIALIZE_CANONICAL; }
+                       | CANONICAL WITH NO COMMENTS
{ $$ = XMLSERIALIZE_CANONICAL; }
+                       | CANONICAL WITH COMMENTS
{ $$ = XMLSERIALIZE_CANONICAL_WITH_COMMENTS; }
+                       | /*EMPTY*/
{ $$ = XMLSERIALIZE_NO_FORMAT; }

I'm not sure it is the way to go. The main idea is to check if two
documents have the same content, and comments might be different even if
the contents of two documents are identical. What are your concerns
regarding this default behaviour?

2) This should be added to typedefs.list:
+typedef enum XmlSerializeFormat
+{
+       XMLSERIALIZE_INDENT,                                    /*
pretty-printed xml serialization  */
+       XMLSERIALIZE_CANONICAL,                                 /*
canonical form without xml comments */
+       XMLSERIALIZE_CANONICAL_WITH_COMMENTS,   /* canonical form with
xml comments */
+       XMLSERIALIZE_NO_FORMAT                                  /*
unformatted xml representation */
+} XmlSerializeFormat;

added.

3) This change is not required:
return result;
+
#else
NO_XML_SUPPORT();
return NULL;

removed.

4) This comment body needs slight reformatting:
+       /*
+       * Parse the input according to the xmloption.
+       * XML canonical expects a well-formed XML input, so here in case of
+       * XMLSERIALIZE_CANONICAL or XMLSERIALIZE_CANONICAL_WITH_COMMENTS we
+       * force xml_parse() to parse 'data' as XMLOPTION_DOCUMENT despite
+       * of the XmlOptionType given in 'xmloption_arg'. This enables the
+       * canonicalization of CONTENT fragments if they contain a singly-rooted
+       * XML - xml_parse() will thrown an error otherwise.
+       */

reformatted.

5) Similarly here too:
-                       if (newline == NULL || xmlerrcxt->err_occurred)
+                       * Emit declaration only if the input had one.
Note: some versions of
+                       * xmlSaveToBuffer leak memory if a non-null
encoding argument is
+                       * passed, so don't do that.  We don't want any
encoding conversion
+                       * anyway.
+                       */
+                       if (decl_len == 0)

reformatted.

6) Similarly here too:
+                               /*
+                               * Deal with the case where we have
non-singly-rooted XML.
+                               * libxml's dump functions don't work
well for that without help.
+                               * We build a fake root node that
serves as a container for the
+                               * content nodes, and then iterate over
the nodes.
+                               */

reformatted.

7) Similarly here too:
+                               /*
+                               * We use this node to insert newlines
in the dump.  Note: in at
+                               * least some libxml versions,
xmlNewDocText would not attach the
+                               * node to the document even if we
passed it.  Therefore, manage
+                               * freeing of this node manually, and
pass NULL here to make sure
+                               * there's not a dangling link.
+                               */

reformatted.

8) Should this:
+       * of the XmlOptionType given in 'xmloption_arg'. This enables the
+       * canonicalization of CONTENT fragments if they contain a singly-rooted
+       * XML - xml_parse() will thrown an error otherwise.
Be:
+       * of the XmlOptionType given in 'xmloption_arg'. This enables the
+       * canonicalization of CONTENT fragments if they contain a singly-rooted
+       * XML - xml_parse() will throw an error otherwise.

I didn't understand the suggestion in 8) :)

Thanks again for the review. Much appreciated!

v7 attached.

Best, Jim

Attachments:

v7-0001-Add-CANONICAL-output-format-to-xmlserialize.patchtext/x-patch; charset=UTF-8; name=v7-0001-Add-CANONICAL-output-format-to-xmlserialize.patchDownload

From 4bd06615b9aa9f3f0fcebdd1bc30a0500524cdad Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Wed, 4 Oct 2023 17:58:24 +0200
Subject: [PATCH v7] Add CANONICAL output format to xmlserialize

This patch introduces the CANONICAL option to xmlserialize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification. This option can
be used with the additional parameter WITH [NO] COMMENTS to keep
or remove xml comments from the canonical xml output. This feature
is based on the function xmlC14NDocDumpMemory from the C14N module
of libxml2.

This patch also includes regression tests and documentation.
---
 doc/src/sgml/datatype.sgml            |  41 +++-
 src/backend/executor/execExprInterp.c |   2 +-
 src/backend/parser/gram.y             |  21 +-
 src/backend/parser/parse_expr.c       |   2 +-
 src/backend/utils/adt/xml.c           | 272 ++++++++++++++++----------
 src/include/nodes/parsenodes.h        |   1 +
 src/include/nodes/primnodes.h         |  12 +-
 src/include/parser/kwlist.h           |   1 +
 src/include/utils/xml.h               |   2 +-
 src/test/regress/expected/xml.out     | 112 +++++++++++
 src/test/regress/expected/xml_1.out   | 108 ++++++++++
 src/test/regress/expected/xml_2.out   | 112 +++++++++++
 src/test/regress/sql/xml.sql          |  63 ++++++
 src/tools/pgindent/typedefs.list      |   1 +
 14 files changed, 638 insertions(+), 112 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 5d23765705..89feb530c6 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4465,7 +4465,7 @@ xml '<foo>bar</foo>'
     <type>xml</type>, uses the function
     <function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm>
 <synopsis>
-XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ [ NO ] INDENT ] )
+XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ { [ NO ] INDENT ] | CANONICAL [ WITH [NO] COMMENTS ]})
 </synopsis>
     <replaceable>type</replaceable> can be
     <type>character</type>, <type>character varying</type>, or
@@ -4482,6 +4482,45 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <repla
     type likewise produces the original string.
    </para>
 
+   <para>
+    The option <type>CANONICAL</type> converts a given
+    XML document to its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>
+    based on the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>.
+    It is basically designed to provide applications the ability to compare xml documents or test if they
+    have been changed. The optional parameter <type>WITH [NO] COMMENTS</type> removes or keeps XML comments
+    from the given document.
+    </para>
+
+     <para>
+     Example:
+
+<screen><![CDATA[
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL);
+                       xmlserialize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL WITH COMMENTS);
+                                xmlserialize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+]]></screen>
+   </para>
    <para>
     When a character string value is cast to or from type
     <type>xml</type> without going through <type>XMLPARSE</type> or
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 24c2b60c62..d9305c28b0 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -3943,7 +3943,7 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 				*op->resvalue =
 					PointerGetDatum(xmltotext_with_options(DatumGetXmlP(value),
 														   xexpr->xmloption,
-														   xexpr->indent));
+														   xexpr->format));
 				*op->resnull = false;
 			}
 			break;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index e56cbe77cb..8123e9207a 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -615,12 +615,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <node>	xml_root_version opt_xml_root_standalone
 %type <node>	xmlexists_argument
 %type <ival>	document_or_content
-%type <boolean>	xml_indent_option xml_whitespace_option
+%type <boolean>	xml_whitespace_option
 %type <list>	xmltable_column_list xmltable_column_option_list
 %type <node>	xmltable_column_el
 %type <defelt>	xmltable_column_option_el
 %type <list>	xml_namespace_list
 %type <target>	xml_namespace_el
+%type <ival> 	opt_xml_serialize_format
 
 %type <node>	func_application func_expr_common_subexpr
 %type <node>	func_expr func_expr_windowless
@@ -692,7 +693,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
 	BOOLEAN_P BOTH BREADTH BY
 
-	CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
+	CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT
 	COMMITTED COMPRESSION CONCURRENTLY CONFIGURATION CONFLICT
@@ -15634,14 +15635,14 @@ func_expr_common_subexpr:
 					$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
 									 list_make3($3, $5, $6), @1);
 				}
-			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename xml_indent_option ')'
+			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename opt_xml_serialize_format ')'
 				{
 					XmlSerialize *n = makeNode(XmlSerialize);
 
 					n->xmloption = $3;
 					n->expr = $4;
 					n->typeName = $6;
-					n->indent = $7;
+					n->format = $7;
 					n->location = @1;
 					$$ = (Node *) n;
 				}
@@ -15797,9 +15798,13 @@ document_or_content: DOCUMENT_P						{ $$ = XMLOPTION_DOCUMENT; }
 			| CONTENT_P								{ $$ = XMLOPTION_CONTENT; }
 		;
 
-xml_indent_option: INDENT							{ $$ = true; }
-			| NO INDENT								{ $$ = false; }
-			| /*EMPTY*/								{ $$ = false; }
+opt_xml_serialize_format:
+			INDENT									{ $$ = XMLSERIALIZE_INDENT; }
+			| NO INDENT								{ $$ = XMLSERIALIZE_NO_FORMAT; }
+			| CANONICAL								{ $$ = XMLSERIALIZE_CANONICAL; }
+			| CANONICAL WITH NO COMMENTS			{ $$ = XMLSERIALIZE_CANONICAL; }
+			| CANONICAL WITH COMMENTS				{ $$ = XMLSERIALIZE_CANONICAL_WITH_COMMENTS; }
+			| /*EMPTY*/								{ $$ = XMLSERIALIZE_NO_FORMAT; }
 		;
 
 xml_whitespace_option: PRESERVE WHITESPACE_P		{ $$ = true; }
@@ -17084,6 +17089,7 @@ unreserved_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CATALOG_P
@@ -17618,6 +17624,7 @@ bare_label_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CASE
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 64c582c344..624b0b8844 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -2443,7 +2443,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
 	typenameTypeIdAndMod(pstate, xs->typeName, &targetType, &targetTypmod);
 
 	xexpr->xmloption = xs->xmloption;
-	xexpr->indent = xs->indent;
+	xexpr->format = xs->format;
 	xexpr->location = xs->location;
 	/* We actually only need these to be able to parse back the expression. */
 	xexpr->type = targetType;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 2300c7ebf3..ce87451942 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -57,6 +57,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -622,7 +623,7 @@ xmltotext(PG_FUNCTION_ARGS)
 
 
 text *
-xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
+xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, XmlSerializeFormat format)
 {
 #ifdef USE_LIBXML
 	text	   *volatile result;
@@ -635,7 +636,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	PgXmlErrorContext *xmlerrcxt;
 #endif
 
-	if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
+	if (xmloption_arg != XMLOPTION_DOCUMENT && format == XMLSERIALIZE_NO_FORMAT)
 	{
 		/*
 		 * We don't actually need to do anything, so just return the
@@ -646,10 +647,23 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 #ifdef USE_LIBXML
-	/* Parse the input according to the xmloption */
-	doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
-					&parsed_xmloptiontype, &content_nodes,
-					(Node *) &escontext);
+	/*
+	 * Parse the input according to the xmloption. XML canonical expects
+	 * a well-formed XML input, so here in case of XMLSERIALIZE_CANONICAL
+	 * or XMLSERIALIZE_CANONICAL_WITH_COMMENTS we force xml_parse() to parse
+	 * 'data' as XMLOPTION_DOCUMENT despite of the XmlOptionType given in
+	 * 'xmloption_arg'. This enables the canonicalization of CONTENT fragments
+	 * if they contain a singly-rooted XML - xml_parse() will thrown an error
+	 * otherwise.
+	 */
+	if(format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_COMMENTS)
+		doc = xml_parse(data, XMLOPTION_DOCUMENT, false,
+						GetDatabaseEncoding(), NULL, NULL, NULL);
+	else
+		doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
+						&parsed_xmloptiontype, &content_nodes,
+						(Node *) &escontext);
+
 	if (doc == NULL || escontext.error_occurred)
 	{
 		if (doc)
@@ -661,7 +675,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 	/* If we weren't asked to indent, we're done. */
-	if (!indent)
+	if (format == XMLSERIALIZE_NO_FORMAT)
 	{
 		xmlFreeDoc(doc);
 		return (text *) data;
@@ -670,128 +684,188 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	/* Otherwise, we gotta spin up some error handling. */
 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
-	PG_TRY();
+	if(format == XMLSERIALIZE_INDENT)
 	{
-		size_t		decl_len = 0;
-
-		/* The serialized data will go into this buffer. */
-		buf = xmlBufferCreate();
-
-		if (buf == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlBuffer");
-
-		/* Detect whether there's an XML declaration */
-		parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
-
-		/*
-		 * Emit declaration only if the input had one.  Note: some versions of
-		 * xmlSaveToBuffer leak memory if a non-null encoding argument is
-		 * passed, so don't do that.  We don't want any encoding conversion
-		 * anyway.
-		 */
-		if (decl_len == 0)
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
-		else
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_FORMAT);
-
-		if (ctxt == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlSaveCtxt");
-
-		if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
-		{
-			/* If it's a document, saving is easy. */
-			if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
-				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-							"could not save document to xmlBuffer");
-		}
-		else if (content_nodes != NULL)
+		PG_TRY();
 		{
-			/*
-			 * Deal with the case where we have non-singly-rooted XML.
-			 * libxml's dump functions don't work well for that without help.
-			 * We build a fake root node that serves as a container for the
-			 * content nodes, and then iterate over the nodes.
-			 */
-			xmlNodePtr	root;
-			xmlNodePtr	newline;
+			size_t		decl_len = 0;
+
+			/* The serialized data will go into this buffer. */
+			buf = xmlBufferCreate();
 
-			root = xmlNewNode(NULL, (const xmlChar *) "content-root");
-			if (root == NULL || xmlerrcxt->err_occurred)
+			if (buf == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlBuffer");
 
-			/* This attaches root to doc, so we need not free it separately. */
-			xmlDocSetRootElement(doc, root);
-			xmlAddChild(root, content_nodes);
+			/* Detect whether there's an XML declaration */
+			parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
 
 			/*
-			 * We use this node to insert newlines in the dump.  Note: in at
-			 * least some libxml versions, xmlNewDocText would not attach the
-			 * node to the document even if we passed it.  Therefore, manage
-			 * freeing of this node manually, and pass NULL here to make sure
-			 * there's not a dangling link.
+			 * Emit declaration only if the input had one.  Note: some versions of
+			 * xmlSaveToBuffer leak memory if a non-null encoding argument is
+			 * passed, so don't do that.  We don't want any encoding conversion
+			 * anyway.
 			 */
-			newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
-			if (newline == NULL || xmlerrcxt->err_occurred)
+			if (decl_len == 0)
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
+			else
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									XML_SAVE_FORMAT);
+
+			if (ctxt == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlSaveCtxt");
 
-			for (xmlNodePtr node = root->children; node; node = node->next)
+			if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
 			{
-				/* insert newlines between nodes */
-				if (node->type != XML_TEXT_NODE && node->prev != NULL)
+				/* If it's a document, saving is easy. */
+				if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+								"could not save document to xmlBuffer");
+			}
+			else if (content_nodes != NULL)
+			{
+				/*
+				 * Deal with the case where we have non-singly-rooted XML.
+				 * libxml's dump functions don't work well for that without help.
+				 * We build a fake root node that serves as a container for the
+				 * content nodes, and then iterate over the nodes.
+				 */
+				xmlNodePtr	root;
+				xmlNodePtr	newline;
+
+				root = xmlNewNode(NULL, (const xmlChar *) "content-root");
+				if (root == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				/* This attaches root to doc, so we need not free it separately. */
+				xmlDocSetRootElement(doc, root);
+				xmlAddChild(root, content_nodes);
+
+				/*
+				 * We use this node to insert newlines in the dump.  Note: in at
+				 * least some libxml versions, xmlNewDocText would not attach the
+				 * node to the document even if we passed it.  Therefore, manage
+				 * freeing of this node manually, and pass NULL here to make sure
+				 * there's not a dangling link.
+				 */
+				newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
+				if (newline == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				for (xmlNodePtr node = root->children; node; node = node->next)
 				{
-					if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+					/* insert newlines between nodes */
+					if (node->type != XML_TEXT_NODE && node->prev != NULL)
+					{
+						if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+						{
+							xmlFreeNode(newline);
+							xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+										"could not save newline to xmlBuffer");
+						}
+					}
+
+					if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
 					{
 						xmlFreeNode(newline);
 						xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-									"could not save newline to xmlBuffer");
+									"could not save content to xmlBuffer");
 					}
 				}
 
-				if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
-				{
-					xmlFreeNode(newline);
-					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-								"could not save content to xmlBuffer");
-				}
+				xmlFreeNode(newline);
 			}
 
-			xmlFreeNode(newline);
-		}
+			if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			{
+				ctxt = NULL;		/* don't try to close it again */
+				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+							"could not close xmlSaveCtxtPtr");
+			}
 
-		if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			result = (text *) xmlBuffer_to_xmltype(buf);
+		}
+		PG_CATCH();
 		{
-			ctxt = NULL;		/* don't try to close it again */
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-						"could not close xmlSaveCtxtPtr");
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (buf)
+				xmlBufferFree(buf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
 		}
+		PG_END_TRY();
+
+		xmlBufferFree(buf);
+		xmlFreeDoc(doc);
 
-		result = (text *) xmlBuffer_to_xmltype(buf);
+		pg_xml_done(xmlerrcxt, false);
 	}
-	PG_CATCH();
+	else if (format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_COMMENTS)
 	{
-		if (ctxt)
-			xmlSaveClose(ctxt);
-		if (buf)
-			xmlBufferFree(buf);
-		if (doc)
-			xmlFreeDoc(doc);
+		xmlChar    *xmlbuf = NULL;
+		int         nbytes;
+		int         with_comments = 0; /* 0 = no xml comments (default) */
 
-		pg_xml_done(xmlerrcxt, true);
+		PG_TRY();
+		{
+			/* 1 = keeps xml comments */
+			if (format == XMLSERIALIZE_CANONICAL_WITH_COMMENTS)
+				with_comments = 1;
 
-		PG_RE_THROW();
-	}
-	PG_END_TRY();
+			if (doc == NULL || escontext.error_occurred)
+			{
+				if (doc)
+					xmlFreeDoc(doc);
+				/* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
+				ereport(ERROR,
+						(errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
+						errmsg("not an XML document")));
+			}
 
-	xmlBufferFree(buf);
-	xmlFreeDoc(doc);
+			/*
+			 * This dumps the canonicalized XML doc into the xmlChar* buffer.
+			 * mode = 2 means the doc will be canonicalized using the C14N 1.1 standard.
+			 */
+			nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, with_comments, &xmlbuf);
 
-	pg_xml_done(xmlerrcxt, false);
+			if(nbytes < 0 || escontext.error_occurred)
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						errmsg("could not canonicalize the given XML document")));
+
+			result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+		}
+		PG_CATCH();
+		{
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (xmlbuf)
+				xmlFree(xmlbuf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
+		}
+		PG_END_TRY();
+
+		xmlFreeDoc(doc);
+		xmlFree(xmlbuf);
+
+		pg_xml_done(xmlerrcxt, false);
+	}
+	else
+		elog(ERROR,"invalid xmlserialize option");
 
 	return result;
 #else
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index f637937cd2..e62d96d383 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -840,6 +840,7 @@ typedef struct XmlSerialize
 	Node	   *expr;
 	TypeName   *typeName;
 	bool		indent;			/* [NO] INDENT */
+	XmlSerializeFormat	format;	/* serialization format */
 	int			location;		/* token location, or -1 if unknown */
 } XmlSerialize;
 
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 60d72a876b..88595032cc 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1520,6 +1520,14 @@ typedef enum XmlOptionType
 	XMLOPTION_CONTENT
 } XmlOptionType;
 
+typedef enum XmlSerializeFormat
+{
+	XMLSERIALIZE_INDENT,					/* pretty-printed xml serialization  */
+	XMLSERIALIZE_CANONICAL,					/* canonical form without xml comments */
+	XMLSERIALIZE_CANONICAL_WITH_COMMENTS,	/* canonical form with xml comments */
+	XMLSERIALIZE_NO_FORMAT					/* unformatted xml representation */
+} XmlSerializeFormat;
+
 typedef struct XmlExpr
 {
 	Expr		xpr;
@@ -1535,13 +1543,13 @@ typedef struct XmlExpr
 	List	   *args;
 	/* DOCUMENT or CONTENT */
 	XmlOptionType xmloption pg_node_attr(query_jumble_ignore);
-	/* INDENT option for XMLSERIALIZE */
-	bool		indent;
 	/* target type/typmod for XMLSERIALIZE */
 	Oid			type pg_node_attr(query_jumble_ignore);
 	int32		typmod pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	int			location;
+	/* serialization format: XMLCANONICAL, XMLCANONICAL_WITH_COMMENTS, XMLINDENT */
+	XmlSerializeFormat format pg_node_attr(query_jumble_ignore);
 } XmlExpr;
 
 /*
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 5984dcfa4b..3233879cc3 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -68,6 +68,7 @@ PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("canonical", CANONICAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index 224f6d75ff..b74f216148 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -78,7 +78,7 @@ extern xmltype *xmlpi(const char *target, text *arg, bool arg_is_null, bool *res
 extern xmltype *xmlroot(xmltype *data, text *version, int standalone);
 extern bool xml_is_document(xmltype *arg);
 extern text *xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg,
-									bool indent);
+									XmlSerializeFormat format);
 extern char *escape_xml(const char *str);
 
 extern char *map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, bool escape_period);
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 398345ca67..83fbdc5223 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -672,6 +672,118 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 63b779470f..481badaa84 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -443,6 +443,114 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val><...
                                      ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+ERROR:  unsupported XML feature
+LINE 2:   (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+             ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 ERROR:  unsupported XML feature
 LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index 43c2558352..6dea6ca38d 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -652,6 +652,118 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index a591eea2e5..a3eda9e84f 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -168,6 +168,69 @@ SELECT xmlserialize(CONTENT  '<foo><bar></bar></foo>' AS text INDENT);
 -- 'no indent' = not using 'no indent'
 SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
 SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+\set VERBOSITY default
 
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 8de90c4958..ed48d57c99 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3113,6 +3113,7 @@ XmlExpr
 XmlExprOp
 XmlOptionType
 XmlSerialize
+XmlSerializeFormat
 XmlTableBuilderData
 YYLTYPE
 YYSTYPE
-- 
2.34.1

#15

Chapman Flack

chap@anastigmatix.net

over 2 years ago

In reply to: Jim Jones (#14)

Re: [PATCH] Add CANONICAL option to xmlserialize

On 2023-10-04 12:19, Jim Jones wrote:

On 04.10.23 11:39, vignesh C wrote:

1) Why the default option was chosen without comments shouldn't it be
the other way round?

I'm not sure it is the way to go. The main idea is to check if two
documents have the same content, and comments might be different even
if the contents of two documents are identical. What are your concerns
regarding this default behaviour?

I hope I'm not butting in, but I too would be leery of any default
behavior that's going to say thing1 and thing2 are the same thing
but ignoring (name part of thing here). If that's the comparison
I mean to make, and it's as easy as CANONICAL WITHOUT COMMENTS
to say that's what I mean, I'd be happy to write that. It also means
that the next person reading my code will know "oh, he means
'same' in *that* way", without having to think about it.

Regards,
-Chap

#16

Jim Jones

jim.jones@uni-muenster.de

over 2 years ago

In reply to: Chapman Flack (#15)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

Hi Chap

On 04.10.23 23:05, Chapman Flack wrote:

I hope I'm not butting in, but I too would be leery of any default
behavior that's going to say thing1 and thing2 are the same thing
but ignoring (name part of thing here). If that's the comparison
I mean to make, and it's as easy as CANONICAL WITHOUT COMMENTS
to say that's what I mean, I'd be happy to write that. It also means
that the next person reading my code will know "oh, he means
'same' in *that* way", without having to think about it.

That's a very compelling argument. Thanks for that!

It is indeed clearer to only remove items from the result set if
explicitly said so.

v8 attached changes de default behaviour to WITH COMMENTS.

Best,

Jim

Attachments:

v8-0001-Add-CANONICAL-output-format-to-xmlserialize.patchtext/x-patch; charset=UTF-8; name=v8-0001-Add-CANONICAL-output-format-to-xmlserialize.patchDownload

From fa3c9c7d5a37c407993f4e2076afed2891e1bfc1 Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Wed, 4 Oct 2023 17:58:24 +0200
Subject: [PATCH v8] Add CANONICAL output format to xmlserialize

This patch introduces the CANONICAL option to xmlserialize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification. This option can
be used with the additional parameter WITH [NO] COMMENTS to keep
or remove xml comments from the canonical xml output. In case no
parameter is provided, WITH COMMENTS will be used as default. This
feature is based on the function xmlC14NDocDumpMemory from the C14N
module of libxml2.

This patch also includes regression tests and documentation.
---
 doc/src/sgml/datatype.sgml            |  41 +++-
 src/backend/executor/execExprInterp.c |   2 +-
 src/backend/parser/gram.y             |  21 +-
 src/backend/parser/parse_expr.c       |   2 +-
 src/backend/utils/adt/xml.c           | 272 ++++++++++++++++----------
 src/include/nodes/parsenodes.h        |   1 +
 src/include/nodes/primnodes.h         |  12 +-
 src/include/parser/kwlist.h           |   1 +
 src/include/utils/xml.h               |   2 +-
 src/test/regress/expected/xml.out     | 114 +++++++++++
 src/test/regress/expected/xml_1.out   | 108 ++++++++++
 src/test/regress/expected/xml_2.out   | 114 +++++++++++
 src/test/regress/sql/xml.sql          |  63 ++++++
 src/tools/pgindent/typedefs.list      |   1 +
 14 files changed, 642 insertions(+), 112 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 5d23765705..df7525208a 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4465,7 +4465,7 @@ xml '<foo>bar</foo>'
     <type>xml</type>, uses the function
     <function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm>
 <synopsis>
-XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ [ NO ] INDENT ] )
+XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ { [ NO ] INDENT ] | CANONICAL [ WITH [NO] COMMENTS ]})
 </synopsis>
     <replaceable>type</replaceable> can be
     <type>character</type>, <type>character varying</type>, or
@@ -4482,6 +4482,45 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <repla
     type likewise produces the original string.
    </para>
 
+   <para>
+    The option <type>CANONICAL</type> converts a given
+    XML document to its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>
+    based on the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>.
+    It is basically designed to provide applications the ability to compare xml documents or test if they
+    have been changed. The optional parameters <type>WITH COMMENTS</type> (which is the default) or
+    <type>WITH NO COMMENTS</type>, respectively, keep or remove XML comments from the given document.
+    </para>
+
+     <para>
+     Example:
+
+<screen><![CDATA[
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL);
+                                xmlserialize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL WITH NO COMMENTS);
+                       xmlserialize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+]]></screen>
+   </para>
    <para>
     When a character string value is cast to or from type
     <type>xml</type> without going through <type>XMLPARSE</type> or
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 24c2b60c62..d9305c28b0 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -3943,7 +3943,7 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 				*op->resvalue =
 					PointerGetDatum(xmltotext_with_options(DatumGetXmlP(value),
 														   xexpr->xmloption,
-														   xexpr->indent));
+														   xexpr->format));
 				*op->resnull = false;
 			}
 			break;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index e56cbe77cb..4c738ad4b2 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -615,12 +615,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <node>	xml_root_version opt_xml_root_standalone
 %type <node>	xmlexists_argument
 %type <ival>	document_or_content
-%type <boolean>	xml_indent_option xml_whitespace_option
+%type <boolean>	xml_whitespace_option
 %type <list>	xmltable_column_list xmltable_column_option_list
 %type <node>	xmltable_column_el
 %type <defelt>	xmltable_column_option_el
 %type <list>	xml_namespace_list
 %type <target>	xml_namespace_el
+%type <ival> 	opt_xml_serialize_format
 
 %type <node>	func_application func_expr_common_subexpr
 %type <node>	func_expr func_expr_windowless
@@ -692,7 +693,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
 	BOOLEAN_P BOTH BREADTH BY
 
-	CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
+	CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT
 	COMMITTED COMPRESSION CONCURRENTLY CONFIGURATION CONFLICT
@@ -15634,14 +15635,14 @@ func_expr_common_subexpr:
 					$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
 									 list_make3($3, $5, $6), @1);
 				}
-			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename xml_indent_option ')'
+			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename opt_xml_serialize_format ')'
 				{
 					XmlSerialize *n = makeNode(XmlSerialize);
 
 					n->xmloption = $3;
 					n->expr = $4;
 					n->typeName = $6;
-					n->indent = $7;
+					n->format = $7;
 					n->location = @1;
 					$$ = (Node *) n;
 				}
@@ -15797,9 +15798,13 @@ document_or_content: DOCUMENT_P						{ $$ = XMLOPTION_DOCUMENT; }
 			| CONTENT_P								{ $$ = XMLOPTION_CONTENT; }
 		;
 
-xml_indent_option: INDENT							{ $$ = true; }
-			| NO INDENT								{ $$ = false; }
-			| /*EMPTY*/								{ $$ = false; }
+opt_xml_serialize_format:
+			INDENT									{ $$ = XMLSERIALIZE_INDENT; }
+			| NO INDENT								{ $$ = XMLSERIALIZE_NO_FORMAT; }
+			| CANONICAL								{ $$ = XMLSERIALIZE_CANONICAL; }
+			| CANONICAL WITH NO COMMENTS			{ $$ = XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS; }
+			| CANONICAL WITH COMMENTS				{ $$ = XMLSERIALIZE_CANONICAL; }
+			| /*EMPTY*/								{ $$ = XMLSERIALIZE_NO_FORMAT; }
 		;
 
 xml_whitespace_option: PRESERVE WHITESPACE_P		{ $$ = true; }
@@ -17084,6 +17089,7 @@ unreserved_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CATALOG_P
@@ -17618,6 +17624,7 @@ bare_label_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CASE
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 64c582c344..624b0b8844 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -2443,7 +2443,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
 	typenameTypeIdAndMod(pstate, xs->typeName, &targetType, &targetTypmod);
 
 	xexpr->xmloption = xs->xmloption;
-	xexpr->indent = xs->indent;
+	xexpr->format = xs->format;
 	xexpr->location = xs->location;
 	/* We actually only need these to be able to parse back the expression. */
 	xexpr->type = targetType;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 2300c7ebf3..8f26366cbe 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -57,6 +57,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -622,7 +623,7 @@ xmltotext(PG_FUNCTION_ARGS)
 
 
 text *
-xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
+xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, XmlSerializeFormat format)
 {
 #ifdef USE_LIBXML
 	text	   *volatile result;
@@ -635,7 +636,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	PgXmlErrorContext *xmlerrcxt;
 #endif
 
-	if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
+	if (xmloption_arg != XMLOPTION_DOCUMENT && format == XMLSERIALIZE_NO_FORMAT)
 	{
 		/*
 		 * We don't actually need to do anything, so just return the
@@ -646,10 +647,23 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 #ifdef USE_LIBXML
-	/* Parse the input according to the xmloption */
-	doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
-					&parsed_xmloptiontype, &content_nodes,
-					(Node *) &escontext);
+	/*
+	 * Parse the input according to the xmloption. XML canonical expects
+	 * a well-formed XML input, so here in case of XMLSERIALIZE_CANONICAL
+	 * or XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS we force xml_parse() to parse
+	 * 'data' as XMLOPTION_DOCUMENT despite of the XmlOptionType given in
+	 * 'xmloption_arg'. This enables the canonicalization of CONTENT fragments
+	 * if they contain a singly-rooted XML - xml_parse() will thrown an error
+	 * otherwise.
+	 */
+	if(format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
+		doc = xml_parse(data, XMLOPTION_DOCUMENT, false,
+						GetDatabaseEncoding(), NULL, NULL, NULL);
+	else
+		doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
+						&parsed_xmloptiontype, &content_nodes,
+						(Node *) &escontext);
+
 	if (doc == NULL || escontext.error_occurred)
 	{
 		if (doc)
@@ -661,7 +675,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 	/* If we weren't asked to indent, we're done. */
-	if (!indent)
+	if (format == XMLSERIALIZE_NO_FORMAT)
 	{
 		xmlFreeDoc(doc);
 		return (text *) data;
@@ -670,128 +684,188 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	/* Otherwise, we gotta spin up some error handling. */
 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
-	PG_TRY();
+	if(format == XMLSERIALIZE_INDENT)
 	{
-		size_t		decl_len = 0;
-
-		/* The serialized data will go into this buffer. */
-		buf = xmlBufferCreate();
-
-		if (buf == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlBuffer");
-
-		/* Detect whether there's an XML declaration */
-		parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
-
-		/*
-		 * Emit declaration only if the input had one.  Note: some versions of
-		 * xmlSaveToBuffer leak memory if a non-null encoding argument is
-		 * passed, so don't do that.  We don't want any encoding conversion
-		 * anyway.
-		 */
-		if (decl_len == 0)
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
-		else
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_FORMAT);
-
-		if (ctxt == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlSaveCtxt");
-
-		if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
-		{
-			/* If it's a document, saving is easy. */
-			if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
-				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-							"could not save document to xmlBuffer");
-		}
-		else if (content_nodes != NULL)
+		PG_TRY();
 		{
-			/*
-			 * Deal with the case where we have non-singly-rooted XML.
-			 * libxml's dump functions don't work well for that without help.
-			 * We build a fake root node that serves as a container for the
-			 * content nodes, and then iterate over the nodes.
-			 */
-			xmlNodePtr	root;
-			xmlNodePtr	newline;
+			size_t		decl_len = 0;
+
+			/* The serialized data will go into this buffer. */
+			buf = xmlBufferCreate();
 
-			root = xmlNewNode(NULL, (const xmlChar *) "content-root");
-			if (root == NULL || xmlerrcxt->err_occurred)
+			if (buf == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlBuffer");
 
-			/* This attaches root to doc, so we need not free it separately. */
-			xmlDocSetRootElement(doc, root);
-			xmlAddChild(root, content_nodes);
+			/* Detect whether there's an XML declaration */
+			parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
 
 			/*
-			 * We use this node to insert newlines in the dump.  Note: in at
-			 * least some libxml versions, xmlNewDocText would not attach the
-			 * node to the document even if we passed it.  Therefore, manage
-			 * freeing of this node manually, and pass NULL here to make sure
-			 * there's not a dangling link.
+			 * Emit declaration only if the input had one.  Note: some versions of
+			 * xmlSaveToBuffer leak memory if a non-null encoding argument is
+			 * passed, so don't do that.  We don't want any encoding conversion
+			 * anyway.
 			 */
-			newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
-			if (newline == NULL || xmlerrcxt->err_occurred)
+			if (decl_len == 0)
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
+			else
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									XML_SAVE_FORMAT);
+
+			if (ctxt == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlSaveCtxt");
 
-			for (xmlNodePtr node = root->children; node; node = node->next)
+			if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
 			{
-				/* insert newlines between nodes */
-				if (node->type != XML_TEXT_NODE && node->prev != NULL)
+				/* If it's a document, saving is easy. */
+				if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+								"could not save document to xmlBuffer");
+			}
+			else if (content_nodes != NULL)
+			{
+				/*
+				 * Deal with the case where we have non-singly-rooted XML.
+				 * libxml's dump functions don't work well for that without help.
+				 * We build a fake root node that serves as a container for the
+				 * content nodes, and then iterate over the nodes.
+				 */
+				xmlNodePtr	root;
+				xmlNodePtr	newline;
+
+				root = xmlNewNode(NULL, (const xmlChar *) "content-root");
+				if (root == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				/* This attaches root to doc, so we need not free it separately. */
+				xmlDocSetRootElement(doc, root);
+				xmlAddChild(root, content_nodes);
+
+				/*
+				 * We use this node to insert newlines in the dump.  Note: in at
+				 * least some libxml versions, xmlNewDocText would not attach the
+				 * node to the document even if we passed it.  Therefore, manage
+				 * freeing of this node manually, and pass NULL here to make sure
+				 * there's not a dangling link.
+				 */
+				newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
+				if (newline == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				for (xmlNodePtr node = root->children; node; node = node->next)
 				{
-					if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+					/* insert newlines between nodes */
+					if (node->type != XML_TEXT_NODE && node->prev != NULL)
+					{
+						if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+						{
+							xmlFreeNode(newline);
+							xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+										"could not save newline to xmlBuffer");
+						}
+					}
+
+					if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
 					{
 						xmlFreeNode(newline);
 						xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-									"could not save newline to xmlBuffer");
+									"could not save content to xmlBuffer");
 					}
 				}
 
-				if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
-				{
-					xmlFreeNode(newline);
-					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-								"could not save content to xmlBuffer");
-				}
+				xmlFreeNode(newline);
 			}
 
-			xmlFreeNode(newline);
-		}
+			if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			{
+				ctxt = NULL;		/* don't try to close it again */
+				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+							"could not close xmlSaveCtxtPtr");
+			}
 
-		if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			result = (text *) xmlBuffer_to_xmltype(buf);
+		}
+		PG_CATCH();
 		{
-			ctxt = NULL;		/* don't try to close it again */
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-						"could not close xmlSaveCtxtPtr");
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (buf)
+				xmlBufferFree(buf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
 		}
+		PG_END_TRY();
+
+		xmlBufferFree(buf);
+		xmlFreeDoc(doc);
 
-		result = (text *) xmlBuffer_to_xmltype(buf);
+		pg_xml_done(xmlerrcxt, false);
 	}
-	PG_CATCH();
+	else if (format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
 	{
-		if (ctxt)
-			xmlSaveClose(ctxt);
-		if (buf)
-			xmlBufferFree(buf);
-		if (doc)
-			xmlFreeDoc(doc);
+		xmlChar    *xmlbuf = NULL;
+		int         nbytes;
+		int         with_comments = 0; /* 0 = no xml comments (default) */
 
-		pg_xml_done(xmlerrcxt, true);
+		PG_TRY();
+		{
+			/* 1 = keeps xml comments */
+			if (format == XMLSERIALIZE_CANONICAL)
+				with_comments = 1;
 
-		PG_RE_THROW();
-	}
-	PG_END_TRY();
+			if (doc == NULL || escontext.error_occurred)
+			{
+				if (doc)
+					xmlFreeDoc(doc);
+				/* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
+				ereport(ERROR,
+						(errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
+						errmsg("not an XML document")));
+			}
 
-	xmlBufferFree(buf);
-	xmlFreeDoc(doc);
+			/*
+			 * This dumps the canonicalized XML doc into the xmlChar* buffer.
+			 * mode = 2 means the doc will be canonicalized using the C14N 1.1 standard.
+			 */
+			nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, with_comments, &xmlbuf);
 
-	pg_xml_done(xmlerrcxt, false);
+			if(nbytes < 0 || escontext.error_occurred)
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						errmsg("could not canonicalize the given XML document")));
+
+			result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+		}
+		PG_CATCH();
+		{
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (xmlbuf)
+				xmlFree(xmlbuf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
+		}
+		PG_END_TRY();
+
+		xmlFreeDoc(doc);
+		xmlFree(xmlbuf);
+
+		pg_xml_done(xmlerrcxt, false);
+	}
+	else
+		elog(ERROR,"invalid xmlserialize option");
 
 	return result;
 #else
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index f637937cd2..e62d96d383 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -840,6 +840,7 @@ typedef struct XmlSerialize
 	Node	   *expr;
 	TypeName   *typeName;
 	bool		indent;			/* [NO] INDENT */
+	XmlSerializeFormat	format;	/* serialization format */
 	int			location;		/* token location, or -1 if unknown */
 } XmlSerialize;
 
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 60d72a876b..c0b97c5156 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1520,6 +1520,14 @@ typedef enum XmlOptionType
 	XMLOPTION_CONTENT
 } XmlOptionType;
 
+typedef enum XmlSerializeFormat
+{
+	XMLSERIALIZE_INDENT,						/* pretty-printed xml serialization  */
+	XMLSERIALIZE_CANONICAL,						/* canonical form with xml comments */
+	XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS,	/* canonical form without xml comments */
+	XMLSERIALIZE_NO_FORMAT						/* unformatted xml representation */
+} XmlSerializeFormat;
+
 typedef struct XmlExpr
 {
 	Expr		xpr;
@@ -1535,13 +1543,13 @@ typedef struct XmlExpr
 	List	   *args;
 	/* DOCUMENT or CONTENT */
 	XmlOptionType xmloption pg_node_attr(query_jumble_ignore);
-	/* INDENT option for XMLSERIALIZE */
-	bool		indent;
 	/* target type/typmod for XMLSERIALIZE */
 	Oid			type pg_node_attr(query_jumble_ignore);
 	int32		typmod pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	int			location;
+	/* serialization format: XMLCANONICAL, XMLCANONICAL_WITH_COMMENTS, XMLINDENT */
+	XmlSerializeFormat format pg_node_attr(query_jumble_ignore);
 } XmlExpr;
 
 /*
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 5984dcfa4b..3233879cc3 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -68,6 +68,7 @@ PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("canonical", CANONICAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index 224f6d75ff..b74f216148 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -78,7 +78,7 @@ extern xmltype *xmlpi(const char *target, text *arg, bool arg_is_null, bool *res
 extern xmltype *xmlroot(xmltype *data, text *version, int standalone);
 extern bool xml_is_document(xmltype *arg);
 extern text *xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg,
-									bool indent);
+									XmlSerializeFormat format);
 extern char *escape_xml(const char *str);
 
 extern char *map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, bool escape_period);
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 398345ca67..9bffecf2d3 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -672,6 +672,120 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                                                                                                                                                                                                                                                  xmlserialize                                                                                                                                                                                                                                                                                                                                                                                                                  
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- default attribute will be added --><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside doc -->
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 63b779470f..aaebb51c82 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -443,6 +443,114 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val><...
                                      ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+ERROR:  unsupported XML feature
+LINE 2:   (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+             ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 ERROR:  unsupported XML feature
 LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index 43c2558352..73e59a4fea 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -652,6 +652,120 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                                                                                                                                                                                                                                                  xmlserialize                                                                                                                                                                                                                                                                                                                                                                                                                  
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- default attribute will be added --><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside doc -->
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index a591eea2e5..fa78a62990 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -168,6 +168,69 @@ SELECT xmlserialize(CONTENT  '<foo><bar></bar></foo>' AS text INDENT);
 -- 'no indent' = not using 'no indent'
 SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
 SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+\set VERBOSITY default
 
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 8de90c4958..ed48d57c99 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3113,6 +3113,7 @@ XmlExpr
 XmlExprOp
 XmlOptionType
 XmlSerialize
+XmlSerializeFormat
 XmlTableBuilderData
 YYLTYPE
 YYSTYPE
-- 
2.34.1

#17

Jim Jones

jim.jones@uni-muenster.de

almost 2 years ago

In reply to: Jim Jones (#16)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

On 05.10.23 09:38, Jim Jones wrote:

v8 attached changes de default behaviour to WITH COMMENTS.

v9 attached with rebase due to changes done to primnodes.h in 615f5f6

--
Jim

Attachments:

v9-0001-Add-CANONICAL-output-format-to-xmlserialize.patchtext/x-patch; charset=UTF-8; name=v9-0001-Add-CANONICAL-output-format-to-xmlserialize.patchDownload

From fe51a1826b75b778c21f559236b23d340a10d703 Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Fri, 9 Feb 2024 13:51:44 +0100
Subject: [PATCH v9] Add CANONICAL output format to xmlserialize

This patch introduces the CANONICAL option to xmlserialize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification. This option can
be used with the additional parameter WITH [NO] COMMENTS to keep
or remove xml comments from the canonical xml output. In case no
parameter is provided, WITH COMMENTS will be used as default. This
feature is based on the function xmlC14NDocDumpMemory from the C14N
module of libxml2.

This patch also includes regression tests and documentation.
---
 doc/src/sgml/datatype.sgml            |  41 +++-
 src/backend/executor/execExprInterp.c |   2 +-
 src/backend/parser/gram.y             |  21 +-
 src/backend/parser/parse_expr.c       |   2 +-
 src/backend/utils/adt/xml.c           | 272 ++++++++++++++++----------
 src/include/nodes/parsenodes.h        |   1 +
 src/include/nodes/primnodes.h         |  11 ++
 src/include/parser/kwlist.h           |   1 +
 src/include/utils/xml.h               |   2 +-
 src/test/regress/expected/xml.out     | 114 +++++++++++
 src/test/regress/expected/xml_1.out   | 108 ++++++++++
 src/test/regress/expected/xml_2.out   | 114 +++++++++++
 src/test/regress/sql/xml.sql          |  63 ++++++
 src/tools/pgindent/typedefs.list      |   1 +
 14 files changed, 643 insertions(+), 110 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 66510ee031..ec2f1137c8 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4472,7 +4472,7 @@ xml '<foo>bar</foo>'
     <type>xml</type>, uses the function
     <function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm>
 <synopsis>
-XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ [ NO ] INDENT ] )
+XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ { [ NO ] INDENT ] | CANONICAL [ WITH [NO] COMMENTS ]})
 </synopsis>
     <replaceable>type</replaceable> can be
     <type>character</type>, <type>character varying</type>, or
@@ -4489,6 +4489,45 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <repla
     type likewise produces the original string.
    </para>
 
+   <para>
+    The option <type>CANONICAL</type> converts a given
+    XML document to its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>
+    based on the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>.
+    It is basically designed to provide applications the ability to compare xml documents or test if they
+    have been changed. The optional parameters <type>WITH COMMENTS</type> (which is the default) or
+    <type>WITH NO COMMENTS</type>, respectively, keep or remove XML comments from the given document.
+    </para>
+
+     <para>
+     Example:
+
+<screen><![CDATA[
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL);
+                                xmlserialize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL WITH NO COMMENTS);
+                       xmlserialize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+]]></screen>
+   </para>
    <para>
     When a character string value is cast to or from type
     <type>xml</type> without going through <type>XMLPARSE</type> or
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 3f20f1dd31..315ee53274 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -4019,7 +4019,7 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 				*op->resvalue =
 					PointerGetDatum(xmltotext_with_options(DatumGetXmlP(value),
 														   xexpr->xmloption,
-														   xexpr->indent));
+														   xexpr->format));
 				*op->resnull = false;
 			}
 			break;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 130f7fc7c3..f54858dfe9 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -616,12 +616,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <node>	xml_root_version opt_xml_root_standalone
 %type <node>	xmlexists_argument
 %type <ival>	document_or_content
-%type <boolean>	xml_indent_option xml_whitespace_option
+%type <boolean>	xml_whitespace_option
 %type <list>	xmltable_column_list xmltable_column_option_list
 %type <node>	xmltable_column_el
 %type <defelt>	xmltable_column_option_el
 %type <list>	xml_namespace_list
 %type <target>	xml_namespace_el
+%type <ival> 	opt_xml_serialize_format
 
 %type <node>	func_application func_expr_common_subexpr
 %type <node>	func_expr func_expr_windowless
@@ -693,7 +694,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
 	BOOLEAN_P BOTH BREADTH BY
 
-	CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
+	CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT
 	COMMITTED COMPRESSION CONCURRENTLY CONFIGURATION CONFLICT
@@ -15677,14 +15678,14 @@ func_expr_common_subexpr:
 					$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
 									 list_make3($3, $5, $6), @1);
 				}
-			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename xml_indent_option ')'
+			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename opt_xml_serialize_format ')'
 				{
 					XmlSerialize *n = makeNode(XmlSerialize);
 
 					n->xmloption = $3;
 					n->expr = $4;
 					n->typeName = $6;
-					n->indent = $7;
+					n->format = $7;
 					n->location = @1;
 					$$ = (Node *) n;
 				}
@@ -15840,9 +15841,13 @@ document_or_content: DOCUMENT_P						{ $$ = XMLOPTION_DOCUMENT; }
 			| CONTENT_P								{ $$ = XMLOPTION_CONTENT; }
 		;
 
-xml_indent_option: INDENT							{ $$ = true; }
-			| NO INDENT								{ $$ = false; }
-			| /*EMPTY*/								{ $$ = false; }
+opt_xml_serialize_format:
+			INDENT									{ $$ = XMLSERIALIZE_INDENT; }
+			| NO INDENT								{ $$ = XMLSERIALIZE_NO_FORMAT; }
+			| CANONICAL								{ $$ = XMLSERIALIZE_CANONICAL; }
+			| CANONICAL WITH NO COMMENTS			{ $$ = XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS; }
+			| CANONICAL WITH COMMENTS				{ $$ = XMLSERIALIZE_CANONICAL; }
+			| /*EMPTY*/								{ $$ = XMLSERIALIZE_NO_FORMAT; }
 		;
 
 xml_whitespace_option: PRESERVE WHITESPACE_P		{ $$ = true; }
@@ -17160,6 +17165,7 @@ unreserved_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CATALOG_P
@@ -17694,6 +17700,7 @@ bare_label_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CASE
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 9300c7b9ab..cf7b98364b 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -2443,7 +2443,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
 	typenameTypeIdAndMod(pstate, xs->typeName, &targetType, &targetTypmod);
 
 	xexpr->xmloption = xs->xmloption;
-	xexpr->indent = xs->indent;
+	xexpr->format = xs->format;
 	xexpr->location = xs->location;
 	/* We actually only need these to be able to parse back the expression. */
 	xexpr->type = targetType;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 3e24aba546..c311504b16 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -58,6 +58,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -654,7 +655,7 @@ xmltotext(PG_FUNCTION_ARGS)
 
 
 text *
-xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
+xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, XmlSerializeFormat format)
 {
 #ifdef USE_LIBXML
 	text	   *volatile result;
@@ -667,7 +668,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	PgXmlErrorContext *xmlerrcxt;
 #endif
 
-	if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
+	if (xmloption_arg != XMLOPTION_DOCUMENT && format == XMLSERIALIZE_NO_FORMAT)
 	{
 		/*
 		 * We don't actually need to do anything, so just return the
@@ -678,10 +679,23 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 #ifdef USE_LIBXML
-	/* Parse the input according to the xmloption */
-	doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
-					&parsed_xmloptiontype, &content_nodes,
-					(Node *) &escontext);
+	/*
+	 * Parse the input according to the xmloption. XML canonical expects
+	 * a well-formed XML input, so here in case of XMLSERIALIZE_CANONICAL
+	 * or XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS we force xml_parse() to parse
+	 * 'data' as XMLOPTION_DOCUMENT despite of the XmlOptionType given in
+	 * 'xmloption_arg'. This enables the canonicalization of CONTENT fragments
+	 * if they contain a singly-rooted XML - xml_parse() will thrown an error
+	 * otherwise.
+	 */
+	if(format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
+		doc = xml_parse(data, XMLOPTION_DOCUMENT, false,
+						GetDatabaseEncoding(), NULL, NULL, NULL);
+	else
+		doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
+						&parsed_xmloptiontype, &content_nodes,
+						(Node *) &escontext);
+
 	if (doc == NULL || escontext.error_occurred)
 	{
 		if (doc)
@@ -693,7 +707,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 	/* If we weren't asked to indent, we're done. */
-	if (!indent)
+	if (format == XMLSERIALIZE_NO_FORMAT)
 	{
 		xmlFreeDoc(doc);
 		return (text *) data;
@@ -702,128 +716,188 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	/* Otherwise, we gotta spin up some error handling. */
 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
-	PG_TRY();
+	if(format == XMLSERIALIZE_INDENT)
 	{
-		size_t		decl_len = 0;
-
-		/* The serialized data will go into this buffer. */
-		buf = xmlBufferCreate();
-
-		if (buf == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlBuffer");
-
-		/* Detect whether there's an XML declaration */
-		parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
-
-		/*
-		 * Emit declaration only if the input had one.  Note: some versions of
-		 * xmlSaveToBuffer leak memory if a non-null encoding argument is
-		 * passed, so don't do that.  We don't want any encoding conversion
-		 * anyway.
-		 */
-		if (decl_len == 0)
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
-		else
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_FORMAT);
-
-		if (ctxt == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlSaveCtxt");
-
-		if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
-		{
-			/* If it's a document, saving is easy. */
-			if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
-				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-							"could not save document to xmlBuffer");
-		}
-		else if (content_nodes != NULL)
+		PG_TRY();
 		{
-			/*
-			 * Deal with the case where we have non-singly-rooted XML.
-			 * libxml's dump functions don't work well for that without help.
-			 * We build a fake root node that serves as a container for the
-			 * content nodes, and then iterate over the nodes.
-			 */
-			xmlNodePtr	root;
-			xmlNodePtr	newline;
+			size_t		decl_len = 0;
+
+			/* The serialized data will go into this buffer. */
+			buf = xmlBufferCreate();
 
-			root = xmlNewNode(NULL, (const xmlChar *) "content-root");
-			if (root == NULL || xmlerrcxt->err_occurred)
+			if (buf == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlBuffer");
 
-			/* This attaches root to doc, so we need not free it separately. */
-			xmlDocSetRootElement(doc, root);
-			xmlAddChild(root, content_nodes);
+			/* Detect whether there's an XML declaration */
+			parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
 
 			/*
-			 * We use this node to insert newlines in the dump.  Note: in at
-			 * least some libxml versions, xmlNewDocText would not attach the
-			 * node to the document even if we passed it.  Therefore, manage
-			 * freeing of this node manually, and pass NULL here to make sure
-			 * there's not a dangling link.
+			 * Emit declaration only if the input had one.  Note: some versions of
+			 * xmlSaveToBuffer leak memory if a non-null encoding argument is
+			 * passed, so don't do that.  We don't want any encoding conversion
+			 * anyway.
 			 */
-			newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
-			if (newline == NULL || xmlerrcxt->err_occurred)
+			if (decl_len == 0)
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
+			else
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									XML_SAVE_FORMAT);
+
+			if (ctxt == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlSaveCtxt");
 
-			for (xmlNodePtr node = root->children; node; node = node->next)
+			if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
 			{
-				/* insert newlines between nodes */
-				if (node->type != XML_TEXT_NODE && node->prev != NULL)
+				/* If it's a document, saving is easy. */
+				if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+								"could not save document to xmlBuffer");
+			}
+			else if (content_nodes != NULL)
+			{
+				/*
+				 * Deal with the case where we have non-singly-rooted XML.
+				 * libxml's dump functions don't work well for that without help.
+				 * We build a fake root node that serves as a container for the
+				 * content nodes, and then iterate over the nodes.
+				 */
+				xmlNodePtr	root;
+				xmlNodePtr	newline;
+
+				root = xmlNewNode(NULL, (const xmlChar *) "content-root");
+				if (root == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				/* This attaches root to doc, so we need not free it separately. */
+				xmlDocSetRootElement(doc, root);
+				xmlAddChild(root, content_nodes);
+
+				/*
+				 * We use this node to insert newlines in the dump.  Note: in at
+				 * least some libxml versions, xmlNewDocText would not attach the
+				 * node to the document even if we passed it.  Therefore, manage
+				 * freeing of this node manually, and pass NULL here to make sure
+				 * there's not a dangling link.
+				 */
+				newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
+				if (newline == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				for (xmlNodePtr node = root->children; node; node = node->next)
 				{
-					if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+					/* insert newlines between nodes */
+					if (node->type != XML_TEXT_NODE && node->prev != NULL)
+					{
+						if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+						{
+							xmlFreeNode(newline);
+							xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+										"could not save newline to xmlBuffer");
+						}
+					}
+
+					if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
 					{
 						xmlFreeNode(newline);
 						xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-									"could not save newline to xmlBuffer");
+									"could not save content to xmlBuffer");
 					}
 				}
 
-				if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
-				{
-					xmlFreeNode(newline);
-					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-								"could not save content to xmlBuffer");
-				}
+				xmlFreeNode(newline);
 			}
 
-			xmlFreeNode(newline);
-		}
+			if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			{
+				ctxt = NULL;		/* don't try to close it again */
+				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+							"could not close xmlSaveCtxtPtr");
+			}
 
-		if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			result = (text *) xmlBuffer_to_xmltype(buf);
+		}
+		PG_CATCH();
 		{
-			ctxt = NULL;		/* don't try to close it again */
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-						"could not close xmlSaveCtxtPtr");
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (buf)
+				xmlBufferFree(buf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
 		}
+		PG_END_TRY();
+
+		xmlBufferFree(buf);
+		xmlFreeDoc(doc);
 
-		result = (text *) xmlBuffer_to_xmltype(buf);
+		pg_xml_done(xmlerrcxt, false);
 	}
-	PG_CATCH();
+	else if (format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
 	{
-		if (ctxt)
-			xmlSaveClose(ctxt);
-		if (buf)
-			xmlBufferFree(buf);
-		if (doc)
-			xmlFreeDoc(doc);
+		xmlChar    *xmlbuf = NULL;
+		int         nbytes;
+		int         with_comments = 0; /* 0 = no xml comments (default) */
 
-		pg_xml_done(xmlerrcxt, true);
+		PG_TRY();
+		{
+			/* 1 = keeps xml comments */
+			if (format == XMLSERIALIZE_CANONICAL)
+				with_comments = 1;
 
-		PG_RE_THROW();
-	}
-	PG_END_TRY();
+			if (doc == NULL || escontext.error_occurred)
+			{
+				if (doc)
+					xmlFreeDoc(doc);
+				/* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
+				ereport(ERROR,
+						(errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
+						errmsg("not an XML document")));
+			}
 
-	xmlBufferFree(buf);
-	xmlFreeDoc(doc);
+			/*
+			 * This dumps the canonicalized XML doc into the xmlChar* buffer.
+			 * mode = 2 means the doc will be canonicalized using the C14N 1.1 standard.
+			 */
+			nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, with_comments, &xmlbuf);
 
-	pg_xml_done(xmlerrcxt, false);
+			if(nbytes < 0 || escontext.error_occurred)
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						errmsg("could not canonicalize the given XML document")));
+
+			result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+		}
+		PG_CATCH();
+		{
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (xmlbuf)
+				xmlFree(xmlbuf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
+		}
+		PG_END_TRY();
+
+		xmlFreeDoc(doc);
+		xmlFree(xmlbuf);
+
+		pg_xml_done(xmlerrcxt, false);
+	}
+	else
+		elog(ERROR,"invalid xmlserialize option");
 
 	return result;
 #else
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 476d55dd24..4f00d94322 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -833,6 +833,7 @@ typedef struct XmlSerialize
 	Node	   *expr;
 	TypeName   *typeName;
 	bool		indent;			/* [NO] INDENT */
+	XmlSerializeFormat	format;	/* serialization format */
 	int			location;		/* token location, or -1 if unknown */
 } XmlSerialize;
 
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 4a154606d2..b8ee584bbb 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1528,6 +1528,14 @@ typedef enum XmlOptionType
 	XMLOPTION_CONTENT,
 } XmlOptionType;
 
+typedef enum XmlSerializeFormat
+{
+	XMLSERIALIZE_INDENT,                       /* pretty-printed xml serialization  */
+	XMLSERIALIZE_CANONICAL,                    /* canonical form with xml comments */
+	XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS,   /* canonical form without xml comments */
+	XMLSERIALIZE_NO_FORMAT                     /* unformatted xml representation */
+} XmlSerializeFormat;
+
 typedef struct XmlExpr
 {
 	Expr		xpr;
@@ -1550,6 +1558,9 @@ typedef struct XmlExpr
 	int32		typmod pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	int			location;
+	/* serialization format: XMLCANONICAL, XMLCANONICAL_WITH_COMMENTS, XMLINDENT */
+	XmlSerializeFormat format pg_node_attr(query_jumble_ignore);
+
 } XmlExpr;
 
 /*
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 2331acac09..187e711476 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -68,6 +68,7 @@ PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("canonical", CANONICAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index ed20e21375..05f11c0517 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -78,7 +78,7 @@ extern xmltype *xmlpi(const char *target, text *arg, bool arg_is_null, bool *res
 extern xmltype *xmlroot(xmltype *data, text *version, int standalone);
 extern bool xml_is_document(xmltype *arg);
 extern text *xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg,
-									bool indent);
+									XmlSerializeFormat format);
 extern char *escape_xml(const char *str);
 
 extern char *map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, bool escape_period);
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 6500cff885..3f9fce8a77 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -672,6 +672,120 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                                                                                                                                                                                                                                                  xmlserialize                                                                                                                                                                                                                                                                                                                                                                                                                  
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- default attribute will be added --><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside doc -->
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 9323b84ae2..6a76a4fd9a 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -443,6 +443,114 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val><...
                                      ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+ERROR:  unsupported XML feature
+LINE 2:   (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+             ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 ERROR:  unsupported XML feature
 LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index e1d165c6c9..b47fb44b7a 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -652,6 +652,120 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                                                                                                                                                                                                                                                  xmlserialize                                                                                                                                                                                                                                                                                                                                                                                                                  
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- default attribute will be added --><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside doc -->
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index 953bac09e4..a2f1c3566d 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -168,6 +168,69 @@ SELECT xmlserialize(CONTENT  '<foo><bar></bar></foo>' AS text INDENT);
 -- 'no indent' = not using 'no indent'
 SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
 SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+\set VERBOSITY default
 
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 91433d439b..5d791ac732 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3141,6 +3141,7 @@ XmlExpr
 XmlExprOp
 XmlOptionType
 XmlSerialize
+XmlSerializeFormat
 XmlTableBuilderData
 YYLTYPE
 YYSTYPE
-- 
2.34.1

#18

Jim Jones

jim.jones@uni-muenster.de

over 1 year ago

In reply to: Jim Jones (#17)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

On 09.02.24 14:19, Jim Jones wrote:

v9 attached with rebase due to changes done to primnodes.h in 615f5f6

v10 attached with rebase due to changes in primnodes, parsenodes.h, and
gram.y

--
Jim

Attachments:

v10-0001-Add-CANONICAL-output-format-to-xmlserialize.patchtext/x-patch; charset=UTF-8; name=v10-0001-Add-CANONICAL-output-format-to-xmlserialize.patchDownload

From fbd98149d50fe19b886b30ed49b9d553a18f30b4 Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Wed, 19 Jun 2024 10:22:10 +0200
Subject: [PATCH v10] Add CANONICAL output format to xmlserialize

This patch introduces the CANONICAL option to xmlserialize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification. This option can
be used with the additional parameter WITH [NO] COMMENTS to keep
or remove xml comments from the canonical xml output. In case no
parameter is provided, WITH COMMENTS will be used as default. This
feature is based on the function xmlC14NDocDumpMemory from the C14N
module of libxml2.

This patch also includes regression tests and documentation.
---
 doc/src/sgml/datatype.sgml            |  41 +++-
 src/backend/executor/execExprInterp.c |   2 +-
 src/backend/parser/gram.y             |  21 +-
 src/backend/parser/parse_expr.c       |   2 +-
 src/backend/utils/adt/xml.c           | 272 ++++++++++++++++----------
 src/include/nodes/parsenodes.h        |   1 +
 src/include/nodes/primnodes.h         |  10 +
 src/include/parser/kwlist.h           |   1 +
 src/include/utils/xml.h               |   2 +-
 src/test/regress/expected/xml.out     | 114 +++++++++++
 src/test/regress/expected/xml_1.out   | 108 ++++++++++
 src/test/regress/expected/xml_2.out   | 114 +++++++++++
 src/test/regress/sql/xml.sql          |  63 ++++++
 src/tools/pgindent/typedefs.list      |   1 +
 14 files changed, 642 insertions(+), 110 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 6646820d6a..7c28d34866 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4483,7 +4483,7 @@ xml '<foo>bar</foo>'
     <type>xml</type>, uses the function
     <function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm>
 <synopsis>
-XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ [ NO ] INDENT ] )
+XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ { [ NO ] INDENT ] | CANONICAL [ WITH [NO] COMMENTS ]})
 </synopsis>
     <replaceable>type</replaceable> can be
     <type>character</type>, <type>character varying</type>, or
@@ -4500,6 +4500,45 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <repla
     type likewise produces the original string.
    </para>
 
+   <para>
+    The option <type>CANONICAL</type> converts a given
+    XML document to its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>
+    based on the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>.
+    It is basically designed to provide applications the ability to compare xml documents or test if they
+    have been changed. The optional parameters <type>WITH COMMENTS</type> (which is the default) or
+    <type>WITH NO COMMENTS</type>, respectively, keep or remove XML comments from the given document.
+    </para>
+
+     <para>
+     Example:
+
+<screen><![CDATA[
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL);
+                                xmlserialize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL WITH NO COMMENTS);
+                       xmlserialize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+]]></screen>
+   </para>
    <para>
     When a character string value is cast to or from type
     <type>xml</type> without going through <type>XMLPARSE</type> or
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 852186312c..f14d7464ef 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -4053,7 +4053,7 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 				*op->resvalue =
 					PointerGetDatum(xmltotext_with_options(DatumGetXmlP(value),
 														   xexpr->xmloption,
-														   xexpr->indent));
+														   xexpr->format));
 				*op->resnull = false;
 			}
 			break;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 4d582950b7..ff1caa21f2 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -619,12 +619,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <node>	xml_root_version opt_xml_root_standalone
 %type <node>	xmlexists_argument
 %type <ival>	document_or_content
-%type <boolean>	xml_indent_option xml_whitespace_option
+%type <boolean>	xml_whitespace_option
 %type <list>	xmltable_column_list xmltable_column_option_list
 %type <node>	xmltable_column_el
 %type <defelt>	xmltable_column_option_el
 %type <list>	xml_namespace_list
 %type <target>	xml_namespace_el
+%type <ival> 	opt_xml_serialize_format
 
 %type <node>	func_application func_expr_common_subexpr
 %type <node>	func_expr func_expr_windowless
@@ -712,7 +713,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
 	BOOLEAN_P BOTH BREADTH BY
 
-	CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
+	CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT
 	COMMITTED COMPRESSION CONCURRENTLY CONDITIONAL CONFIGURATION CONFLICT
@@ -15965,14 +15966,14 @@ func_expr_common_subexpr:
 					$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
 									 list_make3($3, $5, $6), @1);
 				}
-			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename xml_indent_option ')'
+			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename opt_xml_serialize_format ')'
 				{
 					XmlSerialize *n = makeNode(XmlSerialize);
 
 					n->xmloption = $3;
 					n->expr = $4;
 					n->typeName = $6;
-					n->indent = $7;
+					n->format = $7;
 					n->location = @1;
 					$$ = (Node *) n;
 				}
@@ -16192,9 +16193,13 @@ document_or_content: DOCUMENT_P						{ $$ = XMLOPTION_DOCUMENT; }
 			| CONTENT_P								{ $$ = XMLOPTION_CONTENT; }
 		;
 
-xml_indent_option: INDENT							{ $$ = true; }
-			| NO INDENT								{ $$ = false; }
-			| /*EMPTY*/								{ $$ = false; }
+opt_xml_serialize_format:
+			INDENT									{ $$ = XMLSERIALIZE_INDENT; }
+			| NO INDENT								{ $$ = XMLSERIALIZE_NO_FORMAT; }
+			| CANONICAL								{ $$ = XMLSERIALIZE_CANONICAL; }
+			| CANONICAL WITH NO COMMENTS			{ $$ = XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS; }
+			| CANONICAL WITH COMMENTS				{ $$ = XMLSERIALIZE_CANONICAL; }
+			| /*EMPTY*/								{ $$ = XMLSERIALIZE_NO_FORMAT; }
 		;
 
 xml_whitespace_option: PRESERVE WHITESPACE_P		{ $$ = true; }
@@ -17591,6 +17596,7 @@ unreserved_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CATALOG_P
@@ -18145,6 +18151,7 @@ bare_label_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CASE
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 00cd7358eb..b28a89f411 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -2490,7 +2490,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
 	typenameTypeIdAndMod(pstate, xs->typeName, &targetType, &targetTypmod);
 
 	xexpr->xmloption = xs->xmloption;
-	xexpr->indent = xs->indent;
+	xexpr->format = xs->format;
 	xexpr->location = xs->location;
 	/* We actually only need these to be able to parse back the expression. */
 	xexpr->type = targetType;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 3e4ca874d8..44738b09d2 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -58,6 +58,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -653,7 +654,7 @@ xmltotext(PG_FUNCTION_ARGS)
 
 
 text *
-xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
+xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, XmlSerializeFormat format)
 {
 #ifdef USE_LIBXML
 	text	   *volatile result;
@@ -666,7 +667,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	PgXmlErrorContext *xmlerrcxt;
 #endif
 
-	if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
+	if (xmloption_arg != XMLOPTION_DOCUMENT && format == XMLSERIALIZE_NO_FORMAT)
 	{
 		/*
 		 * We don't actually need to do anything, so just return the
@@ -677,10 +678,23 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 #ifdef USE_LIBXML
-	/* Parse the input according to the xmloption */
-	doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
-					&parsed_xmloptiontype, &content_nodes,
-					(Node *) &escontext);
+	/*
+	 * Parse the input according to the xmloption. XML canonical expects
+	 * a well-formed XML input, so here in case of XMLSERIALIZE_CANONICAL
+	 * or XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS we force xml_parse() to parse
+	 * 'data' as XMLOPTION_DOCUMENT despite of the XmlOptionType given in
+	 * 'xmloption_arg'. This enables the canonicalization of CONTENT fragments
+	 * if they contain a singly-rooted XML - xml_parse() will thrown an error
+	 * otherwise.
+	 */
+	if(format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
+		doc = xml_parse(data, XMLOPTION_DOCUMENT, false,
+						GetDatabaseEncoding(), NULL, NULL, NULL);
+	else
+		doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
+						&parsed_xmloptiontype, &content_nodes,
+						(Node *) &escontext);
+
 	if (doc == NULL || escontext.error_occurred)
 	{
 		if (doc)
@@ -692,7 +706,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 	/* If we weren't asked to indent, we're done. */
-	if (!indent)
+	if (format == XMLSERIALIZE_NO_FORMAT)
 	{
 		xmlFreeDoc(doc);
 		return (text *) data;
@@ -701,128 +715,188 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	/* Otherwise, we gotta spin up some error handling. */
 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
-	PG_TRY();
+	if(format == XMLSERIALIZE_INDENT)
 	{
-		size_t		decl_len = 0;
-
-		/* The serialized data will go into this buffer. */
-		buf = xmlBufferCreate();
-
-		if (buf == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlBuffer");
-
-		/* Detect whether there's an XML declaration */
-		parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
-
-		/*
-		 * Emit declaration only if the input had one.  Note: some versions of
-		 * xmlSaveToBuffer leak memory if a non-null encoding argument is
-		 * passed, so don't do that.  We don't want any encoding conversion
-		 * anyway.
-		 */
-		if (decl_len == 0)
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
-		else
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_FORMAT);
-
-		if (ctxt == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlSaveCtxt");
-
-		if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
-		{
-			/* If it's a document, saving is easy. */
-			if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
-				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-							"could not save document to xmlBuffer");
-		}
-		else if (content_nodes != NULL)
+		PG_TRY();
 		{
-			/*
-			 * Deal with the case where we have non-singly-rooted XML.
-			 * libxml's dump functions don't work well for that without help.
-			 * We build a fake root node that serves as a container for the
-			 * content nodes, and then iterate over the nodes.
-			 */
-			xmlNodePtr	root;
-			xmlNodePtr	newline;
+			size_t		decl_len = 0;
+
+			/* The serialized data will go into this buffer. */
+			buf = xmlBufferCreate();
 
-			root = xmlNewNode(NULL, (const xmlChar *) "content-root");
-			if (root == NULL || xmlerrcxt->err_occurred)
+			if (buf == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlBuffer");
 
-			/* This attaches root to doc, so we need not free it separately. */
-			xmlDocSetRootElement(doc, root);
-			xmlAddChild(root, content_nodes);
+			/* Detect whether there's an XML declaration */
+			parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
 
 			/*
-			 * We use this node to insert newlines in the dump.  Note: in at
-			 * least some libxml versions, xmlNewDocText would not attach the
-			 * node to the document even if we passed it.  Therefore, manage
-			 * freeing of this node manually, and pass NULL here to make sure
-			 * there's not a dangling link.
+			 * Emit declaration only if the input had one.  Note: some versions of
+			 * xmlSaveToBuffer leak memory if a non-null encoding argument is
+			 * passed, so don't do that.  We don't want any encoding conversion
+			 * anyway.
 			 */
-			newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
-			if (newline == NULL || xmlerrcxt->err_occurred)
+			if (decl_len == 0)
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
+			else
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									XML_SAVE_FORMAT);
+
+			if (ctxt == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlSaveCtxt");
 
-			for (xmlNodePtr node = root->children; node; node = node->next)
+			if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
 			{
-				/* insert newlines between nodes */
-				if (node->type != XML_TEXT_NODE && node->prev != NULL)
+				/* If it's a document, saving is easy. */
+				if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+								"could not save document to xmlBuffer");
+			}
+			else if (content_nodes != NULL)
+			{
+				/*
+				 * Deal with the case where we have non-singly-rooted XML.
+				 * libxml's dump functions don't work well for that without help.
+				 * We build a fake root node that serves as a container for the
+				 * content nodes, and then iterate over the nodes.
+				 */
+				xmlNodePtr	root;
+				xmlNodePtr	newline;
+
+				root = xmlNewNode(NULL, (const xmlChar *) "content-root");
+				if (root == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				/* This attaches root to doc, so we need not free it separately. */
+				xmlDocSetRootElement(doc, root);
+				xmlAddChild(root, content_nodes);
+
+				/*
+				 * We use this node to insert newlines in the dump.  Note: in at
+				 * least some libxml versions, xmlNewDocText would not attach the
+				 * node to the document even if we passed it.  Therefore, manage
+				 * freeing of this node manually, and pass NULL here to make sure
+				 * there's not a dangling link.
+				 */
+				newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
+				if (newline == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				for (xmlNodePtr node = root->children; node; node = node->next)
 				{
-					if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+					/* insert newlines between nodes */
+					if (node->type != XML_TEXT_NODE && node->prev != NULL)
+					{
+						if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+						{
+							xmlFreeNode(newline);
+							xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+										"could not save newline to xmlBuffer");
+						}
+					}
+
+					if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
 					{
 						xmlFreeNode(newline);
 						xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-									"could not save newline to xmlBuffer");
+									"could not save content to xmlBuffer");
 					}
 				}
 
-				if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
-				{
-					xmlFreeNode(newline);
-					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-								"could not save content to xmlBuffer");
-				}
+				xmlFreeNode(newline);
 			}
 
-			xmlFreeNode(newline);
-		}
+			if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			{
+				ctxt = NULL;		/* don't try to close it again */
+				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+							"could not close xmlSaveCtxtPtr");
+			}
 
-		if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			result = (text *) xmlBuffer_to_xmltype(buf);
+		}
+		PG_CATCH();
 		{
-			ctxt = NULL;		/* don't try to close it again */
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-						"could not close xmlSaveCtxtPtr");
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (buf)
+				xmlBufferFree(buf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
 		}
+		PG_END_TRY();
+
+		xmlBufferFree(buf);
+		xmlFreeDoc(doc);
 
-		result = (text *) xmlBuffer_to_xmltype(buf);
+		pg_xml_done(xmlerrcxt, false);
 	}
-	PG_CATCH();
+	else if (format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
 	{
-		if (ctxt)
-			xmlSaveClose(ctxt);
-		if (buf)
-			xmlBufferFree(buf);
-		if (doc)
-			xmlFreeDoc(doc);
+		xmlChar    *xmlbuf = NULL;
+		int         nbytes;
+		int         with_comments = 0; /* 0 = no xml comments (default) */
 
-		pg_xml_done(xmlerrcxt, true);
+		PG_TRY();
+		{
+			/* 1 = keeps xml comments */
+			if (format == XMLSERIALIZE_CANONICAL)
+				with_comments = 1;
 
-		PG_RE_THROW();
-	}
-	PG_END_TRY();
+			if (doc == NULL || escontext.error_occurred)
+			{
+				if (doc)
+					xmlFreeDoc(doc);
+				/* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
+				ereport(ERROR,
+						(errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
+						errmsg("not an XML document")));
+			}
 
-	xmlBufferFree(buf);
-	xmlFreeDoc(doc);
+			/*
+			 * This dumps the canonicalized XML doc into the xmlChar* buffer.
+			 * mode = 2 means the doc will be canonicalized using the C14N 1.1 standard.
+			 */
+			nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, with_comments, &xmlbuf);
 
-	pg_xml_done(xmlerrcxt, false);
+			if(nbytes < 0 || escontext.error_occurred)
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						errmsg("could not canonicalize the given XML document")));
+
+			result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+		}
+		PG_CATCH();
+		{
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (xmlbuf)
+				xmlFree(xmlbuf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
+		}
+		PG_END_TRY();
+
+		xmlFreeDoc(doc);
+		xmlFree(xmlbuf);
+
+		pg_xml_done(xmlerrcxt, false);
+	}
+	else
+		elog(ERROR,"invalid xmlserialize option");
 
 	return result;
 #else
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 85a62b538e..47bffb7cd1 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -846,6 +846,7 @@ typedef struct XmlSerialize
 	Node	   *expr;
 	TypeName   *typeName;
 	bool		indent;			/* [NO] INDENT */
+	XmlSerializeFormat	format;	/* serialization format */
 	ParseLoc	location;		/* token location, or -1 if unknown */
 } XmlSerialize;
 
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 4830efc573..236ace6857 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1593,6 +1593,14 @@ typedef enum XmlOptionType
 	XMLOPTION_CONTENT,
 } XmlOptionType;
 
+typedef enum XmlSerializeFormat
+{
+	XMLSERIALIZE_INDENT,                       /* pretty-printed xml serialization  */
+	XMLSERIALIZE_CANONICAL,                    /* canonical form with xml comments */
+	XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS,   /* canonical form without xml comments */
+	XMLSERIALIZE_NO_FORMAT                     /* unformatted xml representation */
+} XmlSerializeFormat;
+
 typedef struct XmlExpr
 {
 	Expr		xpr;
@@ -1615,6 +1623,8 @@ typedef struct XmlExpr
 	int32		typmod pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	ParseLoc	location;
+	/* serialization format: XMLCANONICAL, XMLCANONICAL_WITH_COMMENTS, XMLINDENT */
+	XmlSerializeFormat format pg_node_attr(query_jumble_ignore);
 } XmlExpr;
 
 /*
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index f7fe834cf4..68d939d754 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -68,6 +68,7 @@ PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("canonical", CANONICAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index ed20e21375..05f11c0517 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -78,7 +78,7 @@ extern xmltype *xmlpi(const char *target, text *arg, bool arg_is_null, bool *res
 extern xmltype *xmlroot(xmltype *data, text *version, int standalone);
 extern bool xml_is_document(xmltype *arg);
 extern text *xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg,
-									bool indent);
+									XmlSerializeFormat format);
 extern char *escape_xml(const char *str);
 
 extern char *map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, bool escape_period);
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 6500cff885..3f9fce8a77 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -672,6 +672,120 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                                                                                                                                                                                                                                                  xmlserialize                                                                                                                                                                                                                                                                                                                                                                                                                  
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- default attribute will be added --><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside doc -->
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 9323b84ae2..6a76a4fd9a 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -443,6 +443,114 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val><...
                                      ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+ERROR:  unsupported XML feature
+LINE 2:   (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+             ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 ERROR:  unsupported XML feature
 LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index e1d165c6c9..b47fb44b7a 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -652,6 +652,120 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                                                                                                                                                                                                                                                  xmlserialize                                                                                                                                                                                                                                                                                                                                                                                                                  
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- default attribute will be added --><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside doc -->
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index 953bac09e4..a2f1c3566d 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -168,6 +168,69 @@ SELECT xmlserialize(CONTENT  '<foo><bar></bar></foo>' AS text INDENT);
 -- 'no indent' = not using 'no indent'
 SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
 SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+\set VERBOSITY default
 
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 61ad417cde..df763d7fa5 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3225,6 +3225,7 @@ XmlExpr
 XmlExprOp
 XmlOptionType
 XmlSerialize
+XmlSerializeFormat
 XmlTableBuilderData
 YYLTYPE
 YYSTYPE
-- 
2.34.1

#19

Jim Jones

jim.jones@uni-muenster.de

over 1 year ago

In reply to: Jim Jones (#18)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

On 19.06.24 10:59, Jim Jones wrote:

On 09.02.24 14:19, Jim Jones wrote:

v9 attached with rebase due to changes done to primnodes.h in 615f5f6

v10 attached with rebase due to changes in primnodes, parsenodes.h, and
gram.y

v11 attached with rebase due to changes in xml.c

--
Jim

Attachments:

v11-0001-Add-CANONICAL-output-format-to-xmlserialize.patchtext/x-patch; charset=UTF-8; name=v11-0001-Add-CANONICAL-output-format-to-xmlserialize.patchDownload

From f803703a85e9036213c21d8e5652cd6f1a72381e Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Tue, 9 Jul 2024 20:28:00 +0200
Subject: [PATCH v11] Add CANONICAL output format to xmlserialize

This patch introduces the CANONICAL option to xmlserialize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification. This option can
be used with the additional parameter WITH [NO] COMMENTS to keep
or remove xml comments from the canonical xml output. In case no
parameter is provided, WITH COMMENTS will be used as default. This
feature is based on the function xmlC14NDocDumpMemory from the C14N
module of libxml2.

This patch also includes regression tests and documentation.
---
 doc/src/sgml/datatype.sgml            |  41 +++-
 src/backend/executor/execExprInterp.c |   2 +-
 src/backend/parser/gram.y             |  21 +-
 src/backend/parser/parse_expr.c       |   2 +-
 src/backend/utils/adt/xml.c           | 273 ++++++++++++++++----------
 src/include/nodes/parsenodes.h        |   1 +
 src/include/nodes/primnodes.h         |  10 +
 src/include/parser/kwlist.h           |   1 +
 src/include/utils/xml.h               |   2 +-
 src/test/regress/expected/xml.out     | 114 +++++++++++
 src/test/regress/expected/xml_1.out   | 108 ++++++++++
 src/test/regress/expected/xml_2.out   | 114 +++++++++++
 src/test/regress/sql/xml.sql          |  63 ++++++
 src/tools/pgindent/typedefs.list      |   1 +
 14 files changed, 642 insertions(+), 111 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index e0d33f12e1..19ba5bbf7f 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4496,7 +4496,7 @@ xml '<foo>bar</foo>'
     <type>xml</type>, uses the function
     <function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm>
 <synopsis>
-XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ [ NO ] INDENT ] )
+XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ { [ NO ] INDENT ] | CANONICAL [ WITH [NO] COMMENTS ]})
 </synopsis>
     <replaceable>type</replaceable> can be
     <type>character</type>, <type>character varying</type>, or
@@ -4513,6 +4513,45 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <repla
     type likewise produces the original string.
    </para>
 
+   <para>
+    The option <type>CANONICAL</type> converts a given
+    XML document to its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>
+    based on the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>.
+    It is basically designed to provide applications the ability to compare xml documents or test if they
+    have been changed. The optional parameters <type>WITH COMMENTS</type> (which is the default) or
+    <type>WITH NO COMMENTS</type>, respectively, keep or remove XML comments from the given document.
+    </para>
+
+     <para>
+     Example:
+
+<screen><![CDATA[
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL);
+                                xmlserialize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL WITH NO COMMENTS);
+                       xmlserialize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+]]></screen>
+   </para>
    <para>
     When a character string value is cast to or from type
     <type>xml</type> without going through <type>XMLPARSE</type> or
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index d8735286c4..e7bd95c216 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -4053,7 +4053,7 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 				*op->resvalue =
 					PointerGetDatum(xmltotext_with_options(DatumGetXmlP(value),
 														   xexpr->xmloption,
-														   xexpr->indent));
+														   xexpr->format));
 				*op->resnull = false;
 			}
 			break;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index a043fd4c66..3f904d1e3b 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -619,12 +619,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <node>	xml_root_version opt_xml_root_standalone
 %type <node>	xmlexists_argument
 %type <ival>	document_or_content
-%type <boolean>	xml_indent_option xml_whitespace_option
+%type <boolean>	xml_whitespace_option
 %type <list>	xmltable_column_list xmltable_column_option_list
 %type <node>	xmltable_column_el
 %type <defelt>	xmltable_column_option_el
 %type <list>	xml_namespace_list
 %type <target>	xml_namespace_el
+%type <ival> 	opt_xml_serialize_format
 
 %type <node>	func_application func_expr_common_subexpr
 %type <node>	func_expr func_expr_windowless
@@ -712,7 +713,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
 	BOOLEAN_P BOTH BREADTH BY
 
-	CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
+	CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT
 	COMMITTED COMPRESSION CONCURRENTLY CONDITIONAL CONFIGURATION CONFLICT
@@ -15965,14 +15966,14 @@ func_expr_common_subexpr:
 					$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
 									 list_make3($3, $5, $6), @1);
 				}
-			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename xml_indent_option ')'
+			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename opt_xml_serialize_format ')'
 				{
 					XmlSerialize *n = makeNode(XmlSerialize);
 
 					n->xmloption = $3;
 					n->expr = $4;
 					n->typeName = $6;
-					n->indent = $7;
+					n->format = $7;
 					n->location = @1;
 					$$ = (Node *) n;
 				}
@@ -16192,9 +16193,13 @@ document_or_content: DOCUMENT_P						{ $$ = XMLOPTION_DOCUMENT; }
 			| CONTENT_P								{ $$ = XMLOPTION_CONTENT; }
 		;
 
-xml_indent_option: INDENT							{ $$ = true; }
-			| NO INDENT								{ $$ = false; }
-			| /*EMPTY*/								{ $$ = false; }
+opt_xml_serialize_format:
+			INDENT									{ $$ = XMLSERIALIZE_INDENT; }
+			| NO INDENT								{ $$ = XMLSERIALIZE_NO_FORMAT; }
+			| CANONICAL								{ $$ = XMLSERIALIZE_CANONICAL; }
+			| CANONICAL WITH NO COMMENTS			{ $$ = XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS; }
+			| CANONICAL WITH COMMENTS				{ $$ = XMLSERIALIZE_CANONICAL; }
+			| /*EMPTY*/								{ $$ = XMLSERIALIZE_NO_FORMAT; }
 		;
 
 xml_whitespace_option: PRESERVE WHITESPACE_P		{ $$ = true; }
@@ -17591,6 +17596,7 @@ unreserved_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CATALOG_P
@@ -18145,6 +18151,7 @@ bare_label_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CASE
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 45c019627c..7a211c4c07 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -2489,7 +2489,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
 	typenameTypeIdAndMod(pstate, xs->typeName, &targetType, &targetTypmod);
 
 	xexpr->xmloption = xs->xmloption;
-	xexpr->indent = xs->indent;
+	xexpr->format = xs->format;
 	xexpr->location = xs->location;
 	/* We actually only need these to be able to parse back the expression. */
 	xexpr->type = targetType;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 8893be5682..3969db005f 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -58,6 +58,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -653,7 +654,7 @@ xmltotext(PG_FUNCTION_ARGS)
 
 
 text *
-xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
+xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, XmlSerializeFormat format)
 {
 #ifdef USE_LIBXML
 	text	   *volatile result;
@@ -666,7 +667,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	PgXmlErrorContext *xmlerrcxt;
 #endif
 
-	if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
+	if (xmloption_arg != XMLOPTION_DOCUMENT && format == XMLSERIALIZE_NO_FORMAT)
 	{
 		/*
 		 * We don't actually need to do anything, so just return the
@@ -677,10 +678,23 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 #ifdef USE_LIBXML
-	/* Parse the input according to the xmloption */
-	doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
-					&parsed_xmloptiontype, &content_nodes,
-					(Node *) &escontext);
+	/*
+	 * Parse the input according to the xmloption. XML canonical expects
+	 * a well-formed XML input, so here in case of XMLSERIALIZE_CANONICAL
+	 * or XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS we force xml_parse() to parse
+	 * 'data' as XMLOPTION_DOCUMENT despite of the XmlOptionType given in
+	 * 'xmloption_arg'. This enables the canonicalization of CONTENT fragments
+	 * if they contain a singly-rooted XML - xml_parse() will thrown an error
+	 * otherwise.
+	 */
+	if(format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
+		doc = xml_parse(data, XMLOPTION_DOCUMENT, false,
+						GetDatabaseEncoding(), NULL, NULL, NULL);
+	else
+		doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
+						&parsed_xmloptiontype, &content_nodes,
+						(Node *) &escontext);
+
 	if (doc == NULL || escontext.error_occurred)
 	{
 		if (doc)
@@ -692,7 +706,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 	/* If we weren't asked to indent, we're done. */
-	if (!indent)
+	if (format == XMLSERIALIZE_NO_FORMAT)
 	{
 		xmlFreeDoc(doc);
 		return (text *) data;
@@ -701,128 +715,187 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	/* Otherwise, we gotta spin up some error handling. */
 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
-	PG_TRY();
+	if (format == XMLSERIALIZE_INDENT)
 	{
-		size_t		decl_len = 0;
-
-		/* The serialized data will go into this buffer. */
-		buf = xmlBufferCreate();
-
-		if (buf == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlBuffer");
-
-		/* Detect whether there's an XML declaration */
-		parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
-
-		/*
-		 * Emit declaration only if the input had one.  Note: some versions of
-		 * xmlSaveToBuffer leak memory if a non-null encoding argument is
-		 * passed, so don't do that.  We don't want any encoding conversion
-		 * anyway.
-		 */
-		if (decl_len == 0)
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
-		else
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_FORMAT);
-
-		if (ctxt == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlSaveCtxt");
-
-		if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
-		{
-			/* If it's a document, saving is easy. */
-			if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
-				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-							"could not save document to xmlBuffer");
-		}
-		else if (content_nodes != NULL)
+		PG_TRY();
 		{
-			/*
-			 * Deal with the case where we have non-singly-rooted XML.
-			 * libxml's dump functions don't work well for that without help.
-			 * We build a fake root node that serves as a container for the
-			 * content nodes, and then iterate over the nodes.
-			 */
-			xmlNodePtr	root;
-			xmlNodePtr	newline;
+			size_t decl_len = 0;
 
-			root = xmlNewNode(NULL, (const xmlChar *) "content-root");
-			if (root == NULL || xmlerrcxt->err_occurred)
+			/* The serialized data will go into this buffer. */
+			buf = xmlBufferCreate();
+
+			if (buf == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlBuffer");
 
-			/* This attaches root to doc, so we need not free it separately. */
-			xmlDocSetRootElement(doc, root);
-			xmlAddChildList(root, content_nodes);
+			/* Detect whether there's an XML declaration */
+			parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
 
 			/*
-			 * We use this node to insert newlines in the dump.  Note: in at
-			 * least some libxml versions, xmlNewDocText would not attach the
-			 * node to the document even if we passed it.  Therefore, manage
-			 * freeing of this node manually, and pass NULL here to make sure
-			 * there's not a dangling link.
+			 * Emit declaration only if the input had one.  Note: some versions of
+			 * xmlSaveToBuffer leak memory if a non-null encoding argument is
+			 * passed, so don't do that.  We don't want any encoding conversion
+			 * anyway.
 			 */
-			newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
-			if (newline == NULL || xmlerrcxt->err_occurred)
+			if (decl_len == 0)
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									   XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
+			else
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									   XML_SAVE_FORMAT);
+
+			if (ctxt == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlSaveCtxt");
 
-			for (xmlNodePtr node = root->children; node; node = node->next)
+			if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
 			{
-				/* insert newlines between nodes */
-				if (node->type != XML_TEXT_NODE && node->prev != NULL)
+				/* If it's a document, saving is easy. */
+				if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+								"could not save document to xmlBuffer");
+			}
+			else if (content_nodes != NULL)
+			{
+				/*
+				 * Deal with the case where we have non-singly-rooted XML.
+				 * libxml's dump functions don't work well for that without help.
+				 * We build a fake root node that serves as a container for the
+				 * content nodes, and then iterate over the nodes.
+				 */
+				xmlNodePtr root;
+				xmlNodePtr newline;
+
+				root = xmlNewNode(NULL, (const xmlChar *)"content-root");
+				if (root == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				/* This attaches root to doc, so we need not free it separately. */
+				xmlDocSetRootElement(doc, root);
+				xmlAddChildList(root, content_nodes);
+
+				/*
+				 * We use this node to insert newlines in the dump.  Note: in at
+				 * least some libxml versions, xmlNewDocText would not attach the
+				 * node to the document even if we passed it.  Therefore, manage
+				 * freeing of this node manually, and pass NULL here to make sure
+				 * there's not a dangling link.
+				 */
+				newline = xmlNewDocText(NULL, (const xmlChar *)"\n");
+				if (newline == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				for (xmlNodePtr node = root->children; node; node = node->next)
 				{
-					if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+					/* insert newlines between nodes */
+					if (node->type != XML_TEXT_NODE && node->prev != NULL)
+					{
+						if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+						{
+							xmlFreeNode(newline);
+							xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+										"could not save newline to xmlBuffer");
+						}
+					}
+
+					if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
 					{
 						xmlFreeNode(newline);
 						xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-									"could not save newline to xmlBuffer");
+									"could not save content to xmlBuffer");
 					}
 				}
 
-				if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
-				{
-					xmlFreeNode(newline);
-					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-								"could not save content to xmlBuffer");
-				}
+				xmlFreeNode(newline);
 			}
 
-			xmlFreeNode(newline);
-		}
+			if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			{
+				ctxt = NULL; /* don't try to close it again */
+				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+							"could not close xmlSaveCtxtPtr");
+			}
 
-		if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			result = (text *)xmlBuffer_to_xmltype(buf);
+		}
+		PG_CATCH();
 		{
-			ctxt = NULL;		/* don't try to close it again */
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-						"could not close xmlSaveCtxtPtr");
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (buf)
+				xmlBufferFree(buf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
 		}
+		PG_END_TRY();
+
+		xmlBufferFree(buf);
+		xmlFreeDoc(doc);
 
-		result = (text *) xmlBuffer_to_xmltype(buf);
+		pg_xml_done(xmlerrcxt, false);
 	}
-	PG_CATCH();
-	{
-		if (ctxt)
-			xmlSaveClose(ctxt);
-		if (buf)
-			xmlBufferFree(buf);
-		if (doc)
-			xmlFreeDoc(doc);
+	else if (format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
+ 	{
+		xmlChar    *xmlbuf = NULL;
+		int         nbytes;
+		int         with_comments = 0; /* 0 = no xml comments (default) */
 
-		pg_xml_done(xmlerrcxt, true);
+		PG_TRY();
+		{
+			/* 1 = keeps xml comments */
+			if (format == XMLSERIALIZE_CANONICAL)
+				with_comments = 1;
 
-		PG_RE_THROW();
-	}
-	PG_END_TRY();
+		if (doc == NULL || escontext.error_occurred)
+		{
+			if (doc)
+				xmlFreeDoc(doc);
+			/* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
+			ereport(ERROR,
+					(errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
+					errmsg("not an XML document")));
+		}
 
-	xmlBufferFree(buf);
-	xmlFreeDoc(doc);
+		/*
+		 * This dumps the canonicalized XML doc into the xmlChar* buffer.
+		 * mode = 2 means the doc will be canonicalized using the C14N 1.1 standard.
+		 */
+		nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, with_comments, &xmlbuf);
+		if(nbytes < 0 || escontext.error_occurred)
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						errmsg("could not canonicalize the given XML document")));
 
-	pg_xml_done(xmlerrcxt, false);
+			result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+		}
+		PG_CATCH();
+		{
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (xmlbuf)
+				xmlFree(xmlbuf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
+		}
+		PG_END_TRY();
+
+		xmlFreeDoc(doc);
+		xmlFree(xmlbuf);
+
+		pg_xml_done(xmlerrcxt, false);
+	}
+	else
+		elog(ERROR,"invalid xmlserialize option");
 
 	return result;
 #else
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 85a62b538e..47bffb7cd1 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -846,6 +846,7 @@ typedef struct XmlSerialize
 	Node	   *expr;
 	TypeName   *typeName;
 	bool		indent;			/* [NO] INDENT */
+	XmlSerializeFormat	format;	/* serialization format */
 	ParseLoc	location;		/* token location, or -1 if unknown */
 } XmlSerialize;
 
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index ea47652adb..09f54ff247 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1593,6 +1593,14 @@ typedef enum XmlOptionType
 	XMLOPTION_CONTENT,
 } XmlOptionType;
 
+typedef enum XmlSerializeFormat
+{
+	XMLSERIALIZE_INDENT,                       /* pretty-printed xml serialization  */
+	XMLSERIALIZE_CANONICAL,                    /* canonical form with xml comments */
+	XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS,   /* canonical form without xml comments */
+	XMLSERIALIZE_NO_FORMAT                     /* unformatted xml representation */
+} XmlSerializeFormat;
+
 typedef struct XmlExpr
 {
 	Expr		xpr;
@@ -1615,6 +1623,8 @@ typedef struct XmlExpr
 	int32		typmod pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	ParseLoc	location;
+	/* serialization format: XMLCANONICAL, XMLCANONICAL_WITH_COMMENTS, XMLINDENT */
+	XmlSerializeFormat format pg_node_attr(query_jumble_ignore);
 } XmlExpr;
 
 /*
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index f7fe834cf4..68d939d754 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -68,6 +68,7 @@ PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("canonical", CANONICAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index ed20e21375..05f11c0517 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -78,7 +78,7 @@ extern xmltype *xmlpi(const char *target, text *arg, bool arg_is_null, bool *res
 extern xmltype *xmlroot(xmltype *data, text *version, int standalone);
 extern bool xml_is_document(xmltype *arg);
 extern text *xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg,
-									bool indent);
+									XmlSerializeFormat format);
 extern char *escape_xml(const char *str);
 
 extern char *map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, bool escape_period);
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 6500cff885..3f9fce8a77 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -672,6 +672,120 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                                                                                                                                                                                                                                                  xmlserialize                                                                                                                                                                                                                                                                                                                                                                                                                  
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- default attribute will be added --><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside doc -->
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 9323b84ae2..6a76a4fd9a 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -443,6 +443,114 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val><...
                                      ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+ERROR:  unsupported XML feature
+LINE 2:   (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+             ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 ERROR:  unsupported XML feature
 LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index e1d165c6c9..b47fb44b7a 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -652,6 +652,120 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                                                                                                                                                                                                                                                  xmlserialize                                                                                                                                                                                                                                                                                                                                                                                                                  
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- default attribute will be added --><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside doc -->
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index 953bac09e4..a2f1c3566d 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -168,6 +168,69 @@ SELECT xmlserialize(CONTENT  '<foo><bar></bar></foo>' AS text INDENT);
 -- 'no indent' = not using 'no indent'
 SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
 SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+\set VERBOSITY default
 
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 635e6d6e21..0f01e09100 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3228,6 +3228,7 @@ XmlExpr
 XmlExprOp
 XmlOptionType
 XmlSerialize
+XmlSerializeFormat
 XmlTableBuilderData
 YYLTYPE
 YYSTYPE
-- 
2.34.1

#20

Pavel Stehule

pavel.stehule@gmail.com

over 1 year ago

In reply to: Jim Jones (#19)

Re: [PATCH] Add CANONICAL option to xmlserialize

so 24. 8. 2024 v 7:40 odesílatel Jim Jones <jim.jones@uni-muenster.de>
napsal:

On 19.06.24 10:59, Jim Jones wrote:

On 09.02.24 14:19, Jim Jones wrote:

v9 attached with rebase due to changes done to primnodes.h in 615f5f6

v10 attached with rebase due to changes in primnodes, parsenodes.h, and
gram.y

v11 attached with rebase due to changes in xml.c

I try to check this patch

There is unwanted white space in the patch

-<-><--><-->xmlFreeDoc(doc);
+<->else if (format == XMLSERIALIZE_CANONICAL || format ==
XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
+ <>{
+<-><-->xmlChar    *xmlbuf = NULL;
+<-><-->int         nbytes;
+<-><-->int

1. the xml is serialized to UTF8 string every time, but when target type is
varchar or text, then it should be every time encoded to database encoding.
Is not possible to hold utf8 string in latin2 database varchar.

2. The proposed feature can increase some confusion in implementation of NO
IDENT. I am not an expert on this area, so I checked other databases. DB2
does not have anything similar. But Oracle's "NO IDENT" clause is very
similar to the proposed "CANONICAL". Unfortunately, there is different
behaviour of NO IDENT - Oracle's really removes formatting, Postgres does
nothing.

Regards

Pavel

Show quoted text

--
Jim

#21

Pavel Stehule

pavel.stehule@gmail.com

over 1 year ago

In reply to: Pavel Stehule (#20)

Re: [PATCH] Add CANONICAL option to xmlserialize

ne 25. 8. 2024 v 20:57 odesílatel Pavel Stehule <pavel.stehule@gmail.com>
napsal:

Hi

so 24. 8. 2024 v 7:40 odesílatel Jim Jones <jim.jones@uni-muenster.de>
napsal:

On 19.06.24 10:59, Jim Jones wrote:

On 09.02.24 14:19, Jim Jones wrote:

v9 attached with rebase due to changes done to primnodes.h in 615f5f6

v10 attached with rebase due to changes in primnodes, parsenodes.h, and
gram.y

v11 attached with rebase due to changes in xml.c

I try to check this patch

There is unwanted white space in the patch
-<-><--><-->xmlFreeDoc(doc);
+<->else if (format == XMLSERIALIZE_CANONICAL || format ==
XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
+ <>{
+<-><-->xmlChar    *xmlbuf = NULL;
+<-><-->int         nbytes;
+<-><-->int
1. the xml is serialized to UTF8 string every time, but when target type
is varchar or text, then it should be every time encoded to database
encoding. Is not possible to hold utf8 string in latin2 database varchar.

2. The proposed feature can increase some confusion in implementation of
NO IDENT. I am not an expert on this area, so I checked other databases.
DB2 does not have anything similar. But Oracle's "NO IDENT" clause is very
similar to the proposed "CANONICAL". Unfortunately, there is different
behaviour of NO IDENT - Oracle's really removes formatting, Postgres does
nothing.

Regards

I read https://www.w3.org/TR/xml-c14n11/ and if I understand this document,
then CANONICAL <> "NO INDENT" ?

Regards

Pavel

Show quoted text

Pavel

--
Jim

#22

Jim Jones

jim.jones@uni-muenster.de

over 1 year ago

In reply to: Pavel Stehule (#20)

Re: [PATCH] Add CANONICAL option to xmlserialize

Hi Pavel

On 25.08.24 20:57, Pavel Stehule wrote:

There is unwanted white space in the patch

-<-><--><-->xmlFreeDoc(doc);
+<->else if (format == XMLSERIALIZE_CANONICAL || format ==
XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
+ <>{
+<-><-->xmlChar    *xmlbuf = NULL;
+<-><-->int         nbytes;
+<-><-->int

I missed that one. Just removed it, thanks!

1. the xml is serialized to UTF8 string every time, but when target
type is varchar or text, then it should be every time encoded to
database encoding. Is not possible to hold utf8 string in latin2
database varchar.

I'm calling xml_parse using GetDatabaseEncoding(), so I thought I would
be on the safe side

if(format ==XMLSERIALIZE_CANONICAL ||format
==XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
doc =xml_parse(data, XMLOPTION_DOCUMENT, false,
GetDatabaseEncoding(), NULL, NULL, NULL);
... or you mean something else?

2. The proposed feature can increase some confusion in implementation
of NO IDENT. I am not an expert on this area, so I checked other
databases. DB2 does not have anything similar. But Oracle's "NO IDENT"
clause is very similar to the proposed "CANONICAL". Unfortunately,
there is different behaviour of NO IDENT - Oracle's really removes
formatting, Postgres does nothing.

Coincidentally, the [NO] INDENT support for xmlserialize is an old patch
of mine.
It basically "does nothing" and prints the xml as is, e.g.

SELECT xmlserialize(DOCUMENT '<foo><bar><val z="1"
a="8"><![CDATA[0&1]]></val></bar></foo>' AS text INDENT);
                xmlserialize
--------------------------------------------
<foo>                                     +
   <bar>                                   +
     <val z="1" a="8"><![CDATA[0&1]]></val>+
   </bar>                                  +
</foo>                                    +

(1 row)

SELECT xmlserialize(DOCUMENT '<foo><bar><val z="1"
a="8"><![CDATA[0&1]]></val></bar></foo>' AS text NO INDENT);
xmlserialize
--------------------------------------------------------------
<foo><bar><val z="1" a="8"><![CDATA[0&1]]></val></bar></foo>
(1 row)

SELECT xmlserialize(DOCUMENT '<foo><bar><val z="1"
a="8"><![CDATA[0&1]]></val></bar></foo>' AS text);
xmlserialize
--------------------------------------------------------------
<foo><bar><val z="1" a="8"><![CDATA[0&1]]></val></bar></foo>
(1 row)

.. while CANONICAL converts the xml to its canonical form,[1,2] e.g.
sorting attributes and replacing CDATA strings by its value:

SELECT xmlserialize(DOCUMENT '<foo><bar><val z="1"
a="8"><![CDATA[0&1]]></val></bar></foo>' AS text CANONICAL);
xmlserialize
------------------------------------------------------
<foo><bar><val a="8" z="1">0&1</val></bar></foo>
(1 row)

xmlserialize CANONICAL does not exist in any other database and it's not
part of the SQL/XML standard.

Regarding the different behaviour of NO INDENT in Oracle and PostgreSQL:
it is not entirely clear to me if SQL/XML states that NO INDENT must
remove the indentation from xml strings.
It says:

"INDENT — the choice of whether to “pretty-print” the serialized XML by
means of indentation, either
True or False.
....
i) If <XML serialize indent> is specified and does not contain NO, then
let IND be True.
ii) Otherwise, let IND be False."

When I wrote the patch I assumed it meant to leave the xml as is .. but
I might be wrong.
Perhaps it would be best if we open a new thread for this topic.

Thank you for reviewing this patch. Much appreciated!

Best,

--
Jim

1 - https://www.w3.org/TR/xml-c14n11/
2 - https://gnome.pages.gitlab.gnome.org/libxml2/devhelp/libxml2-c14n.html

#23

Pavel Stehule

pavel.stehule@gmail.com

over 1 year ago

In reply to: Jim Jones (#22)

Re: [PATCH] Add CANONICAL option to xmlserialize

po 26. 8. 2024 v 11:32 odesílatel Jim Jones <jim.jones@uni-muenster.de>
napsal:

Hi Pavel

On 25.08.24 20:57, Pavel Stehule wrote:
There is unwanted white space in the patch
-<-><--><-->xmlFreeDoc(doc);
+<->else if (format == XMLSERIALIZE_CANONICAL || format ==
XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
+ <>{
+<-><-->xmlChar    *xmlbuf = NULL;
+<-><-->int         nbytes;
+<-><-->int
I missed that one. Just removed it, thanks!

1. the xml is serialized to UTF8 string every time, but when target
type is varchar or text, then it should be every time encoded to
database encoding. Is not possible to hold utf8 string in latin2
database varchar.

I'm calling xml_parse using GetDatabaseEncoding(), so I thought I would
be on the safe side

if(format ==XMLSERIALIZE_CANONICAL ||format
==XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
doc =xml_parse(data, XMLOPTION_DOCUMENT, false,
GetDatabaseEncoding(), NULL, NULL, NULL);
... or you mean something else?

Maybe I was confused by the initial message.

2. The proposed feature can increase some confusion in implementation
of NO IDENT. I am not an expert on this area, so I checked other
databases. DB2 does not have anything similar. But Oracle's "NO IDENT"
clause is very similar to the proposed "CANONICAL". Unfortunately,
there is different behaviour of NO IDENT - Oracle's really removes
formatting, Postgres does nothing.

Coincidentally, the [NO] INDENT support for xmlserialize is an old patch
of mine.
It basically "does nothing" and prints the xml as is, e.g.

SELECT xmlserialize(DOCUMENT '<foo><bar><val z="1"
a="8"><![CDATA[0&1]]></val></bar></foo>' AS text INDENT);
xmlserialize
--------------------------------------------
<foo> +
<bar> +
<val z="1" a="8"><![CDATA[0&1]]></val>+
</bar> +
</foo> +

(1 row)

SELECT xmlserialize(DOCUMENT '<foo><bar><val z="1"
a="8"><![CDATA[0&1]]></val></bar></foo>' AS text NO INDENT);
xmlserialize
--------------------------------------------------------------
<foo><bar><val z="1" a="8"><![CDATA[0&1]]></val></bar></foo>
(1 row)

SELECT xmlserialize(DOCUMENT '<foo><bar><val z="1"
a="8"><![CDATA[0&1]]></val></bar></foo>' AS text);
xmlserialize
--------------------------------------------------------------
<foo><bar><val z="1" a="8"><![CDATA[0&1]]></val></bar></foo>
(1 row)

.. while CANONICAL converts the xml to its canonical form,[1,2] e.g.
sorting attributes and replacing CDATA strings by its value:

SELECT xmlserialize(DOCUMENT '<foo><bar><val z="1"
a="8"><![CDATA[0&1]]></val></bar></foo>' AS text CANONICAL);
xmlserialize
------------------------------------------------------
<foo><bar><val a="8" z="1">0&1</val></bar></foo>
(1 row)

xmlserialize CANONICAL does not exist in any other database and it's not
part of the SQL/XML standard.

Regarding the different behaviour of NO INDENT in Oracle and PostgreSQL:
it is not entirely clear to me if SQL/XML states that NO INDENT must
remove the indentation from xml strings.
It says:

"INDENT — the choice of whether to “pretty-print” the serialized XML by
means of indentation, either
True or False.
....
i) If <XML serialize indent> is specified and does not contain NO, then
let IND be True.
ii) Otherwise, let IND be False."

When I wrote the patch I assumed it meant to leave the xml as is .. but
I might be wrong.
Perhaps it would be best if we open a new thread for this topic.

I think so there should be specified the target of CANONICAL - it is a
partial replacement of NO INDENT or it produces format just for comparing?
The CANONICAL format is not probably extra standardized, because libxml2
removes indenting, but examples in https://www.w3.org/TR/xml-c14n11/
doesn't do it. So this format makes sense just for local operations.

I like this functionality, and it is great so the functionality from
libxml2 can be used, but I think, so the fact that there are four not
compatible implementations of xmlserialize is messy. Can be nice, if we
find some intersection between SQL/XML, Oracle instead of new proprietary
syntax.

In Oracle syntax the CANONICAL is +/- NO INDENT SHOW DEFAULT ?

My objection against CANONICAL so SQL/XML and Oracle allows to parametrize
XMLSERIALIZE more precious and before implementing new feature, we should
to clean table and say, what we want to have in XMLSERIALIZE.

An alternative of enhancing of XMLSERIALIZE I can imagine just function
"to_canonical(xml, without_comments bool default false)". In this case we
don't need to solve relations against SQL/XML or Oracle.

Show quoted text

Thank you for reviewing this patch. Much appreciated!

Best,

--
Jim

1 - https://www.w3.org/TR/xml-c14n11/
2 - https://gnome.pages.gitlab.gnome.org/libxml2/devhelp/libxml2-c14n.html

#24

Jim Jones

jim.jones@uni-muenster.de

over 1 year ago

In reply to: Pavel Stehule (#23)

Re: [PATCH] Add CANONICAL option to xmlserialize

On 26.08.24 12:30, Pavel Stehule wrote:

I think so there should be specified the target of CANONICAL - it is a
partial replacement of NO INDENT or it produces format just for
comparing? The CANONICAL format is not probably extra standardized,
because libxml2 removes indenting, but examples in
https://www.w3.org/TR/xml-c14n11/ doesn't do it. So this format makes
sense just for local operations.

My idea with CANONICAL was not to replace NO INDENT. The intent was to
format xml strings in an standardized way, so that they can be compared.
For instance, removing comments, sorting attributes, converting CDATA
strings, converting empty elements to start-end tag pairs, removing
white spaces between elements, etc ...

The W3C recommendation for Canonical XML[1] dictates the following
regarding the removal of whitespaces between elements :

* Whitespace outside of the document element and within start and end
tags is normalized
* All whitespace in character content is retained (excluding characters
removed during line feed normalization)

I like this functionality, and it is great so the functionality from
libxml2 can be used, but I think, so the fact that there are four not
compatible implementations of xmlserialize is messy. Can be nice, if
we find some intersection between SQL/XML, Oracle instead of new
proprietary syntax.

In Oracle syntax the CANONICAL is +/- NO INDENT SHOW DEFAULT ?

No.
XMLSERIALIZE ... NO INDENT is supposed, as the name suggests, to
serialize an xml string without indenting it. One could argue that not
indenting can be translated as removing indentation, but I couldn't find
anything concrete about this in the SQL/XML spec. If it's indeed the
case, we should correct XMLSERIALIZE .. NO INDENT, but it is unrelated
to this patch.

CANONICAL serializes a physical representation of an xml document. In a
nutshell, XMLSERIALIZE ... CANONICAL sort of "rewrites" the xml string
with the following rules (list from the W3C recommendation):

* The document is encoded in UTF-8
* Line breaks normalized to #xA on input, before parsing
* Attribute values are normalized, as if by a validating processor
* Character and parsed entity references are replaced
* CDATA sections are replaced with their character content
* The XML declaration and document type declaration are removed
* Empty elements are converted to start-end tag pairs
* Whitespace outside of the document element and within start and end
tags is normalized
* All whitespace in character content is retained (excluding characters
removed during line feed normalization)
* Attribute value delimiters are set to quotation marks (double quotes)
* Special characters in attribute values and character content are
replaced by character references
* Superfluous namespace declarations are removed from each element
* Default attributes are added to each element
* Fixup of xml:base attributes [C14N-Issues] is performed
* Lexicographic order is imposed on the namespace declarations and
attributes of each element

btw: Oracle's SIZE =, HIDE DEFAULTS, and SHOW DEFAULTS are not part of
the SQL/XML standard either :)

My objection against CANONICAL so SQL/XML and Oracle allows to
parametrize XMLSERIALIZE more precious and before implementing new
feature, we should to clean table and say, what we want to have in
XMLSERIALIZE.

An alternative of enhancing of XMLSERIALIZE I can imagine just
function "to_canonical(xml, without_comments bool default false)". In
this case we don't need to solve relations against SQL/XML or Oracle.

To create a separated serialization function would be IMHO way less
elegant than to parametrize XMLSERIALIZE, but it would be something I
could live with in case we decide to go down this path.

Thanks!

--
Jim

1 - https://www.w3.org/TR/xml-c14n11/

#25

Pavel Stehule

pavel.stehule@gmail.com

over 1 year ago

In reply to: Jim Jones (#24)

Re: [PATCH] Add CANONICAL option to xmlserialize

po 26. 8. 2024 v 13:28 odesílatel Jim Jones <jim.jones@uni-muenster.de>
napsal:

On 26.08.24 12:30, Pavel Stehule wrote:

I think so there should be specified the target of CANONICAL - it is a
partial replacement of NO INDENT or it produces format just for
comparing? The CANONICAL format is not probably extra standardized,
because libxml2 removes indenting, but examples in
https://www.w3.org/TR/xml-c14n11/ doesn't do it. So this format makes
sense just for local operations.

My idea with CANONICAL was not to replace NO INDENT. The intent was to
format xml strings in an standardized way, so that they can be compared.
For instance, removing comments, sorting attributes, converting CDATA
strings, converting empty elements to start-end tag pairs, removing
white spaces between elements, etc ...

The W3C recommendation for Canonical XML[1] dictates the following
regarding the removal of whitespaces between elements :

* Whitespace outside of the document element and within start and end
tags is normalized
* All whitespace in character content is retained (excluding characters
removed during line feed normalization)

I like this functionality, and it is great so the functionality from
libxml2 can be used, but I think, so the fact that there are four not
compatible implementations of xmlserialize is messy. Can be nice, if
we find some intersection between SQL/XML, Oracle instead of new
proprietary syntax.

In Oracle syntax the CANONICAL is +/- NO INDENT SHOW DEFAULT ?

No.
XMLSERIALIZE ... NO INDENT is supposed, as the name suggests, to
serialize an xml string without indenting it. One could argue that not
indenting can be translated as removing indentation, but I couldn't find
anything concrete about this in the SQL/XML spec. If it's indeed the
case, we should correct XMLSERIALIZE .. NO INDENT, but it is unrelated
to this patch.

CANONICAL serializes a physical representation of an xml document. In a
nutshell, XMLSERIALIZE ... CANONICAL sort of "rewrites" the xml string
with the following rules (list from the W3C recommendation):

* The document is encoded in UTF-8
* Line breaks normalized to #xA on input, before parsing
* Attribute values are normalized, as if by a validating processor
* Character and parsed entity references are replaced
* CDATA sections are replaced with their character content
* The XML declaration and document type declaration are removed
* Empty elements are converted to start-end tag pairs
* Whitespace outside of the document element and within start and end
tags is normalized
* All whitespace in character content is retained (excluding characters
removed during line feed normalization)
* Attribute value delimiters are set to quotation marks (double quotes)
* Special characters in attribute values and character content are
replaced by character references
* Superfluous namespace declarations are removed from each element
* Default attributes are added to each element
* Fixup of xml:base attributes [C14N-Issues] is performed
* Lexicographic order is imposed on the namespace declarations and
attributes of each element

btw: Oracle's SIZE =, HIDE DEFAULTS, and SHOW DEFAULTS are not part of
the SQL/XML standard either :)

I know - looks so this function is not well designed generally

My objection against CANONICAL so SQL/XML and Oracle allows to
parametrize XMLSERIALIZE more precious and before implementing new
feature, we should to clean table and say, what we want to have in
XMLSERIALIZE.

An alternative of enhancing of XMLSERIALIZE I can imagine just
function "to_canonical(xml, without_comments bool default false)". In
this case we don't need to solve relations against SQL/XML or Oracle.

To create a separated serialization function would be IMHO way less
elegant than to parametrize XMLSERIALIZE, but it would be something I
could live with in case we decide to go down this path.

I am not strongly against enhancing XMLSERIALIZE, but it can be nice to see
some wider concept first. Currently the state looks just random - and I
didn't see any serious discussion about implementation fo SQL/XML. We don't
need to be necessarily compatible with Oracle, but it can help if we have a
functionality that can be used for conversions.

Show quoted text

Thanks!

--
Jim

1 - https://www.w3.org/TR/xml-c14n11/

#26

Jim Jones

jim.jones@uni-muenster.de

over 1 year ago

In reply to: Pavel Stehule (#25)

Re: [PATCH] Add CANONICAL option to xmlserialize

On 26.08.24 14:15, Pavel Stehule wrote:

I am not strongly against enhancing XMLSERIALIZE, but it can be nice
to see some wider concept first. Currently the state looks just random
- and I didn't see any serious discussion about implementation fo
SQL/XML. We don't need to be necessarily compatible with Oracle, but
it can help if we have a functionality that can be used for conversions.

Fair point. A road map definitely wouldn't hurt. Not quite sure how to
start this motion though :D
So far I've just picked the missing SQL/XML features that were listed in
the PostgreSQL todo list and that I need for any of my projects. But I
would gladly change the priorities if there is any interest in the
community for specific features.

--
Jim

#27

Pavel Stehule

pavel.stehule@gmail.com

over 1 year ago

In reply to: Jim Jones (#26)

Re: [PATCH] Add CANONICAL option to xmlserialize

po 26. 8. 2024 v 16:30 odesílatel Jim Jones <jim.jones@uni-muenster.de>
napsal:

On 26.08.24 14:15, Pavel Stehule wrote:

I am not strongly against enhancing XMLSERIALIZE, but it can be nice
to see some wider concept first. Currently the state looks just random
- and I didn't see any serious discussion about implementation fo
SQL/XML. We don't need to be necessarily compatible with Oracle, but
it can help if we have a functionality that can be used for conversions.

Fair point. A road map definitely wouldn't hurt. Not quite sure how to
start this motion though :D
So far I've just picked the missing SQL/XML features that were listed in
the PostgreSQL todo list and that I need for any of my projects. But I
would gladly change the priorities if there is any interest in the
community for specific features.

yes, "like" road map and related questions - just for XMLSERIALIZE (in this
thread). I see points

1. what about behaviour of NO INDENT - the implementation is not too old,
so it can be changed if we want (I think), and it is better to do early
than too late

2. Are we able to implement SQL/XML syntax with libxml2?

3. Are we able to implement Oracle syntax with libxml2? And there are
benefits other than higher possible compatibility?

4. Can there be some possible collision (functionality, syntax) with
CANONICAL?

5. SQL/XML XMLSERIALIZE supports other target types than varchar. I can
imagine XMLSERIALIZE with CANONICAL to bytea (then we don't need to force
database encoding). Does it make sense? Are the results comparable?

Show quoted text

--
Jim

#28

Jim Jones

jim.jones@uni-muenster.de

over 1 year ago

In reply to: Pavel Stehule (#27)

Re: [PATCH] Add CANONICAL option to xmlserialize

On 26.08.24 16:59, Pavel Stehule wrote:

1. what about behaviour of NO INDENT - the implementation is not too
old, so it can be changed if we want (I think), and it is better to do
early than too late

While checking the feasibility of removing indentation with NO INDENT I
may have found a bug in XMLSERIALIZE ... INDENT.
xmlSaveToBuffer seems to ignore elements if there are whitespaces
between them:

SELECT xmlserialize(DOCUMENT '<foo><bar>42</bar></foo>' AS text INDENT);
xmlserialize
-----------------
<foo>          +
   <bar>42</bar>+
</foo>         +

(1 row)

SELECT xmlserialize(DOCUMENT '<foo> <bar>42</bar> </foo>'::xml AS text
INDENT);
xmlserialize
----------------------------
<foo> <bar>42</bar> </foo>+

(1 row)

I'll take a look at it.

Regarding removing indentation: yes, it would be possible with libxml2.
The question is if it would be right to do so.

2. Are we able to implement SQL/XML syntax with libxml2?

3. Are we able to implement Oracle syntax with libxml2? And there are
benefits other than higher possible compatibility?

I guess it would be beneficial if you're migrating from oracle to
postgres - or the other way around. It certainly wouldn't hurt, but so
far I personally had little use for the oracle's extra xmlserialize
features.

4. Can there be some possible collision (functionality, syntax) with
CANONICAL?

I couldn't find anything in the SQL/XML spec that might refer to
canonocal xml.

5. SQL/XML XMLSERIALIZE supports other target types than varchar. I
can imagine XMLSERIALIZE with CANONICAL to bytea (then we don't need
to force database encoding). Does it make sense? Are the results
comparable?

--
Jim

#29

Pavel Stehule

pavel.stehule@gmail.com

over 1 year ago

In reply to: Jim Jones (#28)

Re: [PATCH] Add CANONICAL option to xmlserialize

út 27. 8. 2024 v 13:57 odesílatel Jim Jones <jim.jones@uni-muenster.de>
napsal:

On 26.08.24 16:59, Pavel Stehule wrote:

1. what about behaviour of NO INDENT - the implementation is not too
old, so it can be changed if we want (I think), and it is better to do
early than too late

While checking the feasibility of removing indentation with NO INDENT I
may have found a bug in XMLSERIALIZE ... INDENT.
xmlSaveToBuffer seems to ignore elements if there are whitespaces
between them:

SELECT xmlserialize(DOCUMENT '<foo><bar>42</bar></foo>' AS text INDENT);
xmlserialize
-----------------
<foo> +
<bar>42</bar>+
</foo> +

(1 row)

SELECT xmlserialize(DOCUMENT '<foo> <bar>42</bar> </foo>'::xml AS text
INDENT);
xmlserialize
----------------------------
<foo> <bar>42</bar> </foo>+

(1 row)

I'll take a look at it.

Regarding removing indentation: yes, it would be possible with libxml2.
The question is if it would be right to do so.

2. Are we able to implement SQL/XML syntax with libxml2?

3. Are we able to implement Oracle syntax with libxml2? And there are
benefits other than higher possible compatibility?

I guess it would be beneficial if you're migrating from oracle to
postgres - or the other way around. It certainly wouldn't hurt, but so
far I personally had little use for the oracle's extra xmlserialize
features.

4. Can there be some possible collision (functionality, syntax) with
CANONICAL?

I couldn't find anything in the SQL/XML spec that might refer to
canonocal xml.

5. SQL/XML XMLSERIALIZE supports other target types than varchar. I
can imagine XMLSERIALIZE with CANONICAL to bytea (then we don't need
to force database encoding). Does it make sense? Are the results
comparable?

|
As of pg16 bytea is not supported. Currently type| can be |character|,
|character varying|, or |text - also their other flavours like 'name'.

I know, but theoretically, there can be some benefit for CANONICAL if pg
supports bytea there. Lot of databases still use non utf8 encoding.

It is a more theoretical question - if pg supports different types there in
future (because SQL/XML or Oracle), then CANONICAL can be used without
limit, or CANONICAL can be used just for text? And you are sure, so you can
compare text X text, instead xml X xml?

+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT
doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column?
+----------
+ t
+ t
+(2 rows)

Maybe I am a little bit confused by these regress tests, because at the end
it is not too useful - you compare two identical XML, and WITH COMMENTS and
WITHOUT COMMENTS is tested elsewhere. I tried to search for a sense of this
test. Better to use really different documents (columns) instead.

Regards

Pavel

Show quoted text

|

--
Jim

#30

Jim Jones

jim.jones@uni-muenster.de

over 1 year ago

In reply to: Pavel Stehule (#29)

Re: [PATCH] Add CANONICAL option to xmlserialize

On 29.08.24 20:50, Pavel Stehule wrote:

I know, but theoretically, there can be some benefit for CANONICAL if
pg supports bytea there. Lot of databases still use non utf8 encoding.

It is a more theoretical question - if pg supports different types
there in future (because SQL/XML or Oracle), then CANONICAL can be
used without limit,

I like the idea of extending the feature to support bytea. I can
definitely take a look at it, but perhaps in another patch? This change
would most likely involve transformXmlSerialize in parse_expr.c, and I'm
not sure of the impact in other usages of XMLSERIALIZE.

or CANONICAL can be used just for text? And you are sure, so you can
compare text X text, instead xml X xml?

Yes, currently it only supports varchar or text - and their cousins. The
idea is to format the xml and serialize it as text in a way that they
can compared based on their content, independently of how they were
written, e.g '<foo a="1" b="2"/>' is equal to '<foo b="2" a="1"/>'.

+SELECT xmlserialize(CONTENT doc AS text CANONICAL) =
xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM
xmltest_serialize;
+ ?column?
+----------
+ t
+ t
+(2 rows)
Maybe I am a little bit confused by these regress tests, because at
the end it is not too useful - you compare two identical XML, and WITH
COMMENTS and WITHOUT COMMENTS is tested elsewhere. I tried to search
for a sense of this test. Better to use really different documents
(columns) instead.

Yeah, I can see that it's confusing. In this example I actually just
wanted to test that the default option of CANONICAL is CANONICAL WITH
COMMENTS, even if you don't mention it. In the docs I mentioned it like
this:

"The optional parameters WITH COMMENTS (which is the default) or WITH NO
COMMENTS, respectively, keep or remove XML comments from the given
document."

Perhaps I should rephrase it? Or maybe a comment in the regression tests
would suffice?

Thanks a lot for the input!

--
Jim

#31

Pavel Stehule

pavel.stehule@gmail.com

over 1 year ago

In reply to: Jim Jones (#30)

Re: [PATCH] Add CANONICAL option to xmlserialize

čt 29. 8. 2024 v 23:54 odesílatel Jim Jones <jim.jones@uni-muenster.de>
napsal:

On 29.08.24 20:50, Pavel Stehule wrote:

I know, but theoretically, there can be some benefit for CANONICAL if
pg supports bytea there. Lot of databases still use non utf8 encoding.

It is a more theoretical question - if pg supports different types
there in future (because SQL/XML or Oracle), then CANONICAL can be
used without limit,

I like the idea of extending the feature to support bytea. I can
definitely take a look at it, but perhaps in another patch? This change
would most likely involve transformXmlSerialize in parse_expr.c, and I'm
not sure of the impact in other usages of XMLSERIALIZE.

or CANONICAL can be used just for text? And you are sure, so you can
compare text X text, instead xml X xml?

Yes, currently it only supports varchar or text - and their cousins. The
idea is to format the xml and serialize it as text in a way that they
can compared based on their content, independently of how they were
written, e.g '<foo a="1" b="2"/>' is equal to '<foo b="2" a="1"/>'.
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) =
xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM
xmltest_serialize;
+ ?column?
+----------
+ t
+ t
+(2 rows)
Maybe I am a little bit confused by these regress tests, because at
the end it is not too useful - you compare two identical XML, and WITH
COMMENTS and WITHOUT COMMENTS is tested elsewhere. I tried to search
for a sense of this test. Better to use really different documents
(columns) instead.
Yeah, I can see that it's confusing. In this example I actually just
wanted to test that the default option of CANONICAL is CANONICAL WITH
COMMENTS, even if you don't mention it. In the docs I mentioned it like
this:

"The optional parameters WITH COMMENTS (which is the default) or WITH NO
COMMENTS, respectively, keep or remove XML comments from the given
document."

Perhaps I should rephrase it? Or maybe a comment in the regression tests
would suffice?

comment will be enough

Show quoted text

Thanks a lot for the input!

--
Jim

#32

Jim Jones

jim.jones@uni-muenster.de

over 1 year ago

In reply to: Pavel Stehule (#31)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

On 30.08.24 06:46, Pavel Stehule wrote:

čt 29. 8. 2024 v 23:54 odesílatel Jim Jones
<jim.jones@uni-muenster.de> napsal:
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) =
xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM
xmltest_serialize;
+ ?column?
+----------
+ t
+ t
+(2 rows)
Maybe I am a little bit confused by these regress tests, because at
the end it is not too useful - you compare two identical XML,
and WITH

COMMENTS and WITHOUT COMMENTS is tested elsewhere. I tried to search
for a sense of this test. Better to use really different documents
(columns) instead.

Yeah, I can see that it's confusing. In this example I actually just
wanted to test that the default option of CANONICAL is CANONICAL WITH
COMMENTS, even if you don't mention it. In the docs I mentioned it
like
this:

"The optional parameters WITH COMMENTS (which is the default) or
WITH NO
COMMENTS, respectively, keep or remove XML comments from the given
document."

Perhaps I should rephrase it? Or maybe a comment in the regression
tests
would suffice?

comment will be enough

v12 attached adds a comment to this test.

Thanks

--
Jim

Attachments:

v12-0001-Add-CANONICAL-output-format-to-xmlserialize.patchtext/x-patch; charset=UTF-8; name=v12-0001-Add-CANONICAL-output-format-to-xmlserialize.patchDownload

From 8b17a05a3386aaffcc6c5dafa9c57f49278dc4b6 Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Fri, 30 Aug 2024 07:55:08 +0200
Subject: [PATCH v12] Add CANONICAL output format to xmlserialize

This patch introduces the CANONICAL option to xmlserialize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification. This option can
be used with the additional parameter WITH [NO] COMMENTS to keep
or remove xml comments from the canonical xml output. In case no
parameter is provided, WITH COMMENTS will be used as default. This
feature is based on the function xmlC14NDocDumpMemory from the C14N
module of libxml2.

This patch also includes regression tests and documentation.
---
 doc/src/sgml/datatype.sgml            |  41 +++-
 src/backend/executor/execExprInterp.c |   2 +-
 src/backend/parser/gram.y             |  21 +-
 src/backend/parser/parse_expr.c       |   2 +-
 src/backend/utils/adt/xml.c           | 271 ++++++++++++++++----------
 src/include/nodes/parsenodes.h        |   1 +
 src/include/nodes/primnodes.h         |  10 +
 src/include/parser/kwlist.h           |   1 +
 src/include/utils/xml.h               |   2 +-
 src/test/regress/expected/xml.out     | 115 +++++++++++
 src/test/regress/expected/xml_1.out   | 109 +++++++++++
 src/test/regress/expected/xml_2.out   | 115 +++++++++++
 src/test/regress/sql/xml.sql          |  66 +++++++
 src/tools/pgindent/typedefs.list      |   1 +
 14 files changed, 647 insertions(+), 110 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index e0d33f12e1..19ba5bbf7f 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4496,7 +4496,7 @@ xml '<foo>bar</foo>'
     <type>xml</type>, uses the function
     <function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm>
 <synopsis>
-XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ [ NO ] INDENT ] )
+XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ { [ NO ] INDENT ] | CANONICAL [ WITH [NO] COMMENTS ]})
 </synopsis>
     <replaceable>type</replaceable> can be
     <type>character</type>, <type>character varying</type>, or
@@ -4513,6 +4513,45 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <repla
     type likewise produces the original string.
    </para>
 
+   <para>
+    The option <type>CANONICAL</type> converts a given
+    XML document to its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>
+    based on the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>.
+    It is basically designed to provide applications the ability to compare xml documents or test if they
+    have been changed. The optional parameters <type>WITH COMMENTS</type> (which is the default) or
+    <type>WITH NO COMMENTS</type>, respectively, keep or remove XML comments from the given document.
+    </para>
+
+     <para>
+     Example:
+
+<screen><![CDATA[
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL);
+                                xmlserialize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL WITH NO COMMENTS);
+                       xmlserialize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+]]></screen>
+   </para>
    <para>
     When a character string value is cast to or from type
     <type>xml</type> without going through <type>XMLPARSE</type> or
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 77394e76c3..9222eed1e2 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -4189,7 +4189,7 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 				*op->resvalue =
 					PointerGetDatum(xmltotext_with_options(DatumGetXmlP(value),
 														   xexpr->xmloption,
-														   xexpr->indent));
+														   xexpr->format));
 				*op->resnull = false;
 			}
 			break;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 84cef57a70..bb05d2cb3b 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -618,12 +618,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <node>	xml_root_version opt_xml_root_standalone
 %type <node>	xmlexists_argument
 %type <ival>	document_or_content
-%type <boolean>	xml_indent_option xml_whitespace_option
+%type <boolean>	xml_whitespace_option
 %type <list>	xmltable_column_list xmltable_column_option_list
 %type <node>	xmltable_column_el
 %type <defelt>	xmltable_column_option_el
 %type <list>	xml_namespace_list
 %type <target>	xml_namespace_el
+%type <ival> 	opt_xml_serialize_format
 
 %type <node>	func_application func_expr_common_subexpr
 %type <node>	func_expr func_expr_windowless
@@ -709,7 +710,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
 	BOOLEAN_P BOTH BREADTH BY
 
-	CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
+	CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT
 	COMMITTED COMPRESSION CONCURRENTLY CONDITIONAL CONFIGURATION CONFLICT
@@ -15895,14 +15896,14 @@ func_expr_common_subexpr:
 					$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
 									 list_make3($3, $5, $6), @1);
 				}
-			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename xml_indent_option ')'
+			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename opt_xml_serialize_format ')'
 				{
 					XmlSerialize *n = makeNode(XmlSerialize);
 
 					n->xmloption = $3;
 					n->expr = $4;
 					n->typeName = $6;
-					n->indent = $7;
+					n->format = $7;
 					n->location = @1;
 					$$ = (Node *) n;
 				}
@@ -16122,9 +16123,13 @@ document_or_content: DOCUMENT_P						{ $$ = XMLOPTION_DOCUMENT; }
 			| CONTENT_P								{ $$ = XMLOPTION_CONTENT; }
 		;
 
-xml_indent_option: INDENT							{ $$ = true; }
-			| NO INDENT								{ $$ = false; }
-			| /*EMPTY*/								{ $$ = false; }
+opt_xml_serialize_format:
+			INDENT									{ $$ = XMLSERIALIZE_INDENT; }
+			| NO INDENT								{ $$ = XMLSERIALIZE_NO_FORMAT; }
+			| CANONICAL								{ $$ = XMLSERIALIZE_CANONICAL; }
+			| CANONICAL WITH NO COMMENTS			{ $$ = XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS; }
+			| CANONICAL WITH COMMENTS				{ $$ = XMLSERIALIZE_CANONICAL; }
+			| /*EMPTY*/								{ $$ = XMLSERIALIZE_NO_FORMAT; }
 		;
 
 xml_whitespace_option: PRESERVE WHITESPACE_P		{ $$ = true; }
@@ -17521,6 +17526,7 @@ unreserved_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CATALOG_P
@@ -18072,6 +18078,7 @@ bare_label_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CASE
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 56e413da9f..902cc328ef 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -2490,7 +2490,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
 	typenameTypeIdAndMod(pstate, xs->typeName, &targetType, &targetTypmod);
 
 	xexpr->xmloption = xs->xmloption;
-	xexpr->indent = xs->indent;
+	xexpr->format = xs->format;
 	xexpr->location = xs->location;
 	/* We actually only need these to be able to parse back the expression. */
 	xexpr->type = targetType;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 447e72b21e..e5ec1b0229 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -58,6 +58,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -653,7 +654,7 @@ xmltotext(PG_FUNCTION_ARGS)
 
 
 text *
-xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
+xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, XmlSerializeFormat format)
 {
 #ifdef USE_LIBXML
 	text	   *volatile result;
@@ -666,7 +667,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	PgXmlErrorContext *xmlerrcxt;
 #endif
 
-	if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
+	if (xmloption_arg != XMLOPTION_DOCUMENT && format == XMLSERIALIZE_NO_FORMAT)
 	{
 		/*
 		 * We don't actually need to do anything, so just return the
@@ -677,10 +678,23 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 #ifdef USE_LIBXML
-	/* Parse the input according to the xmloption */
-	doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
-					&parsed_xmloptiontype, &content_nodes,
-					(Node *) &escontext);
+	/*
+	 * Parse the input according to the xmloption. XML canonical expects
+	 * a well-formed XML input, so here in case of XMLSERIALIZE_CANONICAL
+	 * or XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS we force xml_parse() to parse
+	 * 'data' as XMLOPTION_DOCUMENT despite of the XmlOptionType given in
+	 * 'xmloption_arg'. This enables the canonicalization of CONTENT fragments
+	 * if they contain a singly-rooted XML - xml_parse() will thrown an error
+	 * otherwise.
+	 */
+	if(format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
+		doc = xml_parse(data, XMLOPTION_DOCUMENT, false,
+						GetDatabaseEncoding(), NULL, NULL, NULL);
+	else
+		doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
+						&parsed_xmloptiontype, &content_nodes,
+						(Node *) &escontext);
+
 	if (doc == NULL || escontext.error_occurred)
 	{
 		if (doc)
@@ -692,7 +706,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 	/* If we weren't asked to indent, we're done. */
-	if (!indent)
+	if (format == XMLSERIALIZE_NO_FORMAT)
 	{
 		xmlFreeDoc(doc);
 		return (text *) data;
@@ -701,128 +715,187 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	/* Otherwise, we gotta spin up some error handling. */
 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
-	PG_TRY();
+	if (format == XMLSERIALIZE_INDENT)
 	{
-		size_t		decl_len = 0;
-
-		/* The serialized data will go into this buffer. */
-		buf = xmlBufferCreate();
-
-		if (buf == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlBuffer");
-
-		/* Detect whether there's an XML declaration */
-		parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
-
-		/*
-		 * Emit declaration only if the input had one.  Note: some versions of
-		 * xmlSaveToBuffer leak memory if a non-null encoding argument is
-		 * passed, so don't do that.  We don't want any encoding conversion
-		 * anyway.
-		 */
-		if (decl_len == 0)
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
-		else
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_FORMAT);
-
-		if (ctxt == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlSaveCtxt");
-
-		if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
-		{
-			/* If it's a document, saving is easy. */
-			if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
-				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-							"could not save document to xmlBuffer");
-		}
-		else if (content_nodes != NULL)
+		PG_TRY();
 		{
-			/*
-			 * Deal with the case where we have non-singly-rooted XML.
-			 * libxml's dump functions don't work well for that without help.
-			 * We build a fake root node that serves as a container for the
-			 * content nodes, and then iterate over the nodes.
-			 */
-			xmlNodePtr	root;
-			xmlNodePtr	newline;
+			size_t decl_len = 0;
 
-			root = xmlNewNode(NULL, (const xmlChar *) "content-root");
-			if (root == NULL || xmlerrcxt->err_occurred)
+			/* The serialized data will go into this buffer. */
+			buf = xmlBufferCreate();
+
+			if (buf == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlBuffer");
 
-			/* This attaches root to doc, so we need not free it separately. */
-			xmlDocSetRootElement(doc, root);
-			xmlAddChildList(root, content_nodes);
+			/* Detect whether there's an XML declaration */
+			parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
 
 			/*
-			 * We use this node to insert newlines in the dump.  Note: in at
-			 * least some libxml versions, xmlNewDocText would not attach the
-			 * node to the document even if we passed it.  Therefore, manage
-			 * freeing of this node manually, and pass NULL here to make sure
-			 * there's not a dangling link.
+			 * Emit declaration only if the input had one.  Note: some versions of
+			 * xmlSaveToBuffer leak memory if a non-null encoding argument is
+			 * passed, so don't do that.  We don't want any encoding conversion
+			 * anyway.
 			 */
-			newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
-			if (newline == NULL || xmlerrcxt->err_occurred)
+			if (decl_len == 0)
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									   XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
+			else
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									   XML_SAVE_FORMAT);
+
+			if (ctxt == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlSaveCtxt");
 
-			for (xmlNodePtr node = root->children; node; node = node->next)
+			if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
 			{
-				/* insert newlines between nodes */
-				if (node->type != XML_TEXT_NODE && node->prev != NULL)
+				/* If it's a document, saving is easy. */
+				if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+								"could not save document to xmlBuffer");
+			}
+			else if (content_nodes != NULL)
+			{
+				/*
+				 * Deal with the case where we have non-singly-rooted XML.
+				 * libxml's dump functions don't work well for that without help.
+				 * We build a fake root node that serves as a container for the
+				 * content nodes, and then iterate over the nodes.
+				 */
+				xmlNodePtr root;
+				xmlNodePtr newline;
+
+				root = xmlNewNode(NULL, (const xmlChar *)"content-root");
+				if (root == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				/* This attaches root to doc, so we need not free it separately. */
+				xmlDocSetRootElement(doc, root);
+				xmlAddChildList(root, content_nodes);
+
+				/*
+				 * We use this node to insert newlines in the dump.  Note: in at
+				 * least some libxml versions, xmlNewDocText would not attach the
+				 * node to the document even if we passed it.  Therefore, manage
+				 * freeing of this node manually, and pass NULL here to make sure
+				 * there's not a dangling link.
+				 */
+				newline = xmlNewDocText(NULL, (const xmlChar *)"\n");
+				if (newline == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				for (xmlNodePtr node = root->children; node; node = node->next)
 				{
-					if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+					/* insert newlines between nodes */
+					if (node->type != XML_TEXT_NODE && node->prev != NULL)
+					{
+						if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+						{
+							xmlFreeNode(newline);
+							xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+										"could not save newline to xmlBuffer");
+						}
+					}
+
+					if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
 					{
 						xmlFreeNode(newline);
 						xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-									"could not save newline to xmlBuffer");
+									"could not save content to xmlBuffer");
 					}
 				}
 
-				if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
-				{
-					xmlFreeNode(newline);
-					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-								"could not save content to xmlBuffer");
-				}
+				xmlFreeNode(newline);
 			}
 
-			xmlFreeNode(newline);
-		}
+			if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			{
+				ctxt = NULL; /* don't try to close it again */
+				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+							"could not close xmlSaveCtxtPtr");
+			}
 
-		if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			result = (text *)xmlBuffer_to_xmltype(buf);
+		}
+		PG_CATCH();
 		{
-			ctxt = NULL;		/* don't try to close it again */
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-						"could not close xmlSaveCtxtPtr");
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (buf)
+				xmlBufferFree(buf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
 		}
+		PG_END_TRY();
+
+		xmlBufferFree(buf);
+		xmlFreeDoc(doc);
 
-		result = (text *) xmlBuffer_to_xmltype(buf);
+		pg_xml_done(xmlerrcxt, false);
 	}
-	PG_CATCH();
+	else if (format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
 	{
-		if (ctxt)
-			xmlSaveClose(ctxt);
-		if (buf)
-			xmlBufferFree(buf);
-		if (doc)
-			xmlFreeDoc(doc);
+		xmlChar    *xmlbuf = NULL;
+		int         nbytes;
+		int         with_comments = 0; /* 0 = no xml comments (default) */
 
-		pg_xml_done(xmlerrcxt, true);
+		PG_TRY();
+		{
+			/* 1 = keeps xml comments */
+			if (format == XMLSERIALIZE_CANONICAL)
+				with_comments = 1;
 
-		PG_RE_THROW();
-	}
-	PG_END_TRY();
+		if (doc == NULL || escontext.error_occurred)
+		{
+			if (doc)
+				xmlFreeDoc(doc);
+			/* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
+			ereport(ERROR,
+					(errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
+					errmsg("not an XML document")));
+		}
 
-	xmlBufferFree(buf);
-	xmlFreeDoc(doc);
+		/*
+		 * This dumps the canonicalized XML doc into the xmlChar* buffer.
+		 * mode = 2 means the doc will be canonicalized using the C14N 1.1 standard.
+		 */
+		nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, with_comments, &xmlbuf);
+		if(nbytes < 0 || escontext.error_occurred)
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						errmsg("could not canonicalize the given XML document")));
 
-	pg_xml_done(xmlerrcxt, false);
+			result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+		}
+		PG_CATCH();
+		{
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (xmlbuf)
+				xmlFree(xmlbuf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
+		}
+		PG_END_TRY();
+
+		xmlFreeDoc(doc);
+		xmlFree(xmlbuf);
+
+		pg_xml_done(xmlerrcxt, false);
+	}
+	else
+		elog(ERROR,"invalid xmlserialize option");
 
 	return result;
 #else
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 124d853e49..a642426123 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -846,6 +846,7 @@ typedef struct XmlSerialize
 	Node	   *expr;
 	TypeName   *typeName;
 	bool		indent;			/* [NO] INDENT */
+	XmlSerializeFormat	format;	/* serialization format */
 	ParseLoc	location;		/* token location, or -1 if unknown */
 } XmlSerialize;
 
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index ea47652adb..09f54ff247 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1593,6 +1593,14 @@ typedef enum XmlOptionType
 	XMLOPTION_CONTENT,
 } XmlOptionType;
 
+typedef enum XmlSerializeFormat
+{
+	XMLSERIALIZE_INDENT,                       /* pretty-printed xml serialization  */
+	XMLSERIALIZE_CANONICAL,                    /* canonical form with xml comments */
+	XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS,   /* canonical form without xml comments */
+	XMLSERIALIZE_NO_FORMAT                     /* unformatted xml representation */
+} XmlSerializeFormat;
+
 typedef struct XmlExpr
 {
 	Expr		xpr;
@@ -1615,6 +1623,8 @@ typedef struct XmlExpr
 	int32		typmod pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	ParseLoc	location;
+	/* serialization format: XMLCANONICAL, XMLCANONICAL_WITH_COMMENTS, XMLINDENT */
+	XmlSerializeFormat format pg_node_attr(query_jumble_ignore);
 } XmlExpr;
 
 /*
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index f8659078ce..132bd3a82b 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -68,6 +68,7 @@ PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("canonical", CANONICAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index ed20e21375..05f11c0517 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -78,7 +78,7 @@ extern xmltype *xmlpi(const char *target, text *arg, bool arg_is_null, bool *res
 extern xmltype *xmlroot(xmltype *data, text *version, int standalone);
 extern bool xml_is_document(xmltype *arg);
 extern text *xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg,
-									bool indent);
+									XmlSerializeFormat format);
 extern char *escape_xml(const char *str);
 
 extern char *map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, bool escape_period);
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 93a79cda8f..256fb2b90f 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -663,6 +663,121 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                                                                                                                                                                                                                                                  xmlserialize                                                                                                                                                                                                                                                                                                                                                                                                                  
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- default attribute will be added --><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside doc -->
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+-- this tests that the flag WITH COMMENTS is the default behaviour
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 9323b84ae2..f7533c0ee2 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -443,6 +443,115 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val><...
                                      ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+ERROR:  unsupported XML feature
+LINE 2:   (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+             ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+-- this tests that the flag WITH COMMENTS is the default behaviour
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 ERROR:  unsupported XML feature
 LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index f956322c69..bbf6615219 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -649,6 +649,121 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                                                                                                                                                                                                                                                  xmlserialize                                                                                                                                                                                                                                                                                                                                                                                                                  
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- default attribute will be added --><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside doc -->
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+-- this tests that the flag WITH COMMENTS is the default behaviour
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index 953bac09e4..db7dd247f0 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -168,6 +168,72 @@ SELECT xmlserialize(CONTENT  '<foo><bar></bar></foo>' AS text INDENT);
 -- 'no indent' = not using 'no indent'
 SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
 SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+
+-- this tests that the flag WITH COMMENTS is the default behaviour
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+\set VERBOSITY default
 
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 9e951a9e6f..60ca567c0d 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3237,6 +3237,7 @@ XmlExpr
 XmlExprOp
 XmlOptionType
 XmlSerialize
+XmlSerializeFormat
 XmlTableBuilderData
 YYLTYPE
 YYSTYPE
-- 
2.34.1

#33

Jim Jones

jim.jones@uni-muenster.de

over 1 year ago

In reply to: Jim Jones (#32)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

v13 attached removes two variables that were left unused after
refactoring parsenodes.h and primnodes.h, both booleans related to the
INDENT feature of xmlserialize.

On 30.08.24 08:05, Jim Jones wrote:

On 30.08.24 06:46, Pavel Stehule wrote:
čt 29. 8. 2024 v 23:54 odesílatel Jim Jones
<jim.jones@uni-muenster.de> napsal:
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) =
xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM
xmltest_serialize;
+ ?column?
+----------
+ t
+ t
+(2 rows)
Maybe I am a little bit confused by these regress tests, because at
the end it is not too useful - you compare two identical XML,
and WITH

COMMENTS and WITHOUT COMMENTS is tested elsewhere. I tried to search
for a sense of this test. Better to use really different documents
(columns) instead.

Yeah, I can see that it's confusing. In this example I actually just
wanted to test that the default option of CANONICAL is CANONICAL WITH
COMMENTS, even if you don't mention it. In the docs I mentioned it
like
this:

"The optional parameters WITH COMMENTS (which is the default) or
WITH NO
COMMENTS, respectively, keep or remove XML comments from the given
document."

Perhaps I should rephrase it? Or maybe a comment in the regression
tests
would suffice?

comment will be enough
v12 attached adds a comment to this test.

Thanks

--
Jim

Attachments:

v13-0001-Add-CANONICAL-output-format-to-xmlserialize.patchtext/x-patch; charset=UTF-8; name=v13-0001-Add-CANONICAL-output-format-to-xmlserialize.patchDownload

From 95f69d59435c1e4906235004c08258de956e2613 Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Tue, 3 Sep 2024 15:51:38 +0200
Subject: [PATCH v13] Add CANONICAL output format to xmlserialize

This patch introduces the CANONICAL option to xmlserialize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification. This option can
be used with the additional parameter WITH [NO] COMMENTS to keep
or remove xml comments from the canonical xml output. In case no
parameter is provided, WITH COMMENTS will be used as default. This
feature is based on the function xmlC14NDocDumpMemory from the C14N
module of libxml2.

This patch also includes regression tests and documentation.
---
 doc/src/sgml/datatype.sgml            |  41 +++-
 src/backend/executor/execExprInterp.c |   2 +-
 src/backend/parser/gram.y             |  21 +-
 src/backend/parser/parse_expr.c       |   2 +-
 src/backend/utils/adt/xml.c           | 271 ++++++++++++++++----------
 src/include/nodes/parsenodes.h        |   2 +-
 src/include/nodes/primnodes.h         |  12 +-
 src/include/parser/kwlist.h           |   1 +
 src/include/utils/xml.h               |   2 +-
 src/test/regress/expected/xml.out     | 115 +++++++++++
 src/test/regress/expected/xml_1.out   | 109 +++++++++++
 src/test/regress/expected/xml_2.out   | 115 +++++++++++
 src/test/regress/sql/xml.sql          |  66 +++++++
 src/tools/pgindent/typedefs.list      |   1 +
 14 files changed, 647 insertions(+), 113 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index e0d33f12e1..19ba5bbf7f 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4496,7 +4496,7 @@ xml '<foo>bar</foo>'
     <type>xml</type>, uses the function
     <function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm>
 <synopsis>
-XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ [ NO ] INDENT ] )
+XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ { [ NO ] INDENT ] | CANONICAL [ WITH [NO] COMMENTS ]})
 </synopsis>
     <replaceable>type</replaceable> can be
     <type>character</type>, <type>character varying</type>, or
@@ -4513,6 +4513,45 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <repla
     type likewise produces the original string.
    </para>
 
+   <para>
+    The option <type>CANONICAL</type> converts a given
+    XML document to its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>
+    based on the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>.
+    It is basically designed to provide applications the ability to compare xml documents or test if they
+    have been changed. The optional parameters <type>WITH COMMENTS</type> (which is the default) or
+    <type>WITH NO COMMENTS</type>, respectively, keep or remove XML comments from the given document.
+    </para>
+
+     <para>
+     Example:
+
+<screen><![CDATA[
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL);
+                                xmlserialize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL WITH NO COMMENTS);
+                       xmlserialize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+]]></screen>
+   </para>
    <para>
     When a character string value is cast to or from type
     <type>xml</type> without going through <type>XMLPARSE</type> or
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 77394e76c3..9222eed1e2 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -4189,7 +4189,7 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 				*op->resvalue =
 					PointerGetDatum(xmltotext_with_options(DatumGetXmlP(value),
 														   xexpr->xmloption,
-														   xexpr->indent));
+														   xexpr->format));
 				*op->resnull = false;
 			}
 			break;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 84cef57a70..bb05d2cb3b 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -618,12 +618,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <node>	xml_root_version opt_xml_root_standalone
 %type <node>	xmlexists_argument
 %type <ival>	document_or_content
-%type <boolean>	xml_indent_option xml_whitespace_option
+%type <boolean>	xml_whitespace_option
 %type <list>	xmltable_column_list xmltable_column_option_list
 %type <node>	xmltable_column_el
 %type <defelt>	xmltable_column_option_el
 %type <list>	xml_namespace_list
 %type <target>	xml_namespace_el
+%type <ival> 	opt_xml_serialize_format
 
 %type <node>	func_application func_expr_common_subexpr
 %type <node>	func_expr func_expr_windowless
@@ -709,7 +710,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
 	BOOLEAN_P BOTH BREADTH BY
 
-	CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
+	CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT
 	COMMITTED COMPRESSION CONCURRENTLY CONDITIONAL CONFIGURATION CONFLICT
@@ -15895,14 +15896,14 @@ func_expr_common_subexpr:
 					$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
 									 list_make3($3, $5, $6), @1);
 				}
-			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename xml_indent_option ')'
+			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename opt_xml_serialize_format ')'
 				{
 					XmlSerialize *n = makeNode(XmlSerialize);
 
 					n->xmloption = $3;
 					n->expr = $4;
 					n->typeName = $6;
-					n->indent = $7;
+					n->format = $7;
 					n->location = @1;
 					$$ = (Node *) n;
 				}
@@ -16122,9 +16123,13 @@ document_or_content: DOCUMENT_P						{ $$ = XMLOPTION_DOCUMENT; }
 			| CONTENT_P								{ $$ = XMLOPTION_CONTENT; }
 		;
 
-xml_indent_option: INDENT							{ $$ = true; }
-			| NO INDENT								{ $$ = false; }
-			| /*EMPTY*/								{ $$ = false; }
+opt_xml_serialize_format:
+			INDENT									{ $$ = XMLSERIALIZE_INDENT; }
+			| NO INDENT								{ $$ = XMLSERIALIZE_NO_FORMAT; }
+			| CANONICAL								{ $$ = XMLSERIALIZE_CANONICAL; }
+			| CANONICAL WITH NO COMMENTS			{ $$ = XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS; }
+			| CANONICAL WITH COMMENTS				{ $$ = XMLSERIALIZE_CANONICAL; }
+			| /*EMPTY*/								{ $$ = XMLSERIALIZE_NO_FORMAT; }
 		;
 
 xml_whitespace_option: PRESERVE WHITESPACE_P		{ $$ = true; }
@@ -17521,6 +17526,7 @@ unreserved_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CATALOG_P
@@ -18072,6 +18078,7 @@ bare_label_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CASE
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 56e413da9f..902cc328ef 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -2490,7 +2490,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
 	typenameTypeIdAndMod(pstate, xs->typeName, &targetType, &targetTypmod);
 
 	xexpr->xmloption = xs->xmloption;
-	xexpr->indent = xs->indent;
+	xexpr->format = xs->format;
 	xexpr->location = xs->location;
 	/* We actually only need these to be able to parse back the expression. */
 	xexpr->type = targetType;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 447e72b21e..e5ec1b0229 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -58,6 +58,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -653,7 +654,7 @@ xmltotext(PG_FUNCTION_ARGS)
 
 
 text *
-xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
+xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, XmlSerializeFormat format)
 {
 #ifdef USE_LIBXML
 	text	   *volatile result;
@@ -666,7 +667,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	PgXmlErrorContext *xmlerrcxt;
 #endif
 
-	if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
+	if (xmloption_arg != XMLOPTION_DOCUMENT && format == XMLSERIALIZE_NO_FORMAT)
 	{
 		/*
 		 * We don't actually need to do anything, so just return the
@@ -677,10 +678,23 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 #ifdef USE_LIBXML
-	/* Parse the input according to the xmloption */
-	doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
-					&parsed_xmloptiontype, &content_nodes,
-					(Node *) &escontext);
+	/*
+	 * Parse the input according to the xmloption. XML canonical expects
+	 * a well-formed XML input, so here in case of XMLSERIALIZE_CANONICAL
+	 * or XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS we force xml_parse() to parse
+	 * 'data' as XMLOPTION_DOCUMENT despite of the XmlOptionType given in
+	 * 'xmloption_arg'. This enables the canonicalization of CONTENT fragments
+	 * if they contain a singly-rooted XML - xml_parse() will thrown an error
+	 * otherwise.
+	 */
+	if(format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
+		doc = xml_parse(data, XMLOPTION_DOCUMENT, false,
+						GetDatabaseEncoding(), NULL, NULL, NULL);
+	else
+		doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
+						&parsed_xmloptiontype, &content_nodes,
+						(Node *) &escontext);
+
 	if (doc == NULL || escontext.error_occurred)
 	{
 		if (doc)
@@ -692,7 +706,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 	/* If we weren't asked to indent, we're done. */
-	if (!indent)
+	if (format == XMLSERIALIZE_NO_FORMAT)
 	{
 		xmlFreeDoc(doc);
 		return (text *) data;
@@ -701,128 +715,187 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	/* Otherwise, we gotta spin up some error handling. */
 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
-	PG_TRY();
+	if (format == XMLSERIALIZE_INDENT)
 	{
-		size_t		decl_len = 0;
-
-		/* The serialized data will go into this buffer. */
-		buf = xmlBufferCreate();
-
-		if (buf == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlBuffer");
-
-		/* Detect whether there's an XML declaration */
-		parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
-
-		/*
-		 * Emit declaration only if the input had one.  Note: some versions of
-		 * xmlSaveToBuffer leak memory if a non-null encoding argument is
-		 * passed, so don't do that.  We don't want any encoding conversion
-		 * anyway.
-		 */
-		if (decl_len == 0)
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
-		else
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_FORMAT);
-
-		if (ctxt == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlSaveCtxt");
-
-		if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
-		{
-			/* If it's a document, saving is easy. */
-			if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
-				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-							"could not save document to xmlBuffer");
-		}
-		else if (content_nodes != NULL)
+		PG_TRY();
 		{
-			/*
-			 * Deal with the case where we have non-singly-rooted XML.
-			 * libxml's dump functions don't work well for that without help.
-			 * We build a fake root node that serves as a container for the
-			 * content nodes, and then iterate over the nodes.
-			 */
-			xmlNodePtr	root;
-			xmlNodePtr	newline;
+			size_t decl_len = 0;
 
-			root = xmlNewNode(NULL, (const xmlChar *) "content-root");
-			if (root == NULL || xmlerrcxt->err_occurred)
+			/* The serialized data will go into this buffer. */
+			buf = xmlBufferCreate();
+
+			if (buf == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlBuffer");
 
-			/* This attaches root to doc, so we need not free it separately. */
-			xmlDocSetRootElement(doc, root);
-			xmlAddChildList(root, content_nodes);
+			/* Detect whether there's an XML declaration */
+			parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
 
 			/*
-			 * We use this node to insert newlines in the dump.  Note: in at
-			 * least some libxml versions, xmlNewDocText would not attach the
-			 * node to the document even if we passed it.  Therefore, manage
-			 * freeing of this node manually, and pass NULL here to make sure
-			 * there's not a dangling link.
+			 * Emit declaration only if the input had one.  Note: some versions of
+			 * xmlSaveToBuffer leak memory if a non-null encoding argument is
+			 * passed, so don't do that.  We don't want any encoding conversion
+			 * anyway.
 			 */
-			newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
-			if (newline == NULL || xmlerrcxt->err_occurred)
+			if (decl_len == 0)
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									   XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
+			else
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									   XML_SAVE_FORMAT);
+
+			if (ctxt == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlSaveCtxt");
 
-			for (xmlNodePtr node = root->children; node; node = node->next)
+			if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
 			{
-				/* insert newlines between nodes */
-				if (node->type != XML_TEXT_NODE && node->prev != NULL)
+				/* If it's a document, saving is easy. */
+				if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+								"could not save document to xmlBuffer");
+			}
+			else if (content_nodes != NULL)
+			{
+				/*
+				 * Deal with the case where we have non-singly-rooted XML.
+				 * libxml's dump functions don't work well for that without help.
+				 * We build a fake root node that serves as a container for the
+				 * content nodes, and then iterate over the nodes.
+				 */
+				xmlNodePtr root;
+				xmlNodePtr newline;
+
+				root = xmlNewNode(NULL, (const xmlChar *)"content-root");
+				if (root == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				/* This attaches root to doc, so we need not free it separately. */
+				xmlDocSetRootElement(doc, root);
+				xmlAddChildList(root, content_nodes);
+
+				/*
+				 * We use this node to insert newlines in the dump.  Note: in at
+				 * least some libxml versions, xmlNewDocText would not attach the
+				 * node to the document even if we passed it.  Therefore, manage
+				 * freeing of this node manually, and pass NULL here to make sure
+				 * there's not a dangling link.
+				 */
+				newline = xmlNewDocText(NULL, (const xmlChar *)"\n");
+				if (newline == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				for (xmlNodePtr node = root->children; node; node = node->next)
 				{
-					if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+					/* insert newlines between nodes */
+					if (node->type != XML_TEXT_NODE && node->prev != NULL)
+					{
+						if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+						{
+							xmlFreeNode(newline);
+							xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+										"could not save newline to xmlBuffer");
+						}
+					}
+
+					if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
 					{
 						xmlFreeNode(newline);
 						xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-									"could not save newline to xmlBuffer");
+									"could not save content to xmlBuffer");
 					}
 				}
 
-				if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
-				{
-					xmlFreeNode(newline);
-					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-								"could not save content to xmlBuffer");
-				}
+				xmlFreeNode(newline);
 			}
 
-			xmlFreeNode(newline);
-		}
+			if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			{
+				ctxt = NULL; /* don't try to close it again */
+				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+							"could not close xmlSaveCtxtPtr");
+			}
 
-		if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			result = (text *)xmlBuffer_to_xmltype(buf);
+		}
+		PG_CATCH();
 		{
-			ctxt = NULL;		/* don't try to close it again */
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-						"could not close xmlSaveCtxtPtr");
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (buf)
+				xmlBufferFree(buf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
 		}
+		PG_END_TRY();
+
+		xmlBufferFree(buf);
+		xmlFreeDoc(doc);
 
-		result = (text *) xmlBuffer_to_xmltype(buf);
+		pg_xml_done(xmlerrcxt, false);
 	}
-	PG_CATCH();
+	else if (format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
 	{
-		if (ctxt)
-			xmlSaveClose(ctxt);
-		if (buf)
-			xmlBufferFree(buf);
-		if (doc)
-			xmlFreeDoc(doc);
+		xmlChar    *xmlbuf = NULL;
+		int         nbytes;
+		int         with_comments = 0; /* 0 = no xml comments (default) */
 
-		pg_xml_done(xmlerrcxt, true);
+		PG_TRY();
+		{
+			/* 1 = keeps xml comments */
+			if (format == XMLSERIALIZE_CANONICAL)
+				with_comments = 1;
 
-		PG_RE_THROW();
-	}
-	PG_END_TRY();
+		if (doc == NULL || escontext.error_occurred)
+		{
+			if (doc)
+				xmlFreeDoc(doc);
+			/* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
+			ereport(ERROR,
+					(errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
+					errmsg("not an XML document")));
+		}
 
-	xmlBufferFree(buf);
-	xmlFreeDoc(doc);
+		/*
+		 * This dumps the canonicalized XML doc into the xmlChar* buffer.
+		 * mode = 2 means the doc will be canonicalized using the C14N 1.1 standard.
+		 */
+		nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, with_comments, &xmlbuf);
+		if(nbytes < 0 || escontext.error_occurred)
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						errmsg("could not canonicalize the given XML document")));
 
-	pg_xml_done(xmlerrcxt, false);
+			result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+		}
+		PG_CATCH();
+		{
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (xmlbuf)
+				xmlFree(xmlbuf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
+		}
+		PG_END_TRY();
+
+		xmlFreeDoc(doc);
+		xmlFree(xmlbuf);
+
+		pg_xml_done(xmlerrcxt, false);
+	}
+	else
+		elog(ERROR,"invalid xmlserialize option");
 
 	return result;
 #else
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 124d853e49..91aaf26e36 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -845,7 +845,7 @@ typedef struct XmlSerialize
 	XmlOptionType xmloption;	/* DOCUMENT or CONTENT */
 	Node	   *expr;
 	TypeName   *typeName;
-	bool		indent;			/* [NO] INDENT */
+	XmlSerializeFormat	format;	/* serialization format */
 	ParseLoc	location;		/* token location, or -1 if unknown */
 } XmlSerialize;
 
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index ea47652adb..f2c43e4f3a 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1593,6 +1593,14 @@ typedef enum XmlOptionType
 	XMLOPTION_CONTENT,
 } XmlOptionType;
 
+typedef enum XmlSerializeFormat
+{
+	XMLSERIALIZE_INDENT,                       /* pretty-printed xml serialization  */
+	XMLSERIALIZE_CANONICAL,                    /* canonical form with xml comments */
+	XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS,   /* canonical form without xml comments */
+	XMLSERIALIZE_NO_FORMAT                     /* unformatted xml representation */
+} XmlSerializeFormat;
+
 typedef struct XmlExpr
 {
 	Expr		xpr;
@@ -1608,13 +1616,13 @@ typedef struct XmlExpr
 	List	   *args;
 	/* DOCUMENT or CONTENT */
 	XmlOptionType xmloption pg_node_attr(query_jumble_ignore);
-	/* INDENT option for XMLSERIALIZE */
-	bool		indent;
 	/* target type/typmod for XMLSERIALIZE */
 	Oid			type pg_node_attr(query_jumble_ignore);
 	int32		typmod pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	ParseLoc	location;
+	/* serialization format: XMLCANONICAL, XMLCANONICAL_WITH_COMMENTS, XMLINDENT */
+	XmlSerializeFormat format pg_node_attr(query_jumble_ignore);
 } XmlExpr;
 
 /*
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index f8659078ce..132bd3a82b 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -68,6 +68,7 @@ PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("canonical", CANONICAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index ed20e21375..05f11c0517 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -78,7 +78,7 @@ extern xmltype *xmlpi(const char *target, text *arg, bool arg_is_null, bool *res
 extern xmltype *xmlroot(xmltype *data, text *version, int standalone);
 extern bool xml_is_document(xmltype *arg);
 extern text *xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg,
-									bool indent);
+									XmlSerializeFormat format);
 extern char *escape_xml(const char *str);
 
 extern char *map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, bool escape_period);
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 93a79cda8f..256fb2b90f 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -663,6 +663,121 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                                                                                                                                                                                                                                                  xmlserialize                                                                                                                                                                                                                                                                                                                                                                                                                  
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- default attribute will be added --><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside doc -->
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+-- this tests that the flag WITH COMMENTS is the default behaviour
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 9323b84ae2..f7533c0ee2 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -443,6 +443,115 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val><...
                                      ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+ERROR:  unsupported XML feature
+LINE 2:   (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+             ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+-- this tests that the flag WITH COMMENTS is the default behaviour
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 ERROR:  unsupported XML feature
 LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index f956322c69..bbf6615219 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -649,6 +649,121 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                                                                                                                                                                                                                                                  xmlserialize                                                                                                                                                                                                                                                                                                                                                                                                                  
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- default attribute will be added --><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside doc -->
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+-- this tests that the flag WITH COMMENTS is the default behaviour
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index 953bac09e4..db7dd247f0 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -168,6 +168,72 @@ SELECT xmlserialize(CONTENT  '<foo><bar></bar></foo>' AS text INDENT);
 -- 'no indent' = not using 'no indent'
 SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
 SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+
+-- this tests that the flag WITH COMMENTS is the default behaviour
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+\set VERBOSITY default
 
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 9e951a9e6f..60ca567c0d 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3237,6 +3237,7 @@ XmlExpr
 XmlExprOp
 XmlOptionType
 XmlSerialize
+XmlSerializeFormat
 XmlTableBuilderData
 YYLTYPE
 YYSTYPE
-- 
2.34.1

#34

Oliver Ford

ojford@gmail.com

over 1 year ago

In reply to: Jim Jones (#33)

Re: [PoC] Add CANONICAL option to xmlserialize

The following review has been posted through the commitfest application:
make installcheck-world: tested, failed
Implements feature: tested, failed
Spec compliant: tested, failed
Documentation: tested, failed

LGTM

The new status of this patch is: Ready for Committer

#35

Laurenz Albe

laurenz.albe@cybertec.at

over 1 year ago

In reply to: Oliver Ford (#34)

Re: [PoC] Add CANONICAL option to xmlserialize

On Sun, 2024-09-08 at 11:43 +0000, Oliver Ford wrote:

The following review has been posted through the commitfest application:
make installcheck-world: tested, failed
Implements feature: tested, failed
Spec compliant: tested, failed
Documentation: tested, failed

LGTM

The new status of this patch is: Ready for Committer

Huh? Do you mean "tested, passes"?

Yours,
Laurenz Albe

#36

Oliver Ford

ojford@gmail.com

over 1 year ago

In reply to: Laurenz Albe (#35)

Re: [PoC] Add CANONICAL option to xmlserialize

On Sun, Sep 8, 2024 at 2:44 PM Laurenz Albe <laurenz.albe@cybertec.at> wrote:

On Sun, 2024-09-08 at 11:43 +0000, Oliver Ford wrote:

The following review has been posted through the commitfest application:
make installcheck-world: tested, failed
Implements feature: tested, failed
Spec compliant: tested, failed
Documentation: tested, failed

LGTM

The new status of this patch is: Ready for Committer

Huh? Do you mean "tested, passes"?

Yours,
Laurenz Albe

Whoops, yes all tests and docs pass!

#37

Jim Jones

jim.jones@uni-muenster.de

over 1 year ago

In reply to: Oliver Ford (#36)

Re: [PoC] Add CANONICAL option to xmlserialize

Hi Oliver

On 08.09.24 15:56, Oliver Ford wrote:

Whoops, yes all tests and docs pass!

Thanks for the review!

Best, Jim

#38

Tom Lane

tgl@sss.pgh.pa.us

over 1 year ago

In reply to: Jim Jones (#33)

Re: [PATCH] Add CANONICAL option to xmlserialize

Jim Jones <jim.jones@uni-muenster.de> writes:

This patch introduces the CANONICAL option to xmlserialize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification. This option can
be used with the additional parameter WITH [NO] COMMENTS to keep
or remove xml comments from the canonical xml output.

While I don't object to providing this functionality in some form,
I think that doing it with this specific syntax is a seriously
bad idea. I think there's significant risk that at some point
the SQL committee will either standardize this syntax with a
somewhat different meaning or standardize some other syntax for
the same functionality.

How about instead introducing a plain function along the lines of
"xml_canonicalize(xml, bool keep_comments) returns text" ? The SQL
committee will certainly never do that, but we won't regret having
created a plain function whenever they get around to doing something
in the same space.

regards, tom lane

#39

Jim Jones

jim.jones@uni-muenster.de

over 1 year ago

In reply to: Tom Lane (#38)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

On 10.09.24 19:43, Tom Lane wrote:

How about instead introducing a plain function along the lines of
"xml_canonicalize(xml, bool keep_comments) returns text" ? The SQL
committee will certainly never do that, but we won't regret having
created a plain function whenever they get around to doing something
in the same space.

A second function to serialize xml documents may sound a bit redundant,
but I totally understand the concern of possibly conflicting with
SQL/XMl spec in the feature. I guess we can always come back here and
extend xmlserialize when the SQL committee moves in this direction.

v14 attached adds the function xmlcanonicalize, as suggested.

Thanks

--
Jim

Attachments:

v14-0001-Add-xmlcanonicalize-function.patchtext/x-patch; charset=UTF-8; name=v14-0001-Add-xmlcanonicalize-function.patchDownload

From 08850417c9f0e1504a5e0cfbbd815c3a7aaaf7e8 Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Thu, 12 Sep 2024 12:23:34 +0200
Subject: [PATCH v14] Add xmlcanonicalize function

This patch introduces the function xmlcanonicalize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification.

xmlcanonicalize(doc xml, keep_comments boolean) -> xml

doc: the XML document to be canonicalized
keep_comments: keeps or removes xml comments from doc

This feature is based on the function xmlC14NDocDumpMemory from the
C14N module of libxml2.
---
 doc/src/sgml/func.sgml              | 48 ++++++++++++++++++++
 src/backend/utils/adt/xml.c         | 40 +++++++++++++++++
 src/include/catalog/pg_proc.dat     |  3 ++
 src/test/regress/expected/xml.out   | 70 +++++++++++++++++++++++++++++
 src/test/regress/expected/xml_1.out | 69 ++++++++++++++++++++++++++++
 src/test/regress/expected/xml_2.out | 70 +++++++++++++++++++++++++++++
 src/test/regress/sql/xml.sql        | 49 ++++++++++++++++++++
 7 files changed, 349 insertions(+)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 1bde4091ca..f63787d633 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -14325,6 +14325,54 @@ SELECT xmltext('< foo & bar >');
     </para>
    </sect3>
 
+<sect3 id="functions-producing-xml-xmlcanonicalize">
+    <title><literal>xmlcanonicalize</literal></title>
+
+    <indexterm>
+     <primary>xmlcanonicalize</primary>
+    </indexterm>
+
+<synopsis>
+<function>xmlcanonicalize</function> ( <parameter>doc</parameter> <type>xml</type>, <parameter>keep_comments</parameter> <type>boolean</type> ) <returnvalue>xml</returnvalue>
+</synopsis>
+
+    <para>
+     This function converts a given XML document into its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>
+     based on the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>.
+     It is basically designed to provide applications the ability to compare xml documents or test if they
+     have been changed. The parameter <parameter>keep_comments</parameter>, specifies if the XML comments from the given document should be kept or not.
+    </para>
+
+    <para>
+     Example:
+<screen><![CDATA[
+SELECT
+  xmlcanonicalize(
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml, true);
+                               xmlcanonicalize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlcanonicalize(
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml, false);
+                      xmlcanonicalize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+]]></screen>
+    </para>
+   </sect3>
+
    <sect3 id="functions-producing-xml-xmlcomment">
     <title><literal>xmlcomment</literal></title>
 
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 1a07876cd5..3d9ca2e040 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -58,6 +58,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -545,6 +546,45 @@ xmltext(PG_FUNCTION_ARGS)
 }
 
 
+Datum
+xmlcanonicalize(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	xmltype    *arg = PG_GETARG_XML_P(0);
+	bool		keep_comments = PG_GETARG_BOOL(1);
+	text	   *result;
+	int			nbytes;
+	xmlDocPtr	doc;
+	xmlChar    *xmlbuf = NULL;
+
+	doc = xml_parse(arg, XMLOPTION_DOCUMENT, false,
+					GetDatabaseEncoding(), NULL, NULL, NULL);
+
+	/*
+	 * This dumps the canonicalized XML doc into the xmlChar* buffer.
+	 * mode = 2 means the doc will be canonicalized using the C14N 1.1 standard.
+	 */
+	nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, keep_comments, &xmlbuf);
+
+	if(doc)
+		xmlFreeDoc(doc);
+
+	if(nbytes < 0)
+		ereport(ERROR,
+			(errcode(ERRCODE_INTERNAL_ERROR),
+			errmsg("could not canonicalize the given XML document")));
+
+	result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+
+	xmlFree(xmlbuf);
+
+	PG_RETURN_XML_P(result);
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif							/* not USE_LIBXML */
+}
+
 /*
  * TODO: xmlconcat needs to merge the notations and unparsed entities
  * of the argument values.  Not very important in practice, though.
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index ff5436acac..1a177647ef 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -8905,6 +8905,9 @@
 { oid => '3813', descr => 'generate XML text node',
   proname => 'xmltext', prorettype => 'xml', proargtypes => 'text',
   prosrc => 'xmltext' },
+{ oid => '3814', descr => 'generate the canonical form of an XML document',
+  proname => 'xmlcanonicalize', prorettype => 'xml', proargtypes => 'xml bool',
+  prosrc => 'xmlcanonicalize' },
 
 { oid => '2923', descr => 'map table contents to XML',
   proname => 'table_to_xml', procost => '100', provolatile => 's',
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 361a6f9b27..4994f31778 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -1866,3 +1866,73 @@ SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
  x&lt;P&gt;73&lt;/P&gt;0.42truej
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+                                                                                                                                                                                                                                                                                                                                                                                            xmlcanonicalize                                                                                                                                                                                                                                                                                                                                                                                             
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside root element -->
+(1 row)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+                                                                                                                                                                                   xmlcanonicalize                                                                                                                                                                                    
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo', true);
+ERROR:  invalid XML document
+\set VERBOSITY default
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index d26e10441e..640db2c05c 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -1477,3 +1477,72 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j':...
                              ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+ERROR:  unsupported XML feature
+LINE 2:   ('<?xml version="1.0" encoding="ISO-8859-1"?>
+           ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('  ', true);
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('foo', true);
+ERROR:  unsupported XML feature at character 24
+\set VERBOSITY default
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index 73c2851d3f..89835430d5 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -1852,3 +1852,73 @@ SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
  x&lt;P&gt;73&lt;/P&gt;0.42truej
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+                                                                                                                                                                                                                                                                                                                                                                                            xmlcanonicalize                                                                                                                                                                                                                                                                                                                                                                                             
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside root element -->
+(1 row)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+                                                                                                                                                                                   xmlcanonicalize                                                                                                                                                                                    
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo', true);
+ERROR:  invalid XML document
+\set VERBOSITY default
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index f752ecb142..02ea3834ab 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -673,3 +673,52 @@ SELECT xmltext('  ');
 SELECT xmltext('foo `$_-+?=*^%!|/\()[]{}');
 SELECT xmltext('foo & <"bar">');
 SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
+
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(NULL, true);
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+SELECT xmlcanonicalize('  ', true);
+SELECT xmlcanonicalize('foo', true);
+\set VERBOSITY default
\ No newline at end of file
-- 
2.34.1

#40

Jim Jones

jim.jones@uni-muenster.de

11 months ago

In reply to: Jim Jones (#39)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

On 12.09.24 12:56, Jim Jones wrote:

v14 attached adds the function xmlcanonicalize, as suggested.

rebase.

Best regards, Jim

Attachments:

v15-0001-Add-xmlcanonicalize-function.patchtext/x-patch; charset=UTF-8; name=v15-0001-Add-xmlcanonicalize-function.patchDownload

From 2121e54145eae40c8a6c172038ca3a4d07f9b78a Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Mon, 17 Feb 2025 12:05:34 +0100
Subject: [PATCH v15] Add xmlcanonicalize function

This patch adds the xmlcanonicalize function, which transforms an
XML document into its canonical form according to the W3C Canonical
XML Version 1.1 specification.

xmlcanonicalize(doc xml, keep_comments boolean) -> xml

* doc: The XML document to be canonicalized.
* keep_comments: A flag indicating whether to preserve or discard
  XML comments from the input document.

This implementation is based on the xmlC14NDocDumpMemory function
from the C14N module of libxml2.
---
 doc/src/sgml/func.sgml              | 48 ++++++++++++++++++++
 src/backend/utils/adt/xml.c         | 43 ++++++++++++++++++
 src/include/catalog/pg_proc.dat     |  3 ++
 src/test/regress/expected/xml.out   | 70 +++++++++++++++++++++++++++++
 src/test/regress/expected/xml_1.out | 69 ++++++++++++++++++++++++++++
 src/test/regress/expected/xml_2.out | 70 +++++++++++++++++++++++++++++
 src/test/regress/sql/xml.sql        | 49 ++++++++++++++++++++
 7 files changed, 352 insertions(+)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 7efc81936a..bba9b7591d 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -14436,6 +14436,54 @@ SELECT xmltext('< foo & bar >');
     </para>
    </sect3>
 
+<sect3 id="functions-producing-xml-xmlcanonicalize">
+    <title><literal>xmlcanonicalize</literal></title>
+
+    <indexterm>
+     <primary>xmlcanonicalize</primary>
+    </indexterm>
+
+<synopsis>
+<function>xmlcanonicalize</function> ( <parameter>doc</parameter> <type>xml</type>, <parameter>keep_comments</parameter> <type>boolean</type> ) <returnvalue>xml</returnvalue>
+</synopsis>
+
+    <para>
+     This function transforms a given XML document into its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>,
+     as defined by the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>, which standardizes the document's
+     structure and syntax to facilitate comparison and validation.
+     The <parameter>keep_comments</parameter> parameter controls whether XML comments from the input document are preserved or discarded.
+    </para>
+
+    <para>
+     Example:
+<screen><![CDATA[
+SELECT
+  xmlcanonicalize(
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml, true);
+                               xmlcanonicalize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlcanonicalize(
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml, false);
+                      xmlcanonicalize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+]]></screen>
+    </para>
+   </sect3>
+
    <sect3 id="functions-producing-xml-xmlcomment">
     <title><literal>xmlcomment</literal></title>
 
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index db8d0d6a7e..fb956710b8 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -58,6 +58,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -544,6 +545,48 @@ xmltext(PG_FUNCTION_ARGS)
 #endif							/* not USE_LIBXML */
 }
 
+/**
+ * Converts an XML document to its canonical form according to the
+ * W3C Canonical XML 1.1 specification implemented on xmlC14NDocDumpMemory.
+ */
+Datum
+xmlcanonicalize(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	xmltype    *arg = PG_GETARG_XML_P(0);
+	bool		keep_comments = PG_GETARG_BOOL(1);
+	text	   *result;
+	int			nbytes;
+	xmlDocPtr	doc;
+	xmlChar    *xmlbuf = NULL;
+
+	doc = xml_parse(arg, XMLOPTION_DOCUMENT, false,
+					GetDatabaseEncoding(), NULL, NULL, NULL);
+
+	/*
+	 * This dumps the canonicalized XML doc into the xmlChar* buffer.
+	 * mode = 2 means the doc will be canonicalized using the C14N 1.1 standard.
+	 */
+	nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, keep_comments, &xmlbuf);
+
+	if(doc)
+		xmlFreeDoc(doc);
+
+	if(nbytes < 0)
+		ereport(ERROR,
+			(errcode(ERRCODE_INTERNAL_ERROR),
+			errmsg("could not canonicalize the given XML document")));
+
+	result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+
+	xmlFree(xmlbuf);
+
+	PG_RETURN_XML_P(result);
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif							/* not USE_LIBXML */
+}
 
 /*
  * TODO: xmlconcat needs to merge the notations and unparsed entities
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 9e803d610d..5989d2936b 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9042,6 +9042,9 @@
 { oid => '3813', descr => 'generate XML text node',
   proname => 'xmltext', prorettype => 'xml', proargtypes => 'text',
   prosrc => 'xmltext' },
+{ oid => '3814', descr => 'generate the canonical form of an XML document',
+  proname => 'xmlcanonicalize', prorettype => 'xml', proargtypes => 'xml bool',
+  prosrc => 'xmlcanonicalize' },
 
 { oid => '2923', descr => 'map table contents to XML',
   proname => 'table_to_xml', procost => '100', provolatile => 's',
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 2e9616acda..af17094cca 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -1873,3 +1873,73 @@ SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
  x&lt;P&gt;73&lt;/P&gt;0.42truej
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+                                                                                                                                                                                                                                                                                                                                                                                            xmlcanonicalize                                                                                                                                                                                                                                                                                                                                                                                             
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside root element -->
+(1 row)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+                                                                                                                                                                                   xmlcanonicalize                                                                                                                                                                                    
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo', true);
+ERROR:  invalid XML document
+\set VERBOSITY default
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 7505a14077..a7f10a5036 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -1482,3 +1482,72 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j':...
                              ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+ERROR:  unsupported XML feature
+LINE 2:   ('<?xml version="1.0" encoding="ISO-8859-1"?>
+           ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('  ', true);
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('foo', true);
+ERROR:  unsupported XML feature at character 24
+\set VERBOSITY default
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index c07ed2b269..425e28c528 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -1859,3 +1859,73 @@ SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
  x&lt;P&gt;73&lt;/P&gt;0.42truej
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+                                                                                                                                                                                                                                                                                                                                                                                            xmlcanonicalize                                                                                                                                                                                                                                                                                                                                                                                             
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside root element -->
+(1 row)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+                                                                                                                                                                                   xmlcanonicalize                                                                                                                                                                                    
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo', true);
+ERROR:  invalid XML document
+\set VERBOSITY default
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index bac0388ac1..ec04d1d56d 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -675,3 +675,52 @@ SELECT xmltext('  ');
 SELECT xmltext('foo `$_-+?=*^%!|/\()[]{}');
 SELECT xmltext('foo & <"bar">');
 SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
+
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(NULL, true);
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+SELECT xmlcanonicalize('  ', true);
+SELECT xmlcanonicalize('foo', true);
+\set VERBOSITY default
\ No newline at end of file
-- 
2.34.1

#41

newtglobal postgresql_contributors

postgresql_contributors@newtglobalcorp.com

10 months ago

In reply to: Jim Jones (#40)

Re: [PoC] Add CANONICAL option to xmlserialize

Hi Jim,
I tested the function with dynamically generated XML using `XMLELEMENT`, `XMLATTRIBUTES`, and `XMLCOMMENT`.

Example:

SELECT XMLCANONICALIZE(XMLELEMENT(
NAME employee,
XMLCOMMENT('Employee details start'),
XMLELEMENT(NAME id, 101),
XMLELEMENT(NAME name, 'Mahesh'),
XMLELEMENT(NAME department, 'Engineering'),
XMLCOMMENT('Employee details end')
), true);

The function correctly handled comments and structured XML elements as expected.

Testing with a Table Containing Employee Data
Created a table and inserted sample employee records to verify XML generation.

Table Creation and Data Insertion:

CREATE TABLE employees (id INT, name TEXT, department TEXT);
INSERT INTO employees VALUES (1, 'Alice', 'HR'), (2, 'Bob', 'IT');

Canonicalizing Employee XML Data:

SELECT XMLCANONICALIZE(XMLELEMENT(
NAME employee,
XMLATTRIBUTES(id AS id),
XMLCOMMENT('Employee details start'),
XMLELEMENT(NAME name, name),
XMLELEMENT(NAME department, department),
XMLCOMMENT('Employee details end')
), true)
FROM employees;

The patch correctly processes XML elements and attributes from database records.

Testing with Dynamic Comments
Added a column to store dynamic comments and verified if `XMLCANONICALIZE` handles them properly.

Modifications:
ALTER TABLE employees ADD COLUMN comment TEXT;
UPDATE employees SET comment = 'Employee details for ' || name;

Verification Query:

SELECT XMLCANONICALIZE(XMLELEMENT(
NAME employee,
XMLATTRIBUTES(id AS id),
XMLCOMMENT(comment),
XMLELEMENT(NAME name, name),
XMLELEMENT(NAME department, department)
), true)
FROM employees;

Dynamic comments were correctly included in the XML output.

Testing with Manual Newlines
Inserted manual newlines to check the function's behavior with formatted XML.

SELECT XMLCANONICALIZE(
XMLELEMENT(
NAME employee,
XMLATTRIBUTES(id AS id),
XMLCOMMENT(comment),
E'\n ',
XMLELEMENT(NAME name, name),
E'\n ',
XMLELEMENT(NAME department, department),
E'\n'
),
true
)
FROM employees;

Whitespace and newlines were correctly handled in XML output.

After testing various scenarios, I found that `XMLCANONICALIZE` is working as expected. It:
- Removes unnecessary whitespace and newlines.
- Sorts attributes in a consistent order.
- Converts empty elements to start-end tag pairs.
- Preserves or removes comments based on the flag.
- Works well with table data and dynamically generated XML.

Regards,
NewtGlobal PostgreSQL contributors

#42

Jim Jones

jim.jones@uni-muenster.de

10 months ago

In reply to: newtglobal postgresql_contributors (#41)

Re: [PoC] Add CANONICAL option to xmlserialize

On 12.03.25 08:21, newtglobal postgresql_contributors wrote:

The following review has been posted through the commitfest application:
make installcheck-world: tested, failed
Implements feature: tested, failed
Spec compliant: tested, failed
Documentation: tested, failed

Did it really fail?

After testing various scenarios, I found that `XMLCANONICALIZE` is working as expected. It:
- Removes unnecessary whitespace and newlines.
- Sorts attributes in a consistent order.
- Converts empty elements to start-end tag pairs.
- Preserves or removes comments based on the flag.
- Works well with table data and dynamically generated XML.

Thanks a lot for the thorough review. Much appreciated!

Best regards, Jim

#43

Jim Jones

jim.jones@uni-muenster.de

10 months ago

In reply to: Jim Jones (#40)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

While reviewing another patch, I noticed that the keep_comments argument
was missing a default value. To allow calling the function with only the
XML argument, I've added DEFAULT true to keep_comments.

SELECT xmlcanonicalize(xmldoc);

Updated patch (v16) attached.

Best, Jim

Attachments:

v16-0001-Add-xmlcanonicalize-function.patchtext/x-patch; charset=UTF-8; name=v16-0001-Add-xmlcanonicalize-function.patchDownload

From 28f7c62de548468c8fb15ba95d4a68e2fdf0528e Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Fri, 21 Mar 2025 09:46:49 +0100
Subject: [PATCH v16] Add xmlcanonicalize function

This patch adds the xmlcanonicalize function, which transforms an
XML document into its canonical form according to the W3C Canonical
XML Version 1.1 specification.

xmlcanonicalize(doc xml, keep_comments boolean DEFAULT true) -> xml

* doc: The XML document to be canonicalized.
* keep_comments: A flag indicating whether to preserve or discard
  XML comments from the input document. If omitted, it defaults to
'true'.

This implementation is based on the xmlC14NDocDumpMemory function
from the C14N module of libxml2.
---
 doc/src/sgml/func.sgml                   | 50 +++++++++++++++
 src/backend/catalog/system_functions.sql |  6 ++
 src/backend/utils/adt/xml.c              | 43 +++++++++++++
 src/include/catalog/pg_proc.dat          |  3 +
 src/test/regress/expected/xml.out        | 82 ++++++++++++++++++++++++
 src/test/regress/expected/xml_1.out      | 80 +++++++++++++++++++++++
 src/test/regress/expected/xml_2.out      | 82 ++++++++++++++++++++++++
 src/test/regress/sql/xml.sql             | 52 +++++++++++++++
 8 files changed, 398 insertions(+)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 6fa1d6586b..7635112854 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -14585,6 +14585,56 @@ SELECT xmltext('< foo & bar >');
     </para>
    </sect3>
 
+<sect3 id="functions-producing-xml-xmlcanonicalize">
+    <title><literal>xmlcanonicalize</literal></title>
+
+    <indexterm>
+     <primary>xmlcanonicalize</primary>
+    </indexterm>
+
+<synopsis>
+<function>xmlcanonicalize</function> ( <parameter>doc</parameter> <type>xml</type> [, <parameter>keep_comments</parameter> <type>boolean</type> DEFAULT <literal>true</literal>] ) <returnvalue>xml</returnvalue>
+
+</synopsis>
+
+    <para>
+     This function transforms a given XML document into its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>,
+     as defined by the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>, which standardizes the document's
+     structure and syntax to facilitate comparison and validation.
+     The <parameter>keep_comments</parameter> parameter controls whether XML comments from the input document are preserved or discarded.
+     If omitted, it defaults to <literal>true</literal>.
+    </para>
+
+    <para>
+     Example:
+<screen><![CDATA[
+SELECT
+  xmlcanonicalize(
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml);
+                               xmlcanonicalize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlcanonicalize(
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml, false);
+                      xmlcanonicalize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+]]></screen>
+    </para>
+   </sect3>
+
    <sect3 id="functions-producing-xml-xmlcomment">
     <title><literal>xmlcomment</literal></title>
 
diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql
index 566f308e44..15c33335dc 100644
--- a/src/backend/catalog/system_functions.sql
+++ b/src/backend/catalog/system_functions.sql
@@ -268,6 +268,12 @@ CREATE OR REPLACE FUNCTION xpath_exists(text, xml)
  IMMUTABLE PARALLEL SAFE STRICT COST 1
 RETURN xpath_exists($1, $2, '{}'::text[]);
 
+CREATE OR REPLACE FUNCTION xmlcanonicalize(xml, boolean DEFAULT true)
+ RETURNS xml
+ LANGUAGE internal
+ IMMUTABLE PARALLEL SAFE STRICT
+AS 'xmlcanonicalize';
+
 CREATE OR REPLACE FUNCTION pg_sleep_for(interval)
  RETURNS void
  LANGUAGE sql
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index db8d0d6a7e..fb956710b8 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -58,6 +58,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -544,6 +545,48 @@ xmltext(PG_FUNCTION_ARGS)
 #endif							/* not USE_LIBXML */
 }
 
+/**
+ * Converts an XML document to its canonical form according to the
+ * W3C Canonical XML 1.1 specification implemented on xmlC14NDocDumpMemory.
+ */
+Datum
+xmlcanonicalize(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	xmltype    *arg = PG_GETARG_XML_P(0);
+	bool		keep_comments = PG_GETARG_BOOL(1);
+	text	   *result;
+	int			nbytes;
+	xmlDocPtr	doc;
+	xmlChar    *xmlbuf = NULL;
+
+	doc = xml_parse(arg, XMLOPTION_DOCUMENT, false,
+					GetDatabaseEncoding(), NULL, NULL, NULL);
+
+	/*
+	 * This dumps the canonicalized XML doc into the xmlChar* buffer.
+	 * mode = 2 means the doc will be canonicalized using the C14N 1.1 standard.
+	 */
+	nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, keep_comments, &xmlbuf);
+
+	if(doc)
+		xmlFreeDoc(doc);
+
+	if(nbytes < 0)
+		ereport(ERROR,
+			(errcode(ERRCODE_INTERNAL_ERROR),
+			errmsg("could not canonicalize the given XML document")));
+
+	result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+
+	xmlFree(xmlbuf);
+
+	PG_RETURN_XML_P(result);
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif							/* not USE_LIBXML */
+}
 
 /*
  * TODO: xmlconcat needs to merge the notations and unparsed entities
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 890822eaf7..f279943176 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9071,6 +9071,9 @@
 { oid => '3813', descr => 'generate XML text node',
   proname => 'xmltext', prorettype => 'xml', proargtypes => 'text',
   prosrc => 'xmltext' },
+{ oid => '3814', descr => 'generate the canonical form of an XML document',
+  proname => 'xmlcanonicalize', prorettype => 'xml', proargtypes => 'xml bool',
+  prosrc => 'xmlcanonicalize' },
 
 { oid => '2923', descr => 'map table contents to XML',
   proname => 'table_to_xml', procost => '100', provolatile => 's',
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index bcc743f485..63d0e71259 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -1877,3 +1877,85 @@ SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
  x&lt;P&gt;73&lt;/P&gt;0.42truej
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+                                                                                                                                                                                                                                                                                                                                                                                            xmlcanonicalize                                                                                                                                                                                                                                                                                                                                                                                             
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside root element -->
+(1 row)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+                                                                                                                                                                                   xmlcanonicalize                                                                                                                                                                                    
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('');
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ');
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo');
+ERROR:  invalid XML document
+\set VERBOSITY default
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index a1c5d31417..cf5dab372d 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -1492,3 +1492,83 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j':...
                              ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+ERROR:  unsupported XML feature
+LINE 2:   ('<?xml version="1.0" encoding="ISO-8859-1"?>
+           ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('  ', true);
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('foo', true);
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('');
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('  ');
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('foo');
+ERROR:  unsupported XML feature at character 24
+\set VERBOSITY default
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index 045641dae6..0735cfd9bc 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -1863,3 +1863,85 @@ SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
  x&lt;P&gt;73&lt;/P&gt;0.42truej
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+                                                                                                                                                                                                                                                                                                                                                                                            xmlcanonicalize                                                                                                                                                                                                                                                                                                                                                                                             
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside root element -->
+(1 row)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+                                                                                                                                                                                   xmlcanonicalize                                                                                                                                                                                    
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('');
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ');
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo');
+ERROR:  invalid XML document
+\set VERBOSITY default
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index 4c3520ce89..c0c5b2a419 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -677,3 +677,55 @@ SELECT xmltext('  ');
 SELECT xmltext('foo `$_-+?=*^%!|/\()[]{}');
 SELECT xmltext('foo & <"bar">');
 SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
+
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(NULL, true);
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+SELECT xmlcanonicalize('  ', true);
+SELECT xmlcanonicalize('foo', true);
+SELECT xmlcanonicalize('');
+SELECT xmlcanonicalize('  ');
+SELECT xmlcanonicalize('foo');
+\set VERBOSITY default
\ No newline at end of file
-- 
2.34.1

#44

Jim Jones

jim.jones@uni-muenster.de

9 months ago

In reply to: Jim Jones (#43)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

v17 attached adds a (missing) reference to C14N 1.1 specification.

Jim

Attachments:

v17-0001-Add-xmlcanonicalize-function.patchtext/x-patch; charset=UTF-8; name=v17-0001-Add-xmlcanonicalize-function.patchDownload

From e5d1b74af7ff17eaa4ef35cdeaa8305fcad6ce6f Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Mon, 7 Apr 2025 14:00:16 +0200
Subject: [PATCH v17] Add xmlcanonicalize function

This patch adds the xmlcanonicalize function, which transforms an
XML document into its canonical form according to the W3C C14N 1.1
specification.

xmlcanonicalize(doc xml, keep_comments boolean DEFAULT true) -> xml

* doc: The XML document to be canonicalized.
* keep_comments: A flag indicating whether to preserve or discard
  XML comments from the input document. If omitted, it defaults to
'true'.

This implementation is based on the xmlC14NDocDumpMemory function
from the C14N module of libxml2.
---
 doc/src/sgml/func.sgml                   | 50 +++++++++++++++
 src/backend/catalog/system_functions.sql |  6 ++
 src/backend/utils/adt/xml.c              | 43 +++++++++++++
 src/include/catalog/pg_proc.dat          |  3 +
 src/test/regress/expected/xml.out        | 82 ++++++++++++++++++++++++
 src/test/regress/expected/xml_1.out      | 80 +++++++++++++++++++++++
 src/test/regress/expected/xml_2.out      | 82 ++++++++++++++++++++++++
 src/test/regress/sql/xml.sql             | 52 +++++++++++++++
 8 files changed, 398 insertions(+)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 0224f93733..d8b857d151 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -14623,6 +14623,56 @@ SELECT xmltext('< foo & bar >');
     </para>
    </sect3>
 
+<sect3 id="functions-producing-xml-xmlcanonicalize">
+    <title><literal>xmlcanonicalize</literal></title>
+
+    <indexterm>
+     <primary>xmlcanonicalize</primary>
+    </indexterm>
+
+<synopsis>
+<function>xmlcanonicalize</function> ( <parameter>doc</parameter> <type>xml</type> [, <parameter>keep_comments</parameter> <type>boolean</type> DEFAULT <literal>true</literal>] ) <returnvalue>xml</returnvalue>
+
+</synopsis>
+
+    <para>
+     This function transforms a given XML document into its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>,
+     as defined by the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>, which standardizes the document's
+     structure and syntax to facilitate comparison and validation.
+     The <parameter>keep_comments</parameter> parameter controls whether XML comments from the input document are preserved or discarded.
+     If omitted, it defaults to <literal>true</literal>.
+    </para>
+
+    <para>
+     Example:
+<screen><![CDATA[
+SELECT
+  xmlcanonicalize(
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml);
+                               xmlcanonicalize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlcanonicalize(
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml, false);
+                      xmlcanonicalize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+]]></screen>
+    </para>
+   </sect3>
+
    <sect3 id="functions-producing-xml-xmlcomment">
     <title><literal>xmlcomment</literal></title>
 
diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql
index 566f308e44..15c33335dc 100644
--- a/src/backend/catalog/system_functions.sql
+++ b/src/backend/catalog/system_functions.sql
@@ -268,6 +268,12 @@ CREATE OR REPLACE FUNCTION xpath_exists(text, xml)
  IMMUTABLE PARALLEL SAFE STRICT COST 1
 RETURN xpath_exists($1, $2, '{}'::text[]);
 
+CREATE OR REPLACE FUNCTION xmlcanonicalize(xml, boolean DEFAULT true)
+ RETURNS xml
+ LANGUAGE internal
+ IMMUTABLE PARALLEL SAFE STRICT
+AS 'xmlcanonicalize';
+
 CREATE OR REPLACE FUNCTION pg_sleep_for(interval)
  RETURNS void
  LANGUAGE sql
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index db8d0d6a7e..fb956710b8 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -58,6 +58,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -544,6 +545,48 @@ xmltext(PG_FUNCTION_ARGS)
 #endif							/* not USE_LIBXML */
 }
 
+/**
+ * Converts an XML document to its canonical form according to the
+ * W3C Canonical XML 1.1 specification implemented on xmlC14NDocDumpMemory.
+ */
+Datum
+xmlcanonicalize(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	xmltype    *arg = PG_GETARG_XML_P(0);
+	bool		keep_comments = PG_GETARG_BOOL(1);
+	text	   *result;
+	int			nbytes;
+	xmlDocPtr	doc;
+	xmlChar    *xmlbuf = NULL;
+
+	doc = xml_parse(arg, XMLOPTION_DOCUMENT, false,
+					GetDatabaseEncoding(), NULL, NULL, NULL);
+
+	/*
+	 * This dumps the canonicalized XML doc into the xmlChar* buffer.
+	 * mode = 2 means the doc will be canonicalized using the C14N 1.1 standard.
+	 */
+	nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, keep_comments, &xmlbuf);
+
+	if(doc)
+		xmlFreeDoc(doc);
+
+	if(nbytes < 0)
+		ereport(ERROR,
+			(errcode(ERRCODE_INTERNAL_ERROR),
+			errmsg("could not canonicalize the given XML document")));
+
+	result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+
+	xmlFree(xmlbuf);
+
+	PG_RETURN_XML_P(result);
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif							/* not USE_LIBXML */
+}
 
 /*
  * TODO: xmlconcat needs to merge the notations and unparsed entities
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 5d5be8ba4e..64e27da85a 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9121,6 +9121,9 @@
 { oid => '3813', descr => 'generate XML text node',
   proname => 'xmltext', prorettype => 'xml', proargtypes => 'text',
   prosrc => 'xmltext' },
+{ oid => '3814', descr => 'generate the canonical form of an XML document',
+  proname => 'xmlcanonicalize', prorettype => 'xml', proargtypes => 'xml bool',
+  prosrc => 'xmlcanonicalize' },
 
 { oid => '2923', descr => 'map table contents to XML',
   proname => 'table_to_xml', procost => '100', provolatile => 's',
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index bcc743f485..63d0e71259 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -1877,3 +1877,85 @@ SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
  x&lt;P&gt;73&lt;/P&gt;0.42truej
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+                                                                                                                                                                                                                                                                                                                                                                                            xmlcanonicalize                                                                                                                                                                                                                                                                                                                                                                                             
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside root element -->
+(1 row)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+                                                                                                                                                                                   xmlcanonicalize                                                                                                                                                                                    
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('');
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ');
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo');
+ERROR:  invalid XML document
+\set VERBOSITY default
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index a1c5d31417..cf5dab372d 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -1492,3 +1492,83 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j':...
                              ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+ERROR:  unsupported XML feature
+LINE 2:   ('<?xml version="1.0" encoding="ISO-8859-1"?>
+           ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('  ', true);
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('foo', true);
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('');
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('  ');
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('foo');
+ERROR:  unsupported XML feature at character 24
+\set VERBOSITY default
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index 045641dae6..0735cfd9bc 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -1863,3 +1863,85 @@ SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
  x&lt;P&gt;73&lt;/P&gt;0.42truej
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+                                                                                                                                                                                                                                                                                                                                                                                            xmlcanonicalize                                                                                                                                                                                                                                                                                                                                                                                             
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside root element -->
+(1 row)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+                                                                                                                                                                                   xmlcanonicalize                                                                                                                                                                                    
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('');
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ');
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo');
+ERROR:  invalid XML document
+\set VERBOSITY default
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index 4c3520ce89..c0c5b2a419 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -677,3 +677,55 @@ SELECT xmltext('  ');
 SELECT xmltext('foo `$_-+?=*^%!|/\()[]{}');
 SELECT xmltext('foo & <"bar">');
 SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
+
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(NULL, true);
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+SELECT xmlcanonicalize('  ', true);
+SELECT xmlcanonicalize('foo', true);
+SELECT xmlcanonicalize('');
+SELECT xmlcanonicalize('  ');
+SELECT xmlcanonicalize('foo');
+\set VERBOSITY default
\ No newline at end of file
-- 
2.34.1

#45

Jim Jones

jim.jones@uni-muenster.de

7 months ago

In reply to: Jim Jones (#44)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

rebase and add an extra check for xmlbuf after the xmlC14NDocDumpMemory
call:

if (nbytes < 0 || xmlbuf == NULL)
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg("could not canonicalize XML document")));
--
Jim

Attachments:

v18-0001-Add-xmlcanonicalize-function.patchtext/x-patch; charset=UTF-8; name=v18-0001-Add-xmlcanonicalize-function.patchDownload

From b892f597cbb8c88cdc9db60adb4f407b5bb6c22d Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Tue, 10 Jun 2025 10:46:24 +0200
Subject: [PATCH v18] Add xmlcanonicalize function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds the xmlcanonicalize function, which transforms an
XML document into its canonical form according to the W3C C14N 1.1
specification.

xmlcanonicalize(doc xml, keep_comments boolean DEFAULT true) -> xml

* doc: The XML document to be canonicalized.
* keep_comments: A flag indicating whether to preserve or discard
  XML comments from the input document. If omitted, it defaults to
'true'.

This function is implemented using the xmlC14NDocDumpMemory()
function from libxml2’s Canonical XML (C14N) module.
---
 doc/src/sgml/func.sgml                   | 50 +++++++++++++++
 src/backend/catalog/system_functions.sql |  6 ++
 src/backend/utils/adt/xml.c              | 60 +++++++++++++++++
 src/include/catalog/pg_proc.dat          |  3 +
 src/test/regress/expected/xml.out        | 82 ++++++++++++++++++++++++
 src/test/regress/expected/xml_1.out      | 80 +++++++++++++++++++++++
 src/test/regress/expected/xml_2.out      | 82 ++++++++++++++++++++++++
 src/test/regress/sql/xml.sql             | 52 +++++++++++++++
 8 files changed, 415 insertions(+)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index c67688cbf5..db42d7856e 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -14623,6 +14623,56 @@ SELECT xmltext('< foo & bar >');
     </para>
    </sect3>
 
+<sect3 id="functions-producing-xml-xmlcanonicalize">
+    <title><literal>xmlcanonicalize</literal></title>
+
+    <indexterm>
+     <primary>xmlcanonicalize</primary>
+    </indexterm>
+
+<synopsis>
+<function>xmlcanonicalize</function> ( <parameter>doc</parameter> <type>xml</type> [, <parameter>keep_comments</parameter> <type>boolean</type> DEFAULT <literal>true</literal>] ) <returnvalue>xml</returnvalue>
+
+</synopsis>
+
+    <para>
+     This function transforms a given XML document into its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>,
+     as defined by the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>, which standardizes the document's
+     structure and syntax to facilitate comparison and validation.
+     The <parameter>keep_comments</parameter> parameter controls whether XML comments from the input document are preserved or discarded.
+     If omitted, it defaults to <literal>true</literal>.
+    </para>
+
+    <para>
+     Example:
+<screen><![CDATA[
+SELECT
+  xmlcanonicalize(
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml);
+                               xmlcanonicalize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlcanonicalize(
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml, false);
+                      xmlcanonicalize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+]]></screen>
+    </para>
+   </sect3>
+
    <sect3 id="functions-producing-xml-xmlcomment">
     <title><literal>xmlcomment</literal></title>
 
diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql
index 566f308e44..15c33335dc 100644
--- a/src/backend/catalog/system_functions.sql
+++ b/src/backend/catalog/system_functions.sql
@@ -268,6 +268,12 @@ CREATE OR REPLACE FUNCTION xpath_exists(text, xml)
  IMMUTABLE PARALLEL SAFE STRICT COST 1
 RETURN xpath_exists($1, $2, '{}'::text[]);
 
+CREATE OR REPLACE FUNCTION xmlcanonicalize(xml, boolean DEFAULT true)
+ RETURNS xml
+ LANGUAGE internal
+ IMMUTABLE PARALLEL SAFE STRICT
+AS 'xmlcanonicalize';
+
 CREATE OR REPLACE FUNCTION pg_sleep_for(interval)
  RETURNS void
  LANGUAGE sql
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index a4150bff2e..094eb4c33e 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -58,6 +58,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -544,6 +545,65 @@ xmltext(PG_FUNCTION_ARGS)
 #endif							/* not USE_LIBXML */
 }
 
+/*
+ * Canonicalizes the given XML document according to the W3C Canonical XML 1.1
+ * specification, using libxml2's xmlC14NDocDumpMemory().
+ *
+ * The input XML must be a well-formed document (not a fragment). The
+ * canonical form is deterministic and useful for digital signatures and
+ * comparing logically equivalent XML.
+ *
+ * The second argument determines whether comments are preserved
+ * (true) or omitted (false) in the canonicalized output.
+ */
+Datum xmlcanonicalize(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	xmltype *arg = PG_GETARG_XML_P(0);
+	bool keep_comments = PG_GETARG_BOOL(1);
+	text *result;
+	int nbytes;
+	xmlDocPtr doc;
+	xmlChar *xmlbuf = NULL;
+
+	/* Parse the input xmltype into a full XML document */
+	doc = xml_parse(arg, XMLOPTION_DOCUMENT, false,
+					GetDatabaseEncoding(), NULL, NULL, NULL);
+
+	/*
+	 * Canonicalize the XML document into memory using Canonical XML 1.1.
+	 *
+	 * xmlC14NDocDumpMemory arguments:
+	 * - doc: the XML document to canonicalize (already parsed above)
+	 * - nodes: NULL means the entire document is canonicalized
+	 * - mode: 2 selects the Canonical XML 1.1 algorithm (xmlC14NMode enum)
+	 * - inclusive_ns_prefixes: NULL includes all namespaces by default
+	 * - with_comments: determined by keep_comments argument
+	 * - doc_txt_ptr: output buffer receiving the canonicalized XML (xmlbuf)
+	 *
+	 * On success, xmlbuf points to the serialized canonical form,
+	 * and nbytes holds its size.
+	 */
+	nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, keep_comments, &xmlbuf);
+
+	if (doc)
+		xmlFreeDoc(doc);
+
+	if (nbytes < 0 || xmlbuf == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 errmsg("could not canonicalize XML document")));
+
+	result = cstring_to_text_with_len((const char *)xmlbuf, nbytes);
+
+	xmlFree(xmlbuf);
+
+	PG_RETURN_XML_P(result);
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif /* not USE_LIBXML */
+}
 
 /*
  * TODO: xmlconcat needs to merge the notations and unparsed entities
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index d3d28a263f..d6c5b80a8f 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9133,6 +9133,9 @@
 { oid => '3813', descr => 'generate XML text node',
   proname => 'xmltext', prorettype => 'xml', proargtypes => 'text',
   prosrc => 'xmltext' },
+{ oid => '3814', descr => 'generate the canonical form of an XML document',
+  proname => 'xmlcanonicalize', prorettype => 'xml', proargtypes => 'xml bool',
+  prosrc => 'xmlcanonicalize' },
 
 { oid => '2923', descr => 'map table contents to XML',
   proname => 'table_to_xml', procost => '100', provolatile => 's',
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 103a22a3b1..688c0fc3e9 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -1881,3 +1881,85 @@ SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
  x&lt;P&gt;73&lt;/P&gt;0.42truej
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+                                                                                                                                                                                                                                                                                                                                                                                            xmlcanonicalize                                                                                                                                                                                                                                                                                                                                                                                             
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside root element -->
+(1 row)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+                                                                                                                                                                                   xmlcanonicalize                                                                                                                                                                                    
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('');
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ');
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo');
+ERROR:  invalid XML document
+\set VERBOSITY default
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 73c411118a..8bc3ac1c96 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -1496,3 +1496,83 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j':...
                              ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+ERROR:  unsupported XML feature
+LINE 2:   ('<?xml version="1.0" encoding="ISO-8859-1"?>
+           ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('  ', true);
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('foo', true);
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('');
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('  ');
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('foo');
+ERROR:  unsupported XML feature at character 24
+\set VERBOSITY default
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index a85d95358d..4ce36ff825 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -1867,3 +1867,85 @@ SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
  x&lt;P&gt;73&lt;/P&gt;0.42truej
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+                                                                                                                                                                                                                                                                                                                                                                                            xmlcanonicalize                                                                                                                                                                                                                                                                                                                                                                                             
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside root element -->
+(1 row)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+                                                                                                                                                                                   xmlcanonicalize                                                                                                                                                                                    
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('');
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ');
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo');
+ERROR:  invalid XML document
+\set VERBOSITY default
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index 0ea4f50883..4af51a9908 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -679,3 +679,55 @@ SELECT xmltext('  ');
 SELECT xmltext('foo `$_-+?=*^%!|/\()[]{}');
 SELECT xmltext('foo & <"bar">');
 SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
+
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(NULL, true);
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+SELECT xmlcanonicalize('  ', true);
+SELECT xmlcanonicalize('foo', true);
+SELECT xmlcanonicalize('');
+SELECT xmlcanonicalize('  ');
+SELECT xmlcanonicalize('foo');
+\set VERBOSITY default
\ No newline at end of file
-- 
2.34.1

#46

Jim Jones

jim.jones@uni-muenster.de

5 months ago

In reply to: Jim Jones (#45)

1 attachment(s)

Re: [PATCH] Add CANONICAL option to xmlserialize

rebased due to recent changes in func-xml.sgml

Additionally, v19 now wraps the xmlC14NDocDumpMemory call in a
PG_TRY/PG_CATCH.

Best regards, Jim

Attachments:

v19-0001-Add-xmlcanonicalize-function.patchtext/x-patch; charset=UTF-8; name=v19-0001-Add-xmlcanonicalize-function.patchDownload

From 64c37bc75a14dadf1ae3fc1daa2112dfa89efd06 Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Wed, 6 Aug 2025 17:31:57 +0200
Subject: [PATCH v19] Add xmlcanonicalize function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds the xmlcanonicalize(xml, boolean) function, which
transforms a well-formed XML document into its canonical form
according to the W3C Canonical XML 1.1 (C14N 1.1) specification.

    xmlcanonicalize(doc xml, keep_comments boolean DEFAULT true) RETURNS xml

- doc: the XML document to be canonicalized.
- keep_comments: whether to preserve XML comments (default: true).

This function is implemented using the xmlC14NDocDumpMemory()
function from libxml2’s Canonical XML (C14N) module.
---
 doc/src/sgml/func/func-xml.sgml          | 50 +++++++++++++++
 src/backend/catalog/system_functions.sql |  6 ++
 src/backend/utils/adt/xml.c              | 81 +++++++++++++++++++++++
 src/include/catalog/pg_proc.dat          |  3 +
 src/test/regress/expected/xml.out        | 82 ++++++++++++++++++++++++
 src/test/regress/expected/xml_1.out      | 80 +++++++++++++++++++++++
 src/test/regress/expected/xml_2.out      | 82 ++++++++++++++++++++++++
 src/test/regress/sql/xml.sql             | 52 +++++++++++++++
 8 files changed, 436 insertions(+)

diff --git a/doc/src/sgml/func/func-xml.sgml b/doc/src/sgml/func/func-xml.sgml
index 21f34467a4..0e958bda2e 100644
--- a/doc/src/sgml/func/func-xml.sgml
+++ b/doc/src/sgml/func/func-xml.sgml
@@ -61,6 +61,56 @@ SELECT xmltext('< foo & bar >');
     </para>
    </sect3>
 
+<sect3 id="functions-producing-xml-xmlcanonicalize">
+    <title><literal>xmlcanonicalize</literal></title>
+
+    <indexterm>
+     <primary>xmlcanonicalize</primary>
+    </indexterm>
+
+<synopsis>
+<function>xmlcanonicalize</function> ( <parameter>doc</parameter> <type>xml</type> [, <parameter>keep_comments</parameter> <type>boolean</type> DEFAULT <literal>true</literal>] ) <returnvalue>xml</returnvalue>
+
+</synopsis>
+
+    <para>
+     This function transforms a given XML document into its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>,
+     as defined by the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>, which standardizes the document's
+     structure and syntax to facilitate comparison and validation.
+     The <parameter>keep_comments</parameter> parameter controls whether XML comments from the input document are preserved or discarded.
+     If omitted, it defaults to <literal>true</literal>.
+    </para>
+
+    <para>
+     Example:
+<screen><![CDATA[
+SELECT
+  xmlcanonicalize(
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml);
+                               xmlcanonicalize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlcanonicalize(
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml, false);
+                      xmlcanonicalize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+]]></screen>
+    </para>
+   </sect3>
+
    <sect3 id="functions-producing-xml-xmlcomment">
     <title><literal>xmlcomment</literal></title>
 
diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql
index 566f308e44..15c33335dc 100644
--- a/src/backend/catalog/system_functions.sql
+++ b/src/backend/catalog/system_functions.sql
@@ -268,6 +268,12 @@ CREATE OR REPLACE FUNCTION xpath_exists(text, xml)
  IMMUTABLE PARALLEL SAFE STRICT COST 1
 RETURN xpath_exists($1, $2, '{}'::text[]);
 
+CREATE OR REPLACE FUNCTION xmlcanonicalize(xml, boolean DEFAULT true)
+ RETURNS xml
+ LANGUAGE internal
+ IMMUTABLE PARALLEL SAFE STRICT
+AS 'xmlcanonicalize';
+
 CREATE OR REPLACE FUNCTION pg_sleep_for(interval)
  RETURNS void
  LANGUAGE sql
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 182e8f75db..0a4aa64ec2 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -58,6 +58,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -566,6 +567,86 @@ xmltext(PG_FUNCTION_ARGS)
 #endif							/* not USE_LIBXML */
 }
 
+/*
+ * Canonicalizes the given XML document according to the W3C Canonical XML 1.1
+ * specification, using libxml2's xmlC14NDocDumpMemory().
+ *
+ * The input XML must be a well-formed document (not a fragment). The
+ * canonical form is deterministic and useful for digital signatures and
+ * comparing logically equivalent XML.
+ *
+ * The second argument determines whether comments are preserved
+ * (true) or omitted (false) in the canonicalized output.
+ */
+Datum xmlcanonicalize(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	xmltype *arg = PG_GETARG_XML_P(0);
+	bool keep_comments = PG_GETARG_BOOL(1);
+	text *result;
+	volatile xmlChar *xmlbuf = NULL;
+	volatile int nbytes = 0;
+	volatile xmlDocPtr doc = NULL;
+	PgXmlErrorContext *xmlerrcxt;
+
+	/* Set up XML error context for proper libxml2 error integration */
+	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
+
+	PG_TRY();
+	{
+		/* Parse the input as a full XML document */
+		doc = xml_parse(arg, XMLOPTION_DOCUMENT, false,
+						GetDatabaseEncoding(), NULL, NULL, (Node *)xmlerrcxt);
+
+		/*
+		 * xmlC14NDocDumpMemory arguments:
+		 * - doc: the XML document to canonicalize (already parsed above)
+		 * - nodes: NULL means the entire document is canonicalized
+		 * - mode: 2 selects the Canonical XML 1.1 algorithm (xmlC14NMode enum)
+		 * - inclusive_ns_prefixes: NULL includes all namespaces by default
+		 * - with_comments: determined by keep_comments argument
+		 * - doc_txt_ptr: output buffer receiving the canonicalized XML (xmlbuf)
+		 *
+		 * On success, xmlbuf points to the serialized canonical form, and nbytes
+		 * holds its size.
+		 */
+		nbytes = xmlC14NDocDumpMemory(doc,
+									  NULL, /* entire document */
+									  2,	/* xmlC14NMode 1.1 */
+									  NULL, /* all namespaces */
+									  keep_comments,
+									  (xmlChar **)&xmlbuf);
+
+		if (nbytes < 0 || xmlbuf == NULL || xmlerrcxt->err_occurred)
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+						"could not canonicalize XML document");
+
+		result = cstring_to_text_with_len((const char *)xmlbuf, nbytes);
+	}
+	PG_CATCH();
+	{
+		if (doc)
+			xmlFreeDoc((xmlDocPtr)doc);
+		if (xmlbuf)
+			xmlFree((xmlChar *)xmlbuf);
+
+		pg_xml_done(xmlerrcxt, true);
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+
+	if (doc)
+		xmlFreeDoc((xmlDocPtr)doc);
+	if (xmlbuf)
+		xmlFree((xmlChar *)xmlbuf);
+	pg_xml_done(xmlerrcxt, false);
+
+	PG_RETURN_XML_P(result);
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif /* not USE_LIBXML */
+}
 
 /*
  * TODO: xmlconcat needs to merge the notations and unparsed entities
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 118d6da1ac..d16b118c7d 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9159,6 +9159,9 @@
 { oid => '3813', descr => 'generate XML text node',
   proname => 'xmltext', prorettype => 'xml', proargtypes => 'text',
   prosrc => 'xmltext' },
+{ oid => '3814', descr => 'generate the canonical form of an XML document',
+  proname => 'xmlcanonicalize', prorettype => 'xml', proargtypes => 'xml bool',
+  prosrc => 'xmlcanonicalize' },
 
 { oid => '2923', descr => 'map table contents to XML',
   proname => 'table_to_xml', procost => '100', provolatile => 's',
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 103a22a3b1..688c0fc3e9 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -1881,3 +1881,85 @@ SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
  x&lt;P&gt;73&lt;/P&gt;0.42truej
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+                                                                                                                                                                                                                                                                                                                                                                                            xmlcanonicalize                                                                                                                                                                                                                                                                                                                                                                                             
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside root element -->
+(1 row)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+                                                                                                                                                                                   xmlcanonicalize                                                                                                                                                                                    
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('');
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ');
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo');
+ERROR:  invalid XML document
+\set VERBOSITY default
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 73c411118a..8bc3ac1c96 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -1496,3 +1496,83 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j':...
                              ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+ERROR:  unsupported XML feature
+LINE 2:   ('<?xml version="1.0" encoding="ISO-8859-1"?>
+           ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+(0 rows)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('  ', true);
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('foo', true);
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('');
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('  ');
+ERROR:  unsupported XML feature at character 24
+SELECT xmlcanonicalize('foo');
+ERROR:  unsupported XML feature at character 24
+\set VERBOSITY default
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index a85d95358d..4ce36ff825 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -1867,3 +1867,85 @@ SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
  x&lt;P&gt;73&lt;/P&gt;0.42truej
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+                                                                                                                                                                                                                                                                                                                                                                                            xmlcanonicalize                                                                                                                                                                                                                                                                                                                                                                                             
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside root element -->
+(1 row)
+
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+                                                                                                                                                                                   xmlcanonicalize                                                                                                                                                                                    
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+SELECT xmlcanonicalize(NULL, true);
+ xmlcanonicalize 
+-----------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo', true);
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('');
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('  ');
+ERROR:  invalid XML document
+SELECT xmlcanonicalize('foo');
+ERROR:  invalid XML document
+\set VERBOSITY default
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index 0ea4f50883..4af51a9908 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -679,3 +679,55 @@ SELECT xmltext('  ');
 SELECT xmltext('foo `$_-+?=*^%!|/\()[]{}');
 SELECT xmltext('foo & <"bar">');
 SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
+
+-- xmlserialize: canonical
+CREATE TABLE xmlcanonicalize_test (doc xml);
+INSERT INTO xmlcanonicalize_test VALUES
+  ('<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>      <!-- comment outside root element -->          ');
+
+SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
+SELECT xmlcanonicalize(NULL, true);
+
+\set VERBOSITY terse
+SELECT xmlcanonicalize('', true);
+SELECT xmlcanonicalize('  ', true);
+SELECT xmlcanonicalize('foo', true);
+SELECT xmlcanonicalize('');
+SELECT xmlcanonicalize('  ');
+SELECT xmlcanonicalize('foo');
+\set VERBOSITY default
\ No newline at end of file
-- 
2.43.0