Re: [REVIEW] Re: Fix xpath() to return namespace definitions
While reviewing the patch myself, i spotted some formatting problems in the
code. Fixed in this v5 patch.
Also, this patch uses context patch format (in first versions, because of
the small modification, context patch format obfucates the changes. After
reimplementation this isn't the case anymore)
Thanks,
Ali Akbar
Attachments:
xpath-ns-fix-5.patchtext/x-diff; charset=US-ASCII; name=xpath-ns-fix-5.patchDownload
*** a/src/backend/utils/adt/xml.c
--- b/src/backend/utils/adt/xml.c
***************
*** 141,149 **** static bool print_xml_decl(StringInfo buf, const xmlChar *version,
pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding);
! static text *xml_xmlnodetoxmltype(xmlNodePtr cur);
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState **astate);
#endif /* USE_LIBXML */
static StringInfo query_to_xml_internal(const char *query, char *tablename,
--- 141,151 ----
pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding);
! static text *xml_xmlnodetoxmltype(xmlNodePtr cur,
! PgXmlErrorContext *xmlerrcxt);
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState **astate,
! PgXmlErrorContext *xmlerrcxt);
#endif /* USE_LIBXML */
static StringInfo query_to_xml_internal(const char *query, char *tablename,
***************
*** 3594,3620 **** SPI_sql_row_to_xmlelement(int rownum, StringInfo result, char *tablename,
#ifdef USE_LIBXML
/*
* Convert XML node to text (dump subtree in case of element,
* return value otherwise)
*/
static text *
! xml_xmlnodetoxmltype(xmlNodePtr cur)
{
xmltype *result;
if (cur->type == XML_ELEMENT_NODE)
{
xmlBufferPtr buf;
buf = xmlBufferCreate();
PG_TRY();
{
xmlNodeDump(buf, NULL, cur, 0, 1);
result = xmlBuffer_to_xmltype(buf);
}
PG_CATCH();
{
xmlBufferFree(buf);
PG_RE_THROW();
}
--- 3596,3706 ----
#ifdef USE_LIBXML
+ /* check ns definition of node and its childrens. If any one of ns is
+ * not defined in node and it's children, but in the node's parent,
+ * copy the definition to node.
+ */
+ static void
+ xml_checkandcopyns(xmlNodePtr root,
+ PgXmlErrorContext *xmlerrcxt,
+ xmlNodePtr node,
+ xmlNsPtr lastns_before)
+ {
+ xmlNsPtr ns = NULL;
+ xmlNsPtr cur_ns;
+ xmlNodePtr cur;
+
+ if (node->ns != NULL)
+ {
+ /* check in new nses */
+ cur_ns = lastns_before == NULL ? node->nsDef : lastns_before->next;
+ while (cur_ns != NULL)
+ {
+ if (cur_ns->href != NULL)
+ {
+ if (((cur_ns->prefix == NULL) && (node->ns->prefix == NULL)) ||
+ ((cur_ns->prefix != NULL) && (node->ns->prefix != NULL) &&
+ xmlStrEqual(cur_ns->prefix, node->ns->prefix)))
+ {
+ ns = cur_ns;
+ break;
+ }
+ }
+ cur_ns = cur_ns->next;
+ }
+ if (ns == NULL) /* not in new nses */
+ {
+ ns = xmlSearchNs(NULL, node->parent, node->ns->prefix);
+
+ if (ns != NULL)
+ {
+ ns = xmlNewNs(root, ns->href, ns->prefix);
+
+ if (ns == NULL && xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlNs");
+ }
+ }
+ }
+ /* check and copy ns for children recursively */
+ cur = node->children;
+ while (cur != NULL)
+ {
+ xml_checkandcopyns(root, xmlerrcxt, cur, lastns_before);
+ cur = cur->next;
+ }
+ }
+
/*
* Convert XML node to text (dump subtree in case of element,
* return value otherwise)
*/
static text *
! xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
{
xmltype *result;
if (cur->type == XML_ELEMENT_NODE)
{
xmlBufferPtr buf;
+ xmlNsPtr lastns_before;
+ xmlNsPtr ns;
+ xmlNsPtr next;
buf = xmlBufferCreate();
+
PG_TRY();
{
+ lastns_before = cur->nsDef;
+ if (lastns_before != NULL)
+ {
+ while (lastns_before->next != NULL)
+ lastns_before = lastns_before->next;
+ }
+ xml_checkandcopyns(cur, xmlerrcxt, cur, lastns_before);
+
xmlNodeDump(buf, NULL, cur, 0, 1);
result = xmlBuffer_to_xmltype(buf);
+
+ /* delete and free new nses */
+ ns = lastns_before == NULL ? cur->nsDef : lastns_before->next;
+ while (ns != NULL)
+ {
+ next = ns->next;
+ xmlFree(ns);
+ ns = next;
+ }
+
+ if (lastns_before == NULL)
+ cur->nsDef = NULL;
+ else
+ lastns_before->next = NULL;
}
PG_CATCH();
{
+ /* new namespaces will be freed while free-ing the node, so we
+ * won't free it here
+ */
xmlBufferFree(buf);
PG_RE_THROW();
}
***************
*** 3660,3666 **** xml_xmlnodetoxmltype(xmlNodePtr cur)
*/
static int
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState **astate)
{
int result = 0;
Datum datum;
--- 3746,3753 ----
*/
static int
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState **astate,
! PgXmlErrorContext *xmlerrcxt)
{
int result = 0;
Datum datum;
***************
*** 3682,3688 **** xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
for (i = 0; i < result; i++)
{
! datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i]));
*astate = accumArrayResult(*astate, datum,
false, XMLOID,
CurrentMemoryContext);
--- 3769,3776 ----
for (i = 0; i < result; i++)
{
! datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
! xmlerrcxt));
*astate = accumArrayResult(*astate, datum,
false, XMLOID,
CurrentMemoryContext);
***************
*** 3886,3894 **** xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
* Extract the results as requested.
*/
if (res_nitems != NULL)
! *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate);
else
! (void) xml_xpathobjtoxmlarray(xpathobj, astate);
}
PG_CATCH();
{
--- 3974,3982 ----
* Extract the results as requested.
*/
if (res_nitems != NULL)
! *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
else
! (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
}
PG_CATCH();
{
*** a/src/test/regress/expected/xml.out
--- b/src/test/regress/expected/xml.out
***************
*** 612,617 **** SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><loc
--- 612,632 ----
{1,2}
(1 row)
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ xpath
+ ------------------------------------------------------------------------------------------------------------------------------------------------
+ {"<local:piece xmlns:local=\"http://127.0.0.1\" id=\"1\">number one</local:piece>","<local:piece xmlns:local=\"http://127.0.0.1\" id=\"2\"/>"}
+ (1 row)
+
+ SELECT xpath('//loc:*', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ xpath
+ --------------------------------------------------------------------------------------------------------------------------------------------------------------
+ {"<local:data xmlns:local=\"http://127.0.0.1\"> +
+ <local:piece id=\"1\">number one</local:piece> +
+ <local:piece id=\"2\"/> +
+ </local:data>","<local:piece xmlns:local=\"http://127.0.0.1\" id=\"1\">number one</local:piece>","<local:piece xmlns:local=\"http://127.0.0.1\" id=\"2\"/>"}
+ (1 row)
+
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
xpath
-------------------------
*** a/src/test/regress/expected/xml_1.out
--- b/src/test/regress/expected/xml_1.out
***************
*** 514,519 **** LINE 1: SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="ht...
--- 514,531 ----
^
DETAIL: This functionality requires the server to be built with libxml support.
HINT: You need to rebuild PostgreSQL using --with-libxml.
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ ERROR: unsupported XML feature
+ LINE 1: SELECT xpath('//loc:piece', '<local:data xmlns:local="http:/...
+ ^
+ DETAIL: This functionality requires the server to be built with libxml support.
+ HINT: You need to rebuild PostgreSQL using --with-libxml.
+ SELECT xpath('//loc:*', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ ERROR: unsupported XML feature
+ LINE 1: SELECT xpath('//loc:*', '<local:data xmlns:local="http://127...
+ ^
+ DETAIL: This functionality requires the server to be built with libxml support.
+ HINT: You need to rebuild PostgreSQL using --with-libxml.
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
ERROR: unsupported XML feature
LINE 1: SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'...
*** a/src/test/regress/sql/xml.sql
--- b/src/test/regress/sql/xml.sql
***************
*** 178,183 **** SELECT xpath(NULL, NULL) IS NULL FROM xmltest;
--- 178,185 ----
SELECT xpath('', '<!-- error -->');
SELECT xpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ SELECT xpath('//loc:*', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
SELECT xpath('//text()', '<root><</root>');
SELECT xpath('//@value', '<root value="<"/>');
On 10/6/14 10:24 PM, Ali Akbar wrote:
While reviewing the patch myself, i spotted some formatting problems in
the code. Fixed in this v5 patch.Also, this patch uses context patch format (in first versions, because
of the small modification, context patch format obfucates the changes.
After reimplementation this isn't the case anymore)
I think the problem this patch is addressing is real, and your approach
is sound, but I'd ask you to go back to the xmlCopyNode() version, and
try to add a test case for why the second argument = 1 is necessary. I
don't see any other problems.
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
2014-11-04 22:16 GMT+07:00 Peter Eisentraut <peter_e@gmx.net>:
On 10/6/14 10:24 PM, Ali Akbar wrote:
While reviewing the patch myself, i spotted some formatting problems in
the code. Fixed in this v5 patch.Also, this patch uses context patch format (in first versions, because
of the small modification, context patch format obfucates the changes.
After reimplementation this isn't the case anymore)I think the problem this patch is addressing is real, and your approach
is sound, but I'd ask you to go back to the xmlCopyNode() version, and
try to add a test case for why the second argument = 1 is necessary. I
don't see any other problems.
OK. Because upstream code is fixed in current version, i'll revert to the
previous version. Test case added to regression test. With =1 argument, the
result is correct:
<local:piece xmlns:local=\"http://127.0.0.1\" xmlns=\"http://127.0.0.2\"
id=\"1\">
<internal>number one</internal>
<internal2/>
</local:piece>
without the argument, the result is not correct, all children will be lost.
Because of that, the other regression test will fail too because the
children is not copied:
*** 584,593 ****
-- Text XPath expressions evaluation
SELECT xpath('/value', data) FROM xmltest;
! xpath
! ----------------------
! {<value>one</value>}
! {<value>two</value>}
(2 rows)
SELECT xpath(NULL, NULL) IS NULL FROM xmltest;
--- 584,593 ----
-- Text XPath expressions evaluation
SELECT xpath('/value', data) FROM xmltest;
! xpath
! ------------
! {<value/>}
! {<value/>}
(2 rows)
SELECT xpath(NULL, NULL) IS NULL FROM xmltest;
***************
... <cut>
updated patch attached.
Regards,
--
Ali Akbar
Attachments:
xpath-ns-fix-6.patchtext/x-diff; charset=US-ASCII; name=xpath-ns-fix-6.patchDownload
*** a/src/backend/utils/adt/xml.c
--- b/src/backend/utils/adt/xml.c
***************
*** 141,149 **** static bool print_xml_decl(StringInfo buf, const xmlChar *version,
pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding);
! static text *xml_xmlnodetoxmltype(xmlNodePtr cur);
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState **astate);
#endif /* USE_LIBXML */
static StringInfo query_to_xml_internal(const char *query, char *tablename,
--- 141,151 ----
pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding);
! static text *xml_xmlnodetoxmltype(xmlNodePtr cur,
! PgXmlErrorContext *xmlerrcxt);
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState **astate,
! PgXmlErrorContext *xmlerrcxt);
#endif /* USE_LIBXML */
static StringInfo query_to_xml_internal(const char *query, char *tablename,
***************
*** 3599,3624 **** SPI_sql_row_to_xmlelement(int rownum, StringInfo result, char *tablename,
* return value otherwise)
*/
static text *
! xml_xmlnodetoxmltype(xmlNodePtr cur)
{
xmltype *result;
if (cur->type == XML_ELEMENT_NODE)
{
xmlBufferPtr buf;
buf = xmlBufferCreate();
PG_TRY();
{
! xmlNodeDump(buf, NULL, cur, 0, 1);
result = xmlBuffer_to_xmltype(buf);
}
PG_CATCH();
{
xmlBufferFree(buf);
PG_RE_THROW();
}
PG_END_TRY();
xmlBufferFree(buf);
}
else
--- 3601,3640 ----
* return value otherwise)
*/
static text *
! xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
{
xmltype *result;
if (cur->type == XML_ELEMENT_NODE)
{
xmlBufferPtr buf;
+ xmlNodePtr cur_copy;
buf = xmlBufferCreate();
+
+ /* the result of xmlNodeDump won't contain namespace definitions
+ * from parent nodes, but xmlCopyNode duplicates a node along
+ * with its required namespace definitions.
+ */
+ cur_copy = xmlCopyNode(cur, 1);
+
+ if (cur_copy == NULL)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not serialize xml");
+
PG_TRY();
{
! xmlNodeDump(buf, NULL, cur_copy, 0, 1);
result = xmlBuffer_to_xmltype(buf);
}
PG_CATCH();
{
+ xmlFreeNode(cur_copy);
xmlBufferFree(buf);
PG_RE_THROW();
}
PG_END_TRY();
+ xmlFreeNode(cur_copy);
xmlBufferFree(buf);
}
else
***************
*** 3660,3666 **** xml_xmlnodetoxmltype(xmlNodePtr cur)
*/
static int
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState **astate)
{
int result = 0;
Datum datum;
--- 3676,3683 ----
*/
static int
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState **astate,
! PgXmlErrorContext *xmlerrcxt)
{
int result = 0;
Datum datum;
***************
*** 3682,3688 **** xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
for (i = 0; i < result; i++)
{
! datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i]));
*astate = accumArrayResult(*astate, datum,
false, XMLOID,
CurrentMemoryContext);
--- 3699,3706 ----
for (i = 0; i < result; i++)
{
! datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
! xmlerrcxt));
*astate = accumArrayResult(*astate, datum,
false, XMLOID,
CurrentMemoryContext);
***************
*** 3886,3894 **** xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
* Extract the results as requested.
*/
if (res_nitems != NULL)
! *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate);
else
! (void) xml_xpathobjtoxmlarray(xpathobj, astate);
}
PG_CATCH();
{
--- 3904,3912 ----
* Extract the results as requested.
*/
if (res_nitems != NULL)
! *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
else
! (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
}
PG_CATCH();
{
*** a/src/test/regress/expected/xml.out
--- b/src/test/regress/expected/xml.out
***************
*** 612,617 **** SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><loc
--- 612,623 ----
{1,2}
(1 row)
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ xpath
+ ------------------------------------------------------------------------------------------------------------------------------------------------
+ {"<local:piece xmlns:local=\"http://127.0.0.1\" id=\"1\">number one</local:piece>","<local:piece xmlns:local=\"http://127.0.0.1\" id=\"2\"/>"}
+ (1 row)
+
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
xpath
-------------------------
*** a/src/test/regress/expected/xml_1.out
--- b/src/test/regress/expected/xml_1.out
***************
*** 514,519 **** LINE 1: SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="ht...
--- 514,531 ----
^
DETAIL: This functionality requires the server to be built with libxml support.
HINT: You need to rebuild PostgreSQL using --with-libxml.
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ ERROR: unsupported XML feature
+ LINE 1: SELECT xpath('//loc:piece', '<local:data xmlns:local="http:/...
+ ^
+ DETAIL: This functionality requires the server to be built with libxml support.
+ HINT: You need to rebuild PostgreSQL using --with-libxml.
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1" xmlns="http://127.0.0.2"><local:piece id="1"><internal>number one</internal><internal2/></local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ ERROR: unsupported XML feature
+ LINE 1: SELECT xpath('//loc:piece', '<local:data xmlns:local="http:/...
+ ^
+ DETAIL: This functionality requires the server to be built with libxml support.
+ HINT: You need to rebuild PostgreSQL using --with-libxml.
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
ERROR: unsupported XML feature
LINE 1: SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'...
*** a/src/test/regress/sql/xml.sql
--- b/src/test/regress/sql/xml.sql
***************
*** 178,183 **** SELECT xpath(NULL, NULL) IS NULL FROM xmltest;
--- 178,185 ----
SELECT xpath('', '<!-- error -->');
SELECT xpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1" xmlns="http://127.0.0.2"><local:piece id="1"><internal>number one</internal><internal2/></local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
SELECT xpath('//text()', '<root><</root>');
SELECT xpath('//@value', '<root value="<"/>');
2014-11-04 22:16 GMT+07:00 Peter Eisentraut <peter_e@gmx.net>:
On 10/6/14 10:24 PM, Ali Akbar wrote:
While reviewing the patch myself, i spotted some formatting problems in
the code. Fixed in this v5 patch.Also, this patch uses context patch format (in first versions, because
of the small modification, context patch format obfucates the changes.
After reimplementation this isn't the case anymore)I think the problem this patch is addressing is real, and your approach
is sound, but I'd ask you to go back to the xmlCopyNode() version, and
try to add a test case for why the second argument = 1 is necessary. I
don't see any other problems.OK. Because upstream code is fixed in current version, i'll revert to the
previous version. Test case added to regression test. With =1 argument, the
result is correct:
<local:piece xmlns:local=\"http://127.0.0.1\" xmlns=\"http://127.0.0.2\"
id=\"1\">
<internal>number one</internal>
<internal2/>
</local:piece>without the argument, the result is not correct, all children will be
lost. Because of that, the other regression test will fail too because the
children is not copied:
*** 584,593 ****-- Text XPath expressions evaluation
SELECT xpath('/value', data) FROM xmltest;
! xpath
! ----------------------
! {<value>one</value>}
! {<value>two</value>}
(2 rows)SELECT xpath(NULL, NULL) IS NULL FROM xmltest; --- 584,593 ------ Text XPath expressions evaluation
SELECT xpath('/value', data) FROM xmltest;
! xpath
! ------------
! {<value/>}
! {<value/>}
(2 rows)SELECT xpath(NULL, NULL) IS NULL FROM xmltest;
***************
... <cut>updated patch attached.
I noticed somewhat big performance regression if the xml is large (i use
PRODML Product Volume sample from energistics.org):
* Without patch (tested 3 times):
select unnest(xpath('//a:flow', x, ARRAY[['a','
http://www.prodml.org/schemas/1series']])) from u;
unnest
-----------------------------------------------------------------------------------------------
<flow>
+
<kind>gas
lift</kind> +
...
Time: 84,012 ms
Time: 85,683 ms
Time: 88,766 ms
* With latest v6 patch (notice the correct result with namespace
definition):
select unnest(xpath('//a:flow', x, ARRAY[['a','
http://www.prodml.org/schemas/1series']])) from u;
unnest
-----------------------------------------------------------------------------------------------
<flow xmlns="http://www.prodml.org/schemas/1series">
+
...
Time: 108,912 ms
Time: 108,267 ms
Time: 114,848 ms
It's 23% performance regression.
* Just curious, i'm also testing v5 patch performance (notice the namespace
in the result):
select unnest(xpath('//a:flow', x, ARRAY[['a','
http://www.prodml.org/schemas/1series']])) from u;
unnest
-----------------------------------------------------------------------------------------------
<flow xmlns="http://www.prodml.org/schemas/1series">
+
<kind>gas
lift</kind> +
Time: 92,552 ms
Time: 97,440 ms
Time: 99,309 ms
The regression is only 13%. I know the xmlCopyNode() version (v6 patch) is
much more cleaner than v5patch, should we consider the performance benefit?
Anyway, thanks for the review! :)
Regards,
--
Ali Akbar
2014-11-05 21:50 GMT+07:00 Ali Akbar <the.apaan@gmail.com>:
2014-11-04 22:16 GMT+07:00 Peter Eisentraut <peter_e@gmx.net>:
I think the problem this patch is addressing is real, and your approach
is sound, but I'd ask you to go back to the xmlCopyNode() version, and
try to add a test case for why the second argument = 1 is necessary. I
don't see any other problems.OK. Because upstream code is fixed in current version, i'll revert to the
previous version. Test case added to regression test. With =1 argument, the
result is correct:
<local:piece xmlns:local=\"http://127.0.0.1\" xmlns=\"http://127.0.0.2\"
id=\"1\">
<internal>number one</internal>
<internal2/>
</local:piece>without the argument, the result is not correct, all children will be
lost. Because of that, the other regression test will fail too because the
children is not copied:
*** 584,593 ****-- Text XPath expressions evaluation
SELECT xpath('/value', data) FROM xmltest;
! xpath
! ----------------------
! {<value>one</value>}
! {<value>two</value>}
(2 rows)SELECT xpath(NULL, NULL) IS NULL FROM xmltest; --- 584,593 ------ Text XPath expressions evaluation
SELECT xpath('/value', data) FROM xmltest;
! xpath
! ------------
! {<value/>}
! {<value/>}
(2 rows)SELECT xpath(NULL, NULL) IS NULL FROM xmltest;
***************
... <cut>updated patch attached.
I noticed somewhat big performance regression if the xml is large (i use
PRODML Product Volume sample from energistics.org):
* Without patch (tested 3 times):
select unnest(xpath('//a:flow', x, ARRAY[['a','
http://www.prodml.org/schemas/1series']])) from u;unnest
----------------------------------------------------------------------------------------------- <flow> + <kind>gas lift</kind> + ... Time: 84,012 ms Time: 85,683 ms Time: 88,766 ms* With latest v6 patch (notice the correct result with namespace
definition):select unnest(xpath('//a:flow', x, ARRAY[['a','
http://www.prodml.org/schemas/1series']])) from u;unnest
----------------------------------------------------------------------------------------------- <flow xmlns="http://www.prodml.org/schemas/1series"> + ... Time: 108,912 ms Time: 108,267 ms Time: 114,848 msIt's 23% performance regression.
* Just curious, i'm also testing v5 patch performance (notice the
namespace in the result):
select unnest(xpath('//a:flow', x, ARRAY[['a','
http://www.prodml.org/schemas/1series']])) from u;unnest
----------------------------------------------------------------------------------------------- <flow xmlns="http://www.prodml.org/schemas/1series"> + <kind>gas lift</kind> + Time: 92,552 ms Time: 97,440 ms Time: 99,309 msThe regression is only 13%. I know the xmlCopyNode() version (v6 patch) is
much more cleaner than v5patch, should we consider the performance benefit?Anyway, thanks for the review! :)
commit bac2739 in master by Tom Lane changes *astate definition
in xml_xpathobjtoxmlarray, this attached v7 patch rebases v6 patch with
latest master.
For performance comparison, i also rebased the v5 patch attached with name
v5-141126.patch
--
Ali Akbar
Attachments:
xpath-ns-fix-5-141126.patchtext/x-patch; charset=US-ASCII; name=xpath-ns-fix-5-141126.patchDownload
*** a/src/backend/utils/adt/xml.c
--- b/src/backend/utils/adt/xml.c
***************
*** 141,149 **** static bool print_xml_decl(StringInfo buf, const xmlChar *version,
pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding);
! static text *xml_xmlnodetoxmltype(xmlNodePtr cur);
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState *astate);
#endif /* USE_LIBXML */
static StringInfo query_to_xml_internal(const char *query, char *tablename,
--- 141,151 ----
pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding);
! static text *xml_xmlnodetoxmltype(xmlNodePtr cur,
! PgXmlErrorContext *xmlerrcxt);
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState *astate,
! PgXmlErrorContext *xmlerrcxt);
#endif /* USE_LIBXML */
static StringInfo query_to_xml_internal(const char *query, char *tablename,
***************
*** 3594,3620 **** SPI_sql_row_to_xmlelement(int rownum, StringInfo result, char *tablename,
#ifdef USE_LIBXML
/*
* Convert XML node to text (dump subtree in case of element,
* return value otherwise)
*/
static text *
! xml_xmlnodetoxmltype(xmlNodePtr cur)
{
xmltype *result;
if (cur->type == XML_ELEMENT_NODE)
{
xmlBufferPtr buf;
buf = xmlBufferCreate();
PG_TRY();
{
xmlNodeDump(buf, NULL, cur, 0, 1);
result = xmlBuffer_to_xmltype(buf);
}
PG_CATCH();
{
xmlBufferFree(buf);
PG_RE_THROW();
}
--- 3596,3706 ----
#ifdef USE_LIBXML
+ /* check ns definition of node and its childrens. If any one of ns is
+ * not defined in node and it's children, but in the node's parent,
+ * copy the definition to node.
+ */
+ static void
+ xml_checkandcopyns(xmlNodePtr root,
+ PgXmlErrorContext *xmlerrcxt,
+ xmlNodePtr node,
+ xmlNsPtr lastns_before)
+ {
+ xmlNsPtr ns = NULL;
+ xmlNsPtr cur_ns;
+ xmlNodePtr cur;
+
+ if (node->ns != NULL)
+ {
+ /* check in new nses */
+ cur_ns = lastns_before == NULL ? node->nsDef : lastns_before->next;
+ while (cur_ns != NULL)
+ {
+ if (cur_ns->href != NULL)
+ {
+ if (((cur_ns->prefix == NULL) && (node->ns->prefix == NULL)) ||
+ ((cur_ns->prefix != NULL) && (node->ns->prefix != NULL) &&
+ xmlStrEqual(cur_ns->prefix, node->ns->prefix)))
+ {
+ ns = cur_ns;
+ break;
+ }
+ }
+ cur_ns = cur_ns->next;
+ }
+ if (ns == NULL) /* not in new nses */
+ {
+ ns = xmlSearchNs(NULL, node->parent, node->ns->prefix);
+
+ if (ns != NULL)
+ {
+ ns = xmlNewNs(root, ns->href, ns->prefix);
+
+ if (ns == NULL && xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlNs");
+ }
+ }
+ }
+ /* check and copy ns for children recursively */
+ cur = node->children;
+ while (cur != NULL)
+ {
+ xml_checkandcopyns(root, xmlerrcxt, cur, lastns_before);
+ cur = cur->next;
+ }
+ }
+
/*
* Convert XML node to text (dump subtree in case of element,
* return value otherwise)
*/
static text *
! xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
{
xmltype *result;
if (cur->type == XML_ELEMENT_NODE)
{
xmlBufferPtr buf;
+ xmlNsPtr lastns_before;
+ xmlNsPtr ns;
+ xmlNsPtr next;
buf = xmlBufferCreate();
+
PG_TRY();
{
+ lastns_before = cur->nsDef;
+ if (lastns_before != NULL)
+ {
+ while (lastns_before->next != NULL)
+ lastns_before = lastns_before->next;
+ }
+ xml_checkandcopyns(cur, xmlerrcxt, cur, lastns_before);
+
xmlNodeDump(buf, NULL, cur, 0, 1);
result = xmlBuffer_to_xmltype(buf);
+
+ /* delete and free new nses */
+ ns = lastns_before == NULL ? cur->nsDef : lastns_before->next;
+ while (ns != NULL)
+ {
+ next = ns->next;
+ xmlFree(ns);
+ ns = next;
+ }
+
+ if (lastns_before == NULL)
+ cur->nsDef = NULL;
+ else
+ lastns_before->next = NULL;
}
PG_CATCH();
{
+ /* new namespaces will be freed while free-ing the node, so we
+ * won't free it here
+ */
xmlBufferFree(buf);
PG_RE_THROW();
}
***************
*** 3660,3666 **** xml_xmlnodetoxmltype(xmlNodePtr cur)
*/
static int
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState *astate)
{
int result = 0;
Datum datum;
--- 3746,3753 ----
*/
static int
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState *astate,
! PgXmlErrorContext *xmlerrcxt)
{
int result = 0;
Datum datum;
***************
*** 3679,3685 **** xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
for (i = 0; i < result; i++)
{
! datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i]));
(void) accumArrayResult(astate, datum, false,
XMLOID, CurrentMemoryContext);
}
--- 3766,3773 ----
for (i = 0; i < result; i++)
{
! datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
! xmlerrcxt));
(void) accumArrayResult(astate, datum, false,
XMLOID, CurrentMemoryContext);
}
***************
*** 3881,3889 **** xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
* Extract the results as requested.
*/
if (res_nitems != NULL)
! *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate);
else
! (void) xml_xpathobjtoxmlarray(xpathobj, astate);
}
PG_CATCH();
{
--- 3969,3977 ----
* Extract the results as requested.
*/
if (res_nitems != NULL)
! *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
else
! (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
}
PG_CATCH();
{
*** a/src/test/regress/expected/xml.out
--- b/src/test/regress/expected/xml.out
***************
*** 612,617 **** SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><loc
--- 612,632 ----
{1,2}
(1 row)
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ xpath
+ ------------------------------------------------------------------------------------------------------------------------------------------------
+ {"<local:piece xmlns:local=\"http://127.0.0.1\" id=\"1\">number one</local:piece>","<local:piece xmlns:local=\"http://127.0.0.1\" id=\"2\"/>"}
+ (1 row)
+
+ SELECT xpath('//loc:*', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ xpath
+ --------------------------------------------------------------------------------------------------------------------------------------------------------------
+ {"<local:data xmlns:local=\"http://127.0.0.1\"> +
+ <local:piece id=\"1\">number one</local:piece> +
+ <local:piece id=\"2\"/> +
+ </local:data>","<local:piece xmlns:local=\"http://127.0.0.1\" id=\"1\">number one</local:piece>","<local:piece xmlns:local=\"http://127.0.0.1\" id=\"2\"/>"}
+ (1 row)
+
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
xpath
-------------------------
*** a/src/test/regress/expected/xml_1.out
--- b/src/test/regress/expected/xml_1.out
***************
*** 514,519 **** LINE 1: SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="ht...
--- 514,531 ----
^
DETAIL: This functionality requires the server to be built with libxml support.
HINT: You need to rebuild PostgreSQL using --with-libxml.
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ ERROR: unsupported XML feature
+ LINE 1: SELECT xpath('//loc:piece', '<local:data xmlns:local="http:/...
+ ^
+ DETAIL: This functionality requires the server to be built with libxml support.
+ HINT: You need to rebuild PostgreSQL using --with-libxml.
+ SELECT xpath('//loc:*', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ ERROR: unsupported XML feature
+ LINE 1: SELECT xpath('//loc:*', '<local:data xmlns:local="http://127...
+ ^
+ DETAIL: This functionality requires the server to be built with libxml support.
+ HINT: You need to rebuild PostgreSQL using --with-libxml.
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
ERROR: unsupported XML feature
LINE 1: SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'...
*** a/src/test/regress/sql/xml.sql
--- b/src/test/regress/sql/xml.sql
***************
*** 178,183 **** SELECT xpath(NULL, NULL) IS NULL FROM xmltest;
--- 178,185 ----
SELECT xpath('', '<!-- error -->');
SELECT xpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ SELECT xpath('//loc:*', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
SELECT xpath('//text()', '<root><</root>');
SELECT xpath('//@value', '<root value="<"/>');
xpath-ns-fix-7.patchtext/x-patch; charset=US-ASCII; name=xpath-ns-fix-7.patchDownload
*** a/src/backend/utils/adt/xml.c
--- b/src/backend/utils/adt/xml.c
***************
*** 141,149 **** static bool print_xml_decl(StringInfo buf, const xmlChar *version,
pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding);
! static text *xml_xmlnodetoxmltype(xmlNodePtr cur);
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState *astate);
#endif /* USE_LIBXML */
static StringInfo query_to_xml_internal(const char *query, char *tablename,
--- 141,151 ----
pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding);
! static text *xml_xmlnodetoxmltype(xmlNodePtr cur,
! PgXmlErrorContext *xmlerrcxt);
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState *astate,
! PgXmlErrorContext *xmlerrcxt);
#endif /* USE_LIBXML */
static StringInfo query_to_xml_internal(const char *query, char *tablename,
***************
*** 3599,3624 **** SPI_sql_row_to_xmlelement(int rownum, StringInfo result, char *tablename,
* return value otherwise)
*/
static text *
! xml_xmlnodetoxmltype(xmlNodePtr cur)
{
xmltype *result;
if (cur->type == XML_ELEMENT_NODE)
{
xmlBufferPtr buf;
buf = xmlBufferCreate();
PG_TRY();
{
! xmlNodeDump(buf, NULL, cur, 0, 1);
result = xmlBuffer_to_xmltype(buf);
}
PG_CATCH();
{
xmlBufferFree(buf);
PG_RE_THROW();
}
PG_END_TRY();
xmlBufferFree(buf);
}
else
--- 3601,3640 ----
* return value otherwise)
*/
static text *
! xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
{
xmltype *result;
if (cur->type == XML_ELEMENT_NODE)
{
xmlBufferPtr buf;
+ xmlNodePtr cur_copy;
buf = xmlBufferCreate();
+
+ /* the result of xmlNodeDump won't contain namespace definitions
+ * from parent nodes, but xmlCopyNode duplicates a node along
+ * with its required namespace definitions.
+ */
+ cur_copy = xmlCopyNode(cur, 1);
+
+ if (cur_copy == NULL)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not serialize xml");
+
PG_TRY();
{
! xmlNodeDump(buf, NULL, cur_copy, 0, 1);
result = xmlBuffer_to_xmltype(buf);
}
PG_CATCH();
{
+ xmlFreeNode(cur_copy);
xmlBufferFree(buf);
PG_RE_THROW();
}
PG_END_TRY();
+ xmlFreeNode(cur_copy);
xmlBufferFree(buf);
}
else
***************
*** 3660,3666 **** xml_xmlnodetoxmltype(xmlNodePtr cur)
*/
static int
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState *astate)
{
int result = 0;
Datum datum;
--- 3676,3683 ----
*/
static int
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState *astate,
! PgXmlErrorContext *xmlerrcxt)
{
int result = 0;
Datum datum;
***************
*** 3679,3685 **** xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
for (i = 0; i < result; i++)
{
! datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i]));
(void) accumArrayResult(astate, datum, false,
XMLOID, CurrentMemoryContext);
}
--- 3696,3703 ----
for (i = 0; i < result; i++)
{
! datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
! xmlerrcxt));
(void) accumArrayResult(astate, datum, false,
XMLOID, CurrentMemoryContext);
}
***************
*** 3881,3889 **** xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
* Extract the results as requested.
*/
if (res_nitems != NULL)
! *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate);
else
! (void) xml_xpathobjtoxmlarray(xpathobj, astate);
}
PG_CATCH();
{
--- 3899,3907 ----
* Extract the results as requested.
*/
if (res_nitems != NULL)
! *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
else
! (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
}
PG_CATCH();
{
*** a/src/test/regress/expected/xml.out
--- b/src/test/regress/expected/xml.out
***************
*** 612,617 **** SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><loc
--- 612,623 ----
{1,2}
(1 row)
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ xpath
+ ------------------------------------------------------------------------------------------------------------------------------------------------
+ {"<local:piece xmlns:local=\"http://127.0.0.1\" id=\"1\">number one</local:piece>","<local:piece xmlns:local=\"http://127.0.0.1\" id=\"2\"/>"}
+ (1 row)
+
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
xpath
-------------------------
*** a/src/test/regress/expected/xml_1.out
--- b/src/test/regress/expected/xml_1.out
***************
*** 514,519 **** LINE 1: SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="ht...
--- 514,531 ----
^
DETAIL: This functionality requires the server to be built with libxml support.
HINT: You need to rebuild PostgreSQL using --with-libxml.
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ ERROR: unsupported XML feature
+ LINE 1: SELECT xpath('//loc:piece', '<local:data xmlns:local="http:/...
+ ^
+ DETAIL: This functionality requires the server to be built with libxml support.
+ HINT: You need to rebuild PostgreSQL using --with-libxml.
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1" xmlns="http://127.0.0.2"><local:piece id="1"><internal>number one</internal><internal2/></local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ ERROR: unsupported XML feature
+ LINE 1: SELECT xpath('//loc:piece', '<local:data xmlns:local="http:/...
+ ^
+ DETAIL: This functionality requires the server to be built with libxml support.
+ HINT: You need to rebuild PostgreSQL using --with-libxml.
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
ERROR: unsupported XML feature
LINE 1: SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'...
*** a/src/test/regress/sql/xml.sql
--- b/src/test/regress/sql/xml.sql
***************
*** 178,183 **** SELECT xpath(NULL, NULL) IS NULL FROM xmltest;
--- 178,185 ----
SELECT xpath('', '<!-- error -->');
SELECT xpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1" xmlns="http://127.0.0.2"><local:piece id="1"><internal>number one</internal><internal2/></local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
SELECT xpath('//text()', '<root><</root>');
SELECT xpath('//@value', '<root value="<"/>');
On 11/5/14 9:50 AM, Ali Akbar wrote:
I noticed somewhat big performance regression if the xml is large (i use
PRODML Product Volume sample from energistics.org <http://energistics.org>):
* Without patch (tested 3 times):
select unnest(xpath('//a:flow', x,
ARRAY[['a','http://www.prodml.org/schemas/1series']])) from u;
Time: 84,012 ms
Time: 85,683 ms
Time: 88,766 ms
* With latest v6 patch (notice the correct result with namespace
definition):select unnest(xpath('//a:flow', x,
ARRAY[['a','http://www.prodml.org/schemas/1series']])) from u;
Time: 108,912 ms
Time: 108,267 ms
Time: 114,848 msIt's 23% performance regression.
* Just curious, i'm also testing v5 patch performance (notice the
namespace in the result):
select unnest(xpath('//a:flow', x,
ARRAY[['a','http://www.prodml.org/schemas/1series']])) from u;
Time: 92,552 ms
Time: 97,440 ms
Time: 99,309 msThe regression is only 13%. I know the xmlCopyNode() version (v6 patch)
is much more cleaner than v5patch, should we consider the performance
benefit?
I ran a test using postgres-US.fo built in the PostgreSQL source tree,
which is 38 MB, and ran
select unnest(xpath('//fo:bookmark-title', b, array[array['fo',
'http://www.w3.org/1999/XSL/Format']])) from data;
(Table contains one row only.)
The timings were basically indistinguishable between the three code
versions.
I'll try to reproduce your test. How big is your file? Do you have a
link to the actual file? Could you share your load script?
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
I ran a test using postgres-US.fo built in the PostgreSQL source tree,
which is 38 MB, and ranselect unnest(xpath('//fo:bookmark-title', b, array[array['fo',
'http://www.w3.org/1999/XSL/Format']])) from data;(Table contains one row only.)
The timings were basically indistinguishable between the three code
versions.I'll try to reproduce your test. How big is your file? Do you have a
link to the actual file? Could you share your load script?
I use this xml sample:
http://w3.energistics.org/schema/prodml_v1.2.0_data/XML_Examples/productVolume_no_xsl.xml
Basically i loaded the xml to table u 100 times. Load script attached.
Regards,
--
Ali Akbar
Attachments:
2014-12-14 22:18 GMT+07:00 Ali Akbar <the.apaan@gmail.com>:
I ran a test using postgres-US.fo built in the PostgreSQL source tree,
which is 38 MB, and ran
select unnest(xpath('//fo:bookmark-title', b, array[array['fo',
'http://www.w3.org/1999/XSL/Format']])) from data;(Table contains one row only.)
The timings were basically indistinguishable between the three code
versions.I'll try to reproduce your test. How big is your file? Do you have a
link to the actual file? Could you share your load script?I use this xml sample:
http://w3.energistics.org/schema/prodml_v1.2.0_data/XML_Examples/productVolume_no_xsl.xmlBasically i loaded the xml to table u 100 times. Load script attached.
Peter, while reviewing the better performing patch myself, now i think the
patch needs more work to be committed. The structuring of the method will
be confusing in the long term. I think i'll restructure the patch in the
next commitfest.
So i propose to break the patch:
1. We apply the current patch which uses xmlNodeCopy, so that the
long-standing bug will be fixed in postgres.
2. I'll work with the performance enhancement in the next commitfest.
Maybe for (2), the current better-performing patch can be viewed as PoC of
the expected performance.
Regards,
--
Ali Akbar
On Mon, Dec 15, 2014 at 9:05 AM, Ali Akbar <the.apaan@gmail.com> wrote:
Peter, while reviewing the better performing patch myself, now i think the
patch needs more work to be committed. The structuring of the method will be
confusing in the long term. I think I'll restructure the patch in the next
commitfest.
So i propose to break the patch:
1. We apply the current patch which uses xmlNodeCopy, so that the
long-standing bug will be fixed in postgres.
2. I'll work with the performance enhancement in the next commitfest.Maybe for (2), the current better-performing patch can be viewed as PoC of
the expected performance.
Ali, are you currently working on that? Would you mind re-creating new
entries in the commit fest app for the new set of patches that you are
planning to do?
For now I am switching this patch as returned with feedback.
Thanks,
--
Michael
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
2014-12-15 10:19 GMT+07:00 Michael Paquier <michael.paquier@gmail.com>:
On Mon, Dec 15, 2014 at 9:05 AM, Ali Akbar <the.apaan@gmail.com> wrote:
Peter, while reviewing the better performing patch myself, now i think
the
patch needs more work to be committed. The structuring of the method
will be
confusing in the long term. I think I'll restructure the patch in the
next
commitfest.
So i propose to break the patch:
1. We apply the current patch which uses xmlNodeCopy, so that the
long-standing bug will be fixed in postgres.
2. I'll work with the performance enhancement in the next commitfest.Maybe for (2), the current better-performing patch can be viewed as PoC
of
the expected performance.
Ali, are you currently working on that? Would you mind re-creating new
entries in the commit fest app for the new set of patches that you are
planning to do?
For now I am switching this patch as returned with feedback.
Thanks,
What i mean, the last patch (v7 patch) as it is is enough to fix the bug
(nested xpath namespace problem). I think the performance regression is
still acceptable (in my case it's ~20%), because the bug is severe.
Currently, xpath can return invalid xml because the namespace isn't
included in the output!
What i'll be working is the v4 patch, because it turns out the v4 patch has
better performance (~10%, but Peter's test shows it isn't the case). But,
the problem is the v4 patch is organized wrongly, and hacks around the
libxml's xml node structure (duplicating the namespace on the existing
structure). I'll work on that, but it will affects the performance benefit.
So what i propose is, we close the longstanding bug in this comitfest with
the v7 patch. I'll work on improving the performance, without compromising
good code structure. If the result is good, i'll submit the patch.
Thanks
Regards,
--
Ali Akbar
2014-12-15 11:02 GMT+07:00 Ali Akbar <the.apaan@gmail.com>:
2014-12-15 10:19 GMT+07:00 Michael Paquier <michael.paquier@gmail.com>:
On Mon, Dec 15, 2014 at 9:05 AM, Ali Akbar <the.apaan@gmail.com> wrote:
Peter, while reviewing the better performing patch myself, now i think
the
patch needs more work to be committed. The structuring of the method
will be
confusing in the long term. I think I'll restructure the patch in the
next
commitfest.
So i propose to break the patch:
1. We apply the current patch which uses xmlNodeCopy, so that the
long-standing bug will be fixed in postgres.
2. I'll work with the performance enhancement in the next commitfest.Maybe for (2), the current better-performing patch can be viewed as PoC
of
the expected performance.
Ali, are you currently working on that? Would you mind re-creating new
entries in the commit fest app for the new set of patches that you are
planning to do?
For now I am switching this patch as returned with feedback.
Thanks,What i mean, the last patch (v7 patch) as it is is enough to fix the bug
(nested xpath namespace problem). I think the performance regression is
still acceptable (in my case it's ~20%), because the bug is severe.
Currently, xpath can return invalid xml because the namespace isn't
included in the output!
Sorry, typo here. What i mean isn't "invalid xml", but "incomplete xml".
Hence the next call to xpath with the previous result as its input will
become a problem because the namespace will not match.
What i'll be working is the v4 patch, because it turns out the v4 patch
has better performance (~10%, but Peter's test shows it isn't the case).
But, the problem is the v4 patch is organized wrongly, and hacks around the
libxml's xml node structure (duplicating the namespace on the existing
structure). I'll work on that, but it will affects the performance benefit.So what i propose is, we close the longstanding bug in this comitfest with
the v7 patch. I'll work on improving the performance, without compromising
good code structure. If the result is good, i'll submit the patch.Thanks
Regards,
--
Ali Akbar
On Mon, Dec 15, 2014 at 1:02 PM, Ali Akbar <the.apaan@gmail.com> wrote:
2014-12-15 10:19 GMT+07:00 Michael Paquier <michael.paquier@gmail.com>:
On Mon, Dec 15, 2014 at 9:05 AM, Ali Akbar <the.apaan@gmail.com> wrote:
Peter, while reviewing the better performing patch myself, now i think
the
patch needs more work to be committed. The structuring of the method
will be
confusing in the long term. I think I'll restructure the patch in the
next
commitfest.
So i propose to break the patch:
1. We apply the current patch which uses xmlNodeCopy, so that the
long-standing bug will be fixed in postgres.
2. I'll work with the performance enhancement in the next commitfest.Maybe for (2), the current better-performing patch can be viewed as PoC
of
the expected performance.Ali, are you currently working on that? Would you mind re-creating new
entries in the commit fest app for the new set of patches that you are
planning to do?
For now I am switching this patch as returned with feedback.
Thanks,What i mean, the last patch (v7 patch) as it is is enough to fix the bug
(nested xpath namespace problem). I think the performance regression is
still acceptable (in my case it's ~20%), because the bug is severe.
Currently, xpath can return invalid xml because the namespace isn't included
in the output!What i'll be working is the v4 patch, because it turns out the v4 patch has
better performance (~10%, but Peter's test shows it isn't the case). But,
the problem is the v4 patch is organized wrongly, and hacks around the
libxml's xml node structure (duplicating the namespace on the existing
structure). I'll work on that, but it will affects the performance benefit.So what i propose is, we close the longstanding bug in this comitfest with
the v7 patch. I'll work on improving the performance, without compromising
good code structure. If the result is good, i'll submit the patch.
OK. Could you then move this patch to the new CF with Needs Review
with Peter as reviewer then? He seems to be looking at it anyway
seeing the update from 12/11.
--
Michael
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
2014-12-15 11:06 GMT+07:00 Michael Paquier <michael.paquier@gmail.com>:
On Mon, Dec 15, 2014 at 1:02 PM, Ali Akbar <the.apaan@gmail.com> wrote:
2014-12-15 10:19 GMT+07:00 Michael Paquier <michael.paquier@gmail.com>:
On Mon, Dec 15, 2014 at 9:05 AM, Ali Akbar <the.apaan@gmail.com> wrote:
Peter, while reviewing the better performing patch myself, now i think
the
patch needs more work to be committed. The structuring of the method
will be
confusing in the long term. I think I'll restructure the patch in the
next
commitfest.
So i propose to break the patch:
1. We apply the current patch which uses xmlNodeCopy, so that the
long-standing bug will be fixed in postgres.
2. I'll work with the performance enhancement in the next commitfest.Maybe for (2), the current better-performing patch can be viewed as
PoC
of
the expected performance.Ali, are you currently working on that? Would you mind re-creating new
entries in the commit fest app for the new set of patches that you are
planning to do?
For now I am switching this patch as returned with feedback.
Thanks,What i mean, the last patch (v7 patch) as it is is enough to fix the bug
(nested xpath namespace problem). I think the performance regression is
still acceptable (in my case it's ~20%), because the bug is severe.
Currently, xpath can return invalid xml because the namespace isn'tincluded
in the output!
What i'll be working is the v4 patch, because it turns out the v4 patch
has
better performance (~10%, but Peter's test shows it isn't the case). But,
the problem is the v4 patch is organized wrongly, and hacks around the
libxml's xml node structure (duplicating the namespace on the existing
structure). I'll work on that, but it will affects the performancebenefit.
So what i propose is, we close the longstanding bug in this comitfest
with
the v7 patch. I'll work on improving the performance, without
compromising
good code structure. If the result is good, i'll submit the patch.
OK. Could you then move this patch to the new CF with Needs Review
with Peter as reviewer then? He seems to be looking at it anyway
seeing the update from 12/11.
OK. Moved to the new CF.
Regards,
--
Ali Akbar
committed version 7
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Peter Eisentraut <peter_e@gmx.net> writes:
committed version 7
Isn't that a back-patchable bug fix?
regards, tom lane
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Peter Eisentraut <peter_e@gmx.net> writes:
committed version 7
Thanks!
2015-01-07 13:28 GMT+07:00 Tom Lane <tgl@sss.pgh.pa.us>:
Isn't that a back-patchable bug fix?
Upthread, i noted:
For back versions, i think because this patch changes xpath() behavior, we
will only apply this to future versions. The old behavior is wrong
(according to XPath standard) for not including namespaces, but maybe there
are some application that depends on the old behavior.
Reviewing the behavior on 9.3, now i think the old behavior isn't usable
(the resulting xml is not even processable in postgres):
# select unnest(xpath('//a:b', '<b:a
xmlns:b="http://test.com/a"><b:b>1</b:b><b:b>2</b:b></b:a>'::xml,
array[array['a','http://test.com/a']]));
unnest
--------------
<b:b>1</b:b>
<b:b>2</b:b>
(2 rows)
# select xpath('//b:b', unnest(xpath('//a:b', '<b:a xmlns:b="
http://test.com/a"><b:b>1</b:b><b:b>2</b:b></b:a>'::xml, array[array['a','
http://test.com/a']])));
ERROR: could not parse XML document
DETAIL: line 1: Namespace prefix b on b is not defined
<b:b>1</b:b>
Maybe some application uses the result directly, but correct xml-using
applications should handle namespace correctly, so if '<b:b>1</b:b>'
becomes '<b:b xmlns:b="http://test.com/a' >1</b:b>', there should be no
issue in those applications.
So now +1 for back-patching this.
Regards,
--
Ali Akbar
On 1/7/15 8:51 PM, Ali Akbar wrote:
So now +1 for back-patching this.
Done. Was a bit of work to get it into all the old versions.
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
2015-01-18 10:44 GMT+07:00 Peter Eisentraut <peter_e@gmx.net>:
On 1/7/15 8:51 PM, Ali Akbar wrote:
So now +1 for back-patching this.
Done. Was a bit of work to get it into all the old versions.
Wow. Thanks.
Btw, for bug-fix patches like this, should the patch creator (me) also
create patches for back branches?
Regards,
--
Ali Akbar
On Sun, Jan 18, 2015 at 06:05:05PM +0700, Ali Akbar wrote:
2015-01-18 10:44 GMT+07:00 Peter Eisentraut <peter_e@gmx.net>:
On 1/7/15 8:51 PM, Ali Akbar wrote:
So now +1 for back-patching this.
Done. Was a bit of work to get it into all the old versions.
Wow. Thanks.
Btw, for bug-fix patches like this, should the patch creator (me) also
create patches for back branches?
As I understand it, back-patches are the committer's responsibility.
The submitter might make suggestions as to how this might be
approached if it doesn't appear trivial.
Cheers,
David.
--
David Fetter <david@fetter.org> http://fetter.org/
Phone: +1 415 235 3778 AIM: dfetter666 Yahoo!: dfetter
Skype: davidfetter XMPP: david.fetter@gmail.com
Remember to vote!
Consider donating to Postgres: http://www.postgresql.org/about/donate
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Mon, Jan 19, 2015 at 2:38 AM, David Fetter <david@fetter.org> wrote:
On Sun, Jan 18, 2015 at 06:05:05PM +0700, Ali Akbar wrote:
2015-01-18 10:44 GMT+07:00 Peter Eisentraut <peter_e@gmx.net>:
Btw, for bug-fix patches like this, should the patch creator (me) also
create patches for back branches?As I understand it, back-patches are the committer's responsibility.
The submitter might make suggestions as to how this might be
approached if it doesn't appear trivial.
TBH, I would imagine that patches that can be applied to back-branches
are a better start point than plain scratch particularly if there are
diffs in stable branches compared to HEAD. Everybody's time is
important.
--
Michael
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Michael Paquier <michael.paquier@gmail.com> writes:
On Mon, Jan 19, 2015 at 2:38 AM, David Fetter <david@fetter.org> wrote:
On Sun, Jan 18, 2015 at 06:05:05PM +0700, Ali Akbar wrote:
2015-01-18 10:44 GMT+07:00 Peter Eisentraut <peter_e@gmx.net>:
Btw, for bug-fix patches like this, should the patch creator (me) also
create patches for back branches?
As I understand it, back-patches are the committer's responsibility.
The submitter might make suggestions as to how this might be
approached if it doesn't appear trivial.
TBH, I would imagine that patches that can be applied to back-branches
are a better start point than plain scratch particularly if there are
diffs in stable branches compared to HEAD. Everybody's time is
important.
Yeah --- and I'd argue that it's largely a waste of time to work on
creating back-branch patches until the HEAD patch is in final form.
Since we've generally reserved the right for the committer to whack
patches around before committing, I think this means the committer
also has to do the work of back-patch adjustment.
Now a committer who doesn't feel like doing that could certainly say
"here's the version of the HEAD patch that I'm willing to commit, but
it doesn't apply cleanly in back branches; could you work up back-branch
equivalents?". But that hasn't been the usual approach.
regards, tom lane
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers