Bug in comparison of empty jsonb arrays to scalars

Started by Nikita Glukhovabout 9 years ago9 messages

n.gluhov@postgrespro.ru

about 9 years ago

1 attachment(s)

Hi hackers.

While working on jsonbstatistics, I found the following bug:
an empty jsonb array is considered to be lesser than any scalar,
but it is expected that objects > arrays > scalars.

# select '[]'::jsonb < 'null'::jsonb;
?column?
----------
t
(1 row)

Attached patch contains:
1. bug fix (added the missing "else" in compareJsonbContainers())
2. regression test
3. pg_upgrade: invalidation of btree indexes on jsonb columns and
REINDEX-script generation

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

Attachments:

fix_empty_jsonb_array_comparison_v01.patchtext/x-patch; name=fix_empty_jsonb_array_comparison_v01.patchDownload

diff --git a/src/backend/utils/adt/jsonb_util.c b/src/backend/utils/adt/jsonb_util.c
index ddc34ce..43934bf 100644
--- a/src/backend/utils/adt/jsonb_util.c
+++ b/src/backend/utils/adt/jsonb_util.c
@@ -232,7 +232,7 @@ compareJsonbContainers(JsonbContainer *a, JsonbContainer *b)
 						 */
 						if (va.val.array.rawScalar != vb.val.array.rawScalar)
 							res = (va.val.array.rawScalar) ? -1 : 1;
-						if (va.val.array.nElems != vb.val.array.nElems)
+						else if (va.val.array.nElems != vb.val.array.nElems)
 							res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1;
 						break;
 					case jbvObject:
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index 42bf499..81c1616 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -115,6 +115,11 @@ check_and_dump_old_cluster(bool live_check)
 	if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)
 		new_9_0_populate_pg_largeobject_metadata(&old_cluster, true);
 
+	/* Pre-PG 10.0 had bug in jsonb comparison operator  */
+	if (GET_MAJOR_VERSION(old_cluster.major_version) <= 906 &&
+		GET_MAJOR_VERSION(old_cluster.major_version) >= 904)
+		old_9_6_invalidate_jsonb_btree_indexes(&old_cluster, true);
+
 	/*
 	 * While not a check option, we do this now because this is the only time
 	 * the old server is running.
@@ -166,11 +171,26 @@ report_clusters_compatible(void)
 void
 issue_warnings(void)
 {
-	/* Create dummy large object permissions for old < PG 9.0? */
-	if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)
+	bool need_new_9_0_populate_pg_largeobject_metadata =
+			GET_MAJOR_VERSION(old_cluster.major_version) <= 804;
+
+	bool need_old_9_6_invalidate_jsonb_btree_indexes =
+			GET_MAJOR_VERSION(old_cluster.major_version) <= 906 &&
+			GET_MAJOR_VERSION(old_cluster.major_version) >= 904;
+
+	if (need_new_9_0_populate_pg_largeobject_metadata ||
+		need_old_9_6_invalidate_jsonb_btree_indexes)
 	{
 		start_postmaster(&new_cluster, true);
-		new_9_0_populate_pg_largeobject_metadata(&new_cluster, false);
+
+		/* Create dummy large object permissions for old < PG 9.0? */
+		if (need_new_9_0_populate_pg_largeobject_metadata)
+			new_9_0_populate_pg_largeobject_metadata(&new_cluster, false);
+
+		/* invalidate jsonb btree indexes for old < PG 10.0  */
+		if (need_old_9_6_invalidate_jsonb_btree_indexes)
+			old_9_6_invalidate_jsonb_btree_indexes(&new_cluster, false);
+
 		stop_postmaster(false);
 	}
 }
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 19dca83..07e0ca6 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -442,6 +442,8 @@ void		pg_putenv(const char *var, const char *val);
 void new_9_0_populate_pg_largeobject_metadata(ClusterInfo *cluster,
 										 bool check_mode);
 void		old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster);
+void old_9_6_invalidate_jsonb_btree_indexes(ClusterInfo *cluster,
+											bool check_mode);
 
 /* parallel.c */
 void parallel_exec_prog(const char *log_file, const char *opt_log_file,
diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c
index 3c7c5fa..b1a3b89 100644
--- a/src/bin/pg_upgrade/version.c
+++ b/src/bin/pg_upgrade/version.c
@@ -10,6 +10,7 @@
 #include "postgres_fe.h"
 
 #include "pg_upgrade.h"
+#include "catalog/pg_type.h"
 #include "fe_utils/string_utils.h"
 
 
@@ -185,3 +186,116 @@ old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster)
 	else
 		check_ok();
 }
+
+/*
+ * old_9_6_invalidate_jsonb_btree_indexes()
+ *	9.4-9.6 -> 10.0
+ *	Btree index ordering for jsonb had been fixed in 10.0
+ */
+void
+old_9_6_invalidate_jsonb_btree_indexes(ClusterInfo *cluster, bool check_mode)
+{
+	int			dbnum;
+	FILE	   *script = NULL;
+	bool		found = false;
+	char		output_path[MAXPGPATH];
+
+	prep_status("Checking for jsonb btree indexes existence");
+
+	snprintf(output_path, sizeof(output_path), "reindex_jsonb_btree.sql");
+
+	for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
+	{
+		PGresult   *res;
+		bool		db_used = false;
+		int			ntups;
+		int			rowno;
+		int			i_nspname,
+					i_relname;
+		DbInfo	   *active_db = &cluster->dbarr.dbs[dbnum];
+		PGconn	   *conn = connectToServer(cluster, active_db->db_name);
+
+		/* find jsonb btree indexes */
+		res = executeQueryOrDie(conn,
+								"SELECT DISTINCT n.nspname, c.relname "
+								"FROM	pg_catalog.pg_class c, "
+								"		pg_catalog.pg_index i, "
+								"		pg_catalog.pg_am am, "
+								"		pg_catalog.pg_namespace n, "
+								"		pg_catalog.pg_attribute a "
+								"WHERE	i.indexrelid = c.oid AND "
+								"		c.relam = am.oid AND "
+								"		c.relnamespace = n.oid AND "
+								"		a.attrelid = c.oid AND "
+								"		a.atttypid = " CppAsString2(JSONBOID) " AND "
+								"		am.amname = 'btree'");
+
+		ntups = PQntuples(res);
+		i_nspname = PQfnumber(res, "nspname");
+		i_relname = PQfnumber(res, "relname");
+
+		for (rowno = 0; rowno < ntups; rowno++)
+		{
+			found = true;
+			if (!check_mode)
+			{
+				if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
+					pg_fatal("could not open file \"%s\": %s\n", output_path, strerror(errno));
+				if (!db_used)
+				{
+					fprintf(script, "\\connect %s\n",
+							quote_identifier(active_db->db_name));
+					db_used = true;
+				}
+				fprintf(script, "REINDEX INDEX %s.%s;\n",
+						quote_identifier(PQgetvalue(res, rowno, i_nspname)),
+						quote_identifier(PQgetvalue(res, rowno, i_relname)));
+			}
+		}
+
+		PQclear(res);
+
+		if (!check_mode && found)
+			/* mark jsonb btree indexes as invalid */
+			PQclear(executeQueryOrDie(conn,
+									  "UPDATE pg_catalog.pg_index i "
+									  "SET	indisvalid = false "
+									  "FROM	pg_catalog.pg_class c, "
+									  "		pg_catalog.pg_am am, "
+									  "		pg_catalog.pg_namespace n, "
+									  "		pg_catalog.pg_attribute a "
+									  "WHERE	i.indexrelid = c.oid AND "
+									  "		c.relam = am.oid AND "
+									  "		c.relnamespace = n.oid AND "
+									  "		a.attrelid = c.oid AND "
+									  "		a.atttypid = " CppAsString2(JSONBOID) " AND "
+									  "		am.amname = 'btree'"));
+
+		PQfinish(conn);
+	}
+
+	if (script)
+		fclose(script);
+
+	if (found)
+	{
+		report_status(PG_WARNING, "warning");
+		if (check_mode)
+			pg_log(PG_WARNING, "\n"
+				   "Your installation contains jsonb btree indexes.  These indexes have\n"
+				   "different comparison rules between your old and new clusters, so they\n"
+				   "must be reindexed with the REINDEX command.  After upgrading, you will\n"
+				   "be given REINDEX instructions.\n\n");
+		else
+			pg_log(PG_WARNING, "\n"
+				   "Your installation contains jsonb btree indexes.  These indexes have\n"
+				   "different comparison rules between your old and new clusters, so they\n"
+				   "must be reindexed with the REINDEX command.  The file:\n"
+				   "    %s\n"
+				   "when executed by psql by the database superuser will recreate all invalid\n"
+				   "indexes; until then, none of these indexes will be used.\n\n",
+				   output_path);
+	}
+	else
+		check_ok();
+}
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index e2cb08a..203088d 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -2256,6 +2256,13 @@ SELECT count(*) FROM testjsonb WHERE j = '{"pos":98, "line":371, "node":"CBA", "
      1
 (1 row)
 
+-- bug in comparison of [] and scalars
+SELECT '[]'::jsonb > 'null'::jsonb;
+ ?column? 
+----------
+ t
+(1 row)
+
 --gin path opclass
 DROP INDEX jidx;
 CREATE INDEX jidx ON testjsonb USING gin (j jsonb_path_ops);
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index 6b4c796..2d37c4f 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -580,6 +580,9 @@ SET enable_seqscan = off;
 SELECT count(*) FROM testjsonb WHERE j > '{"p":1}';
 SELECT count(*) FROM testjsonb WHERE j = '{"pos":98, "line":371, "node":"CBA", "indexed":true}';
 
+-- bug in comparison of [] and scalars
+SELECT '[]'::jsonb > 'null'::jsonb;
+
 --gin path opclass
 DROP INDEX jidx;
 CREATE INDEX jidx ON testjsonb USING gin (j jsonb_path_ops);

Michael Paquier

michael.paquier@gmail.com

about 9 years ago

In reply to: Nikita Glukhov (#1)

Re: Bug in comparison of empty jsonb arrays to scalars

On Wed, Nov 9, 2016 at 8:31 AM, Nikita Glukhov <n.gluhov@postgrespro.ru> wrote:

Hi hackers.

While working on jsonbstatistics, I found the following bug:
an empty jsonb array is considered to be lesser than any scalar,
but it is expected that objects > arrays > scalars.

Sources? Does the JSON spec contain any information regarding
comparison operators? I don't think so, so that would be up to the
implementation to decide that, no?

Btw I would agree with you that's quite unintuitive, but that's not
wrong either to keep the current comparison algorithm because that's
harmless for btree. We could have more regression tests to make the
current behavior clear though. Thoughts from others are welcome.
--
Michael

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Robert Haas

robertmhaas@gmail.com

about 9 years ago

In reply to: Michael Paquier (#2)

Re: Bug in comparison of empty jsonb arrays to scalars

On Tue, Nov 8, 2016 at 9:49 PM, Michael Paquier
<michael.paquier@gmail.com> wrote:

On Wed, Nov 9, 2016 at 8:31 AM, Nikita Glukhov <n.gluhov@postgrespro.ru> wrote:

Hi hackers.

While working on jsonbstatistics, I found the following bug:
an empty jsonb array is considered to be lesser than any scalar,
but it is expected that objects > arrays > scalars.

Sources?

How about "our documentation"?

https://www.postgresql.org/docs/current/static/datatype-json.html

Look at the last page.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Michael Paquier

michael.paquier@gmail.com

about 9 years ago

In reply to: Robert Haas (#3)

Re: Bug in comparison of empty jsonb arrays to scalars

On Thu, Nov 10, 2016 at 3:27 AM, Robert Haas <robertmhaas@gmail.com> wrote:

On Tue, Nov 8, 2016 at 9:49 PM, Michael Paquier
<michael.paquier@gmail.com> wrote:

On Wed, Nov 9, 2016 at 8:31 AM, Nikita Glukhov <n.gluhov@postgrespro.ru> wrote:

Hi hackers.

While working on jsonbstatistics, I found the following bug:
an empty jsonb array is considered to be lesser than any scalar,
but it is expected that objects > arrays > scalars.

Sources?

How about "our documentation"?

https://www.postgresql.org/docs/current/static/datatype-json.html

Indeed, I missed that. So that's broken...
--
Michael

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Tom Lane

tgl@sss.pgh.pa.us

about 9 years ago

In reply to: Michael Paquier (#4)

Re: Bug in comparison of empty jsonb arrays to scalars

Michael Paquier <michael.paquier@gmail.com> writes:

On Thu, Nov 10, 2016 at 3:27 AM, Robert Haas <robertmhaas@gmail.com> wrote:

https://www.postgresql.org/docs/current/static/datatype-json.html

Indeed, I missed that. So that's broken...

Given that nobody actually cares what that sort order is, I think that
having to jump through hoops in pg_upgrade in order to fix it is not a
great tradeoff. I suggest changing the documentation to match the code.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Michael Paquier

michael.paquier@gmail.com

about 9 years ago

In reply to: Tom Lane (#5)

1 attachment(s)

Re: Bug in comparison of empty jsonb arrays to scalars

On Thu, Nov 10, 2016 at 7:37 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Michael Paquier <michael.paquier@gmail.com> writes:

On Thu, Nov 10, 2016 at 3:27 AM, Robert Haas <robertmhaas@gmail.com> wrote:

https://www.postgresql.org/docs/current/static/datatype-json.html

Indeed, I missed that. So that's broken...

Given that nobody actually cares what that sort order is, I think that
having to jump through hoops in pg_upgrade in order to fix it is not a
great tradeoff. I suggest changing the documentation to match the code.

Yes, definitely.
=# create table json_data (a jsonb);
CREATE TABLE
=# INSERT INTO json_data values ('{}'::jsonb), ('[]'::jsonb),
('null'::jsonb), ('true'::jsonb), ('1'::jsonb), ('""'::jsonb);
INSERT 0 6
=# SELECT * FROM json_data ORDER BY 1 DESC;
a
------
{}
true
1
""
null
[]
(6 rows)
So that's object > boolean > integer > string > NULL > array.

And attached is a patch.
--
Michael

Attachments:

json-ordering.patchtext/plain; charset=US-ASCII; name=json-ordering.patchDownload

diff --git a/doc/src/sgml/json.sgml b/doc/src/sgml/json.sgml
index 3cf78d6..b2688ff 100644
--- a/doc/src/sgml/json.sgml
+++ b/doc/src/sgml/json.sgml
@@ -542,7 +542,7 @@ SELECT jdoc-&gt;'guid', jdoc-&gt;'name' FROM api WHERE jdoc @&gt; '{"tags": ["qu
     The <literal>btree</> ordering for <type>jsonb</> datums is seldom
     of great interest, but for completeness it is:
 <synopsis>
-<replaceable>Object</replaceable> > <replaceable>Array</replaceable> > <replaceable>Boolean</replaceable> > <replaceable>Number</replaceable> > <replaceable>String</replaceable> > <replaceable>Null</replaceable>
+<replaceable>Object</replaceable> > <replaceable>Boolean</replaceable> > <replaceable>Number</replaceable> > <replaceable>String</replaceable> > <replaceable>Null</replaceable> > <replaceable>Array</replaceable>
 
 <replaceable>Object with n pairs</replaceable> > <replaceable>object with n - 1 pairs</replaceable>

Ali Akbar

the.apaan@gmail.com

about 9 years ago

In reply to: Michael Paquier (#6)

Re: Bug in comparison of empty jsonb arrays to scalars

2016-11-10 13:54 GMT+07:00 Michael Paquier <michael.paquier@gmail.com>:

On Thu, Nov 10, 2016 at 7:37 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Given that nobody actually cares what that sort order is, I think that
having to jump through hoops in pg_upgrade in order to fix it is not a
great tradeoff. I suggest changing the documentation to match the code.

Don't you in this case think we should match sort order in javascript?

Yes, definitely.
=# create table json_data (a jsonb);
CREATE TABLE
=# INSERT INTO json_data values ('{}'::jsonb), ('[]'::jsonb),
('null'::jsonb), ('true'::jsonb), ('1'::jsonb), ('""'::jsonb);
INSERT 0 6
=# SELECT * FROM json_data ORDER BY 1 DESC;
a
------
{}
true
1
""
null
[]
(6 rows)
So that's object > boolean > integer > string > NULL > array.

a = [{}, [], null, true, 1, '""']

[ {}, [], null, true, 1, '""' ]

a.sort()

[ [], '""', 1, {}, null, true ]

a.reverse()

[ true, null, {}, 1, '""', [] ]

So in this case it's boolean > NULL > Object > integer > string > array
(tried in Chromium 53, Firefox 49 and Node v6.9.1)

When I tried to search for the ECMA Standard for this behavior, i found
this: http://blog.rodneyrehm.de/archives/14-Sorting-Were-Doing-It-Wrong.html.
There are problems about automatic conversion in javascript, like this:

a = [{}, [], null, true, 1, 'someotherstring']

[ {}, [], null, true, 1, 'someotherstring' ]

a.sort().reverse()

[ true, 'someotherstring', null, {}, 1, [] ]

versus this:

a = [{}, [], null, true, 1, 'SomeOtherString']

[ {}, [], null, true, 1, 'SomeOtherString' ]

a.sort().reverse()

[ true, null, {}, 'SomeOtherString', 1, [] ]

and this:

a = [{}, [], null, true, 1, '2']

[ {}, [], null, true, 1, '2' ]

a.sort().reverse()

[ true, null, {}, '2', 1, [] ]

So we can't replicate javascript sort order without emulating those.

Regards,
Ali Akbar

Nikita Glukhov

n.gluhov@postgrespro.ru

about 9 years ago

In reply to: Michael Paquier (#6)

Re: Bug in comparison of empty jsonb arrays to scalars

On 10.11.2016 09:54, Michael Paquier wrote:

Yes, definitely.
=# create table json_data (a jsonb);
CREATE TABLE
=# INSERT INTO json_data values ('{}'::jsonb), ('[]'::jsonb),
('null'::jsonb), ('true'::jsonb), ('1'::jsonb), ('""'::jsonb);
INSERT 0 6
=# SELECT * FROM json_data ORDER BY 1 DESC;
a
------
{}
true
1
""
null
[]
(6 rows)
So that's object > boolean > integer > string > NULL > array.

And attached is a patch.

Perhaps I did not explain it clearly enough, but only *empty top-level*
arrays are out of the correct order.
See complete example:

=# SELECT * FROM (VALUES
('null'::jsonb), ('0'), ('""'), ('true'), ('[]'), ('{}'),
('[null]'), ('[0][true] [[]] [{}] {} {"a": null} {"a": ""} {"a": 0} {"a": true} {"a": []} {"a": {}} (18 rows)'), ('[""]'), ('[true]'), ('[[]]'), ('[{}]'),
('{"a": null}'), ('{"a": 0}'), ('{"a": ""}'), ('{"a": true}'),
('{"a": []}'), ('{"a": {}}')
) valsORDER BY 1;
column1
-------------
[]
null
""
0
true
[null]
[""]
[0]: [true] [[]] [{}] {} {"a": null} {"a": ""} {"a": 0} {"a": true} {"a": []} {"a": {}} (18 rows)
[true]
[[]]
[{}]
{}
{"a": null}
{"a": ""}
{"a": 0}
{"a": true}
{"a": []}
{"a": {}}
(18 rows)

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Tom Lane

tgl@sss.pgh.pa.us

about 9 years ago

In reply to: Michael Paquier (#6)

Re: Bug in comparison of empty jsonb arrays to scalars

Michael Paquier <michael.paquier@gmail.com> writes:

On Thu, Nov 10, 2016 at 7:37 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Given that nobody actually cares what that sort order is, I think that
having to jump through hoops in pg_upgrade in order to fix it is not a
great tradeoff. I suggest changing the documentation to match the code.

Yes, definitely.
So that's object > boolean > integer > string > NULL > array.

No, because the issue is that empty and nonempty arrays sort differently.

regression=# create table json_data (a jsonb);
CREATE TABLE
regression=# INSERT INTO json_data values ('{}'::jsonb), ('[]'::jsonb),
regression-# ('null'::jsonb), ('true'::jsonb), ('1'::jsonb), ('""'::jsonb),
regression-# ('[42]true 1 "" null [] (8 rows)'::jsonb),('[[43]]'::jsonb);
INSERT 0 8
regression=# SELECT * FROM json_data ORDER BY 1 DESC;
a
--------
{}
[[43]]
[42]: true 1 "" null [] (8 rows)
true
1
""
null
[]
(8 rows)

And attached is a patch.

If we go with the fix-the-docs approach, we'll need to have two entries
for empty and nonempty arrays. It's definitely ugly. Still, my judgement
is that it's not worth the pain of changing the behavior. It was never
intended that this sort order be anything but an implementation detail.

(I guess another approach is to not document the order at all ...)

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers