Add jsonb_compact(...) for whitespace-free jsonb to text

Started by Sehrope Sarkuniover 9 years ago30 messages

sehrope@jackdb.com

over 9 years ago

1 attachment(s)

Hi,

The default text representation of jsonb adds whitespace in between
key/value pairs (after the colon ":") and after successive properties
(after the comma ","):

postgres=# SELECT '{"b":2,"a":1}'::jsonb::text;
text
------------------
{"a": 1, "b": 2}
(1 row)

AFAIK, there's also no guarantee on the specific order of the resulting
properties in the text representation either. I would suppose it's fixed
for a given jsonb value within a database major version but across major
versions it could change (if the underlying representation changes).

I originally ran into this when comparing the hashes of the text
representation of jsonb columns. The results didn't match up because the
javascript function JSON.stringify(...) does not add any extra whitespace.

It'd be nice to have a stable text representation of a jsonb value with
minimal whitespace. The latter would also save a few bytes per record in
text output formats, on the wire, and in backups (ex: COPY ... TO STDOUT).

Attached is a *very* work in progress patch that adds a
jsonb_compact(jsonb)::text function. It generates a text representation
without extra whitespace but does not yet try to enforce a stable order of
the properties within a jsonb value.

Here's some sample usage:

postgres=# SELECT x::text, jsonb_compact(x) FROM (VALUES ('{}'::jsonb),
('{"a":1,"b":2}'), ('[1,2,3]'), ('{"a":{"b":1}}')) AS t(x);
x | jsonb_compact
------------------+---------------
{} | {}
{"a": 1, "b": 2} | {"a":1,"b":2}
[1, 2, 3] | [1,2,3]
{"a": {"b": 1}} | {"a":{"b":1}}
(4 rows)

There aren't any tests yet but I'd like to continue working on it for
inclusion in 9.7. I'm posting it now to see if there's interest in the idea
and get some feedback on the approach (this is my first real patch...).

Regards,
-- Sehrope Sarkuni
Founder & CEO | JackDB, Inc. | https://www.jackdb.com/

Attachments:

add_jsonb_compact.patchtext/x-patch; charset=US-ASCII; name=add_jsonb_compact.patchDownload

diff --git a/src/backend/utils/adt/jsonb.c b/src/backend/utils/adt/jsonb.c
index 256ef80..1ecc17a 100644
--- a/src/backend/utils/adt/jsonb.c
+++ b/src/backend/utils/adt/jsonb.c
@@ -60,6 +60,13 @@ typedef struct JsonbAggState
 	Oid			val_output_func;
 } JsonbAggState;
 
+typedef enum                 /* type categories for jsonb to string whitespace */
+{
+	JSONBWHITESPACE_DEFAULT, /* Default style with space between keys but no indentation */
+	JSONBWHITESPACE_INDENT,  /* Extra indentation of four spaces for each nested level */
+	JSONBWHITESPACE_COMPACT  /* Compact style without extra whitespace between keys */
+} JsonbWhitespaceStyle;
+
 static inline Datum jsonb_from_cstring(char *json, int len);
 static size_t checkStringLen(size_t len);
 static void jsonb_in_object_start(void *pstate);
@@ -86,7 +93,7 @@ static void datum_to_jsonb(Datum val, bool is_null, JsonbInState *result,
 static void add_jsonb(Datum val, bool is_null, JsonbInState *result,
 		  Oid val_type, bool key_scalar);
 static JsonbParseState *clone_parse_state(JsonbParseState *state);
-static char *JsonbToCStringWorker(StringInfo out, JsonbContainer *in, int estimated_len, bool indent);
+static char *JsonbToCStringWorker(StringInfo out, JsonbContainer *in, int estimated_len, JsonbWhitespaceStyle whitespace_style);
 static void add_indent(StringInfo out, bool indent, int level);
 
 /*
@@ -427,7 +434,7 @@ jsonb_in_scalar(void *pstate, char *token, JsonTokenType tokentype)
 char *
 JsonbToCString(StringInfo out, JsonbContainer *in, int estimated_len)
 {
-	return JsonbToCStringWorker(out, in, estimated_len, false);
+	return JsonbToCStringWorker(out, in, estimated_len, JSONBWHITESPACE_DEFAULT);
 }
 
 /*
@@ -436,14 +443,23 @@ JsonbToCString(StringInfo out, JsonbContainer *in, int estimated_len)
 char *
 JsonbToCStringIndent(StringInfo out, JsonbContainer *in, int estimated_len)
 {
-	return JsonbToCStringWorker(out, in, estimated_len, true);
+	return JsonbToCStringWorker(out, in, estimated_len, JSONBWHITESPACE_INDENT);
+}
+
+/*
+ * same thing but in compact form (no extra whitespace)
+ */
+char *
+JsonbToCStringCompact(StringInfo out, JsonbContainer *in, int estimated_len)
+{
+	return JsonbToCStringWorker(out, in, estimated_len, JSONBWHITESPACE_COMPACT);
 }
 
 /*
  * common worker for above two functions
  */
 static char *
-JsonbToCStringWorker(StringInfo out, JsonbContainer *in, int estimated_len, bool indent)
+JsonbToCStringWorker(StringInfo out, JsonbContainer *in, int estimated_len, JsonbWhitespaceStyle whitespace_style)
 {
 	bool		first = true;
 	JsonbIterator *it;
@@ -452,8 +468,24 @@ JsonbToCStringWorker(StringInfo out, JsonbContainer *in, int estimated_len, bool
 	int			level = 0;
 	bool		redo_switch = false;
 
-	/* If we are indenting, don't add a space after a comma */
-	int			ispaces = indent ? 1 : 2;
+	bool		indent = false;
+	char*		prop_sep          = ", "; /* Separator between successive properties */
+	char*		key_value_sep     = ": "; /* Separator between a key and it's value */
+	int 		prop_sep_len;
+	int 		key_value_sep_len;
+	if (whitespace_style == JSONBWHITESPACE_DEFAULT) {
+		// Use default separators
+	} else if (whitespace_style == JSONBWHITESPACE_INDENT) {
+		indent   = true;
+		prop_sep = ",";
+		key_value_sep  = ": ";
+	} else if (whitespace_style == JSONBWHITESPACE_COMPACT) {
+		indent   = false;
+		prop_sep = ",";
+		key_value_sep  = ":";
+	}
+	prop_sep_len      = strlen(prop_sep);
+	key_value_sep_len = strlen(key_value_sep);
 
 	/*
 	 * Don't indent the very first item. This gets set to the indent flag at
@@ -478,7 +510,7 @@ JsonbToCStringWorker(StringInfo out, JsonbContainer *in, int estimated_len, bool
 		{
 			case WJB_BEGIN_ARRAY:
 				if (!first)
-					appendBinaryStringInfo(out, ", ", ispaces);
+					appendBinaryStringInfo(out, prop_sep, prop_sep_len);
 
 				if (!v.val.array.rawScalar)
 				{
@@ -493,7 +525,7 @@ JsonbToCStringWorker(StringInfo out, JsonbContainer *in, int estimated_len, bool
 				break;
 			case WJB_BEGIN_OBJECT:
 				if (!first)
-					appendBinaryStringInfo(out, ", ", ispaces);
+					appendBinaryStringInfo(out, prop_sep, prop_sep_len);
 
 				add_indent(out, use_indent && !last_was_key, level);
 				appendStringInfoCharMacro(out, '{');
@@ -503,14 +535,14 @@ JsonbToCStringWorker(StringInfo out, JsonbContainer *in, int estimated_len, bool
 				break;
 			case WJB_KEY:
 				if (!first)
-					appendBinaryStringInfo(out, ", ", ispaces);
+					appendBinaryStringInfo(out, prop_sep, prop_sep_len);
 				first = true;
 
 				add_indent(out, use_indent, level);
 
 				/* json rules guarantee this is a string */
 				jsonb_put_escaped_value(out, &v);
-				appendBinaryStringInfo(out, ": ", 2);
+				appendBinaryStringInfo(out, key_value_sep, key_value_sep_len);
 
 				type = JsonbIteratorNext(&it, &v, false);
 				if (type == WJB_VALUE)
@@ -532,7 +564,7 @@ JsonbToCStringWorker(StringInfo out, JsonbContainer *in, int estimated_len, bool
 				break;
 			case WJB_ELEM:
 				if (!first)
-					appendBinaryStringInfo(out, ", ", ispaces);
+					appendBinaryStringInfo(out, prop_sep, prop_sep_len);
 				first = false;
 
 				if (!raw_scalar)
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index fb149dc..0ab324b 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -3353,6 +3353,22 @@ jsonb_pretty(PG_FUNCTION_ARGS)
 }
 
 /*
+ * SQL function jsonb_compact (jsonb)
+ *
+ * Compact-printed text for the jsonb
+ */
+Datum
+jsonb_compact(PG_FUNCTION_ARGS)
+{
+	Jsonb	   *jb = PG_GETARG_JSONB(0);
+	StringInfo	str = makeStringInfo();
+
+	JsonbToCStringCompact(str, &jb->root, VARSIZE(jb));
+
+	PG_RETURN_TEXT_P(cstring_to_text_with_len(str->data, str->len));
+}
+
+/*
  * SQL function jsonb_concat (jsonb, jsonb)
  *
  * function for || operator
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index bb539d4..0033071 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4880,6 +4880,8 @@ DATA(insert OID = 3305 (  jsonb_set    PGNSP PGUID 12 1 0 0 0 f f f f t f i s 4
 DESCR("Set part of a jsonb");
 DATA(insert OID = 3306 (  jsonb_pretty	   PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_pretty _null_ _null_ _null_ ));
 DESCR("Indented text from jsonb");
+DATA(insert OID = 3369 (  jsonb_compact	   PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_compact _null_ _null_ _null_ ));
+DESCR("Compact text from jsonb");
 DATA(insert OID = 3579 (  jsonb_insert    PGNSP PGUID 12 1 0 0 0 f f f f t f i s 4 0 3802 "3802 1009 3802 16" _null_ _null_ _null_ _null_ _null_ jsonb_insert _null_ _null_ _null_ ));
 DESCR("Insert value into a jsonb");
 /* txid */
diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h
index 5d8e4a9..4ce4fab 100644
--- a/src/include/utils/jsonb.h
+++ b/src/include/utils/jsonb.h
@@ -397,6 +397,9 @@ extern Datum gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS);
 /* pretty printer, returns text */
 extern Datum jsonb_pretty(PG_FUNCTION_ARGS);
 
+/* compact printer, returns text */
+extern Datum jsonb_compact(PG_FUNCTION_ARGS);
+
 /* concatenation */
 extern Datum jsonb_concat(PG_FUNCTION_ARGS);
 
@@ -435,6 +438,8 @@ extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
 			   int estimated_len);
 extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in,
 					 int estimated_len);
+extern char *JsonbToCStringCompact(StringInfo out, JsonbContainer *in,
+					 int estimated_len);
 
 
 #endif   /* __JSONB_H__ */

Andrew Dunstan

andrew@dunslane.net

over 9 years ago

In reply to: Sehrope Sarkuni (#1)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On 04/24/2016 06:02 PM, Sehrope Sarkuni wrote:

AFAIK, there's also no guarantee on the specific order of the
resulting properties in the text representation either. I would
suppose it's fixed for a given jsonb value within a database major
version but across major versions it could change (if the underlying
representation changes).

The order is fixed and very unlikely to change, as it was chosen quite
deliberately to help ensure efficient processing. Any change in on-disk
representation of data types is something we work very hard to avoid, as
it makes it impossible to run pg_upgrade.

It's true that the storage order of keys is not terribly intuitive.

Note that the json type, unlike jsonb, preserves exactly the white space
and key order of its input. In fact, the input is exactly what it stores.

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Stephen Frost

sfrost@snowman.net

over 9 years ago

In reply to: Andrew Dunstan (#2)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

* Andrew Dunstan (andrew@dunslane.net) wrote:

On 04/24/2016 06:02 PM, Sehrope Sarkuni wrote:

AFAIK, there's also no guarantee on the specific order of the
resulting properties in the text representation either. I would
suppose it's fixed for a given jsonb value within a database major
version but across major versions it could change (if the
underlying representation changes).

The order is fixed and very unlikely to change, as it was chosen
quite deliberately to help ensure efficient processing. Any change
in on-disk representation of data types is something we work very
hard to avoid, as it makes it impossible to run pg_upgrade.

We do, from time-to-time, change on-disk formats in a
backwards-compatible way though. In any case, it's my understanding
that we don't *guarantee* any ordering currently and therefore we should
discourage users from depending on it. If we *are* going to guarantee
ordering, then we should document what that ordering is.

Thanks!

Stephen

Sehrope Sarkuni

sehrope@jackdb.com

over 9 years ago

In reply to: Stephen Frost (#3)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Sun, Apr 24, 2016 at 9:27 PM, Stephen Frost <sfrost@snowman.net> wrote:

* Andrew Dunstan (andrew@dunslane.net) wrote:

On 04/24/2016 06:02 PM, Sehrope Sarkuni wrote:

AFAIK, there's also no guarantee on the specific order of the
resulting properties in the text representation either. I would
suppose it's fixed for a given jsonb value within a database major
version but across major versions it could change (if the
underlying representation changes).

The order is fixed and very unlikely to change, as it was chosen
quite deliberately to help ensure efficient processing. Any change
in on-disk representation of data types is something we work very
hard to avoid, as it makes it impossible to run pg_upgrade.

We do, from time-to-time, change on-disk formats in a
backwards-compatible way though. In any case, it's my understanding
that we don't *guarantee* any ordering currently and therefore we should
discourage users from depending on it. If we *are* going to guarantee
ordering, then we should document what that ordering is.

Yes that's the idea, namely to have a fixed text format that will not
change across releases. If the on-disk representation is already supports
that then this could just be a doc change (assuming there's agreement that
it's a good idea and said guarantee will be maintained).

Separately, I think the compact (i.e. whitespace free) output is useful on
it's own. It adds up to two bytes per key/value pair (one after the colon
and one after the comma) so the more keys you have the more the savings.

Here's a (contrived) example to show the size difference when serializing
information_schema.columns. The row_to_json(...) function returns
whitespace free output (as json, not jsonb) so it's a proxy for
json_compact(..). It comes out to 7.5% smaller than the default jsonb text
format:

app=> SELECT
MAX((SELECT COUNT(*) FROM json_object_keys(x))) AS num_keys,
AVG(length(x::text)) AS json_text,
AVG(length(x::jsonb::text)) AS jsonb_text,
AVG(length(x::text)) / AVG(length(x::jsonb::text)) AS ratio
FROM (SELECT row_to_json(z.*) AS x
FROM information_schema.columns z) t;
num_keys | json_text | jsonb_text | ratio

----------+-----------------------+-----------------------+------------------------
44 | 1077.0748522652659225 | 1164.0748522652659225 |
0.92526253803121012857
(1 row)

Regards,
-- Sehrope Sarkuni
Founder & CEO | JackDB, Inc. | https://www.jackdb.com/

Ryan Pedela

rpedela@datalanche.com

over 9 years ago

In reply to: Sehrope Sarkuni (#1)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Sun, Apr 24, 2016 at 4:02 PM, Sehrope Sarkuni <sehrope@jackdb.com> wrote:

Hi,

The default text representation of jsonb adds whitespace in between
key/value pairs (after the colon ":") and after successive properties
(after the comma ","):

postgres=# SELECT '{"b":2,"a":1}'::jsonb::text;
text
------------------
{"a": 1, "b": 2}
(1 row)

AFAIK, there's also no guarantee on the specific order of the resulting
properties in the text representation either. I would suppose it's fixed
for a given jsonb value within a database major version but across major
versions it could change (if the underlying representation changes).

I originally ran into this when comparing the hashes of the text
representation of jsonb columns. The results didn't match up because the
javascript function JSON.stringify(...) does not add any extra whitespace.

It'd be nice to have a stable text representation of a jsonb value with
minimal whitespace. The latter would also save a few bytes per record in
text output formats, on the wire, and in backups (ex: COPY ... TO STDOUT).

I cannot comment on the patch itself, but I welcome jsonb_compact() or some
way to get JSON with no inserted whitespace.

Stephen Frost

sfrost@snowman.net

over 9 years ago

In reply to: Ryan Pedela (#5)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

* Ryan Pedela (rpedela@datalanche.com) wrote:

On Sun, Apr 24, 2016 at 4:02 PM, Sehrope Sarkuni <sehrope@jackdb.com> wrote:

The default text representation of jsonb adds whitespace in between
key/value pairs (after the colon ":") and after successive properties
(after the comma ","):

[...]

It'd be nice to have a stable text representation of a jsonb value with
minimal whitespace. The latter would also save a few bytes per record in
text output formats, on the wire, and in backups (ex: COPY ... TO STDOUT).

+1

I cannot comment on the patch itself, but I welcome jsonb_compact() or some
way to get JSON with no inserted whitespace.

As I mentioned to Sehrope on IRC, at least for my 2c, if you want a
compact JSON format to reduce the amount of traffic over the wire or to
do things with on the client side, we should probably come up with a
binary format, rather than just hack out the whitespace. It's not like
representing numbers using ASCII characters is terribly efficient
either.

Compression might be another option, though that's certainly less
flexible and only (easily) used in combination with SSL, today.

Thanks!

Stephen

Merlin Moncure

mmoncure@gmail.com

over 9 years ago

In reply to: Andrew Dunstan (#2)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Sun, Apr 24, 2016 at 8:16 PM, Andrew Dunstan <andrew@dunslane.net> wrote:

Note that the json type, unlike jsonb, preserves exactly the white space and
key order of its input. In fact, the input is exactly what it stores.

That is true, but the json serialization functions (to_json etc)
really out to have the same whitespace strategy is jsonb text out. Of
the two ways it's currently done, the json serialization variant seems
better but a completely compact variant seems like a good idea basis
of efficiency.

merlin

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Dave Cramer

pg@fastcrypt.com

over 9 years ago

In reply to: Stephen Frost (#6)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On 26 April 2016 at 12:49, Stephen Frost <sfrost@snowman.net> wrote:

* Ryan Pedela (rpedela@datalanche.com) wrote:

On Sun, Apr 24, 2016 at 4:02 PM, Sehrope Sarkuni <sehrope@jackdb.com>

wrote:

The default text representation of jsonb adds whitespace in between
key/value pairs (after the colon ":") and after successive properties
(after the comma ","):

[...]

It'd be nice to have a stable text representation of a jsonb value with
minimal whitespace. The latter would also save a few bytes per record

in

text output formats, on the wire, and in backups (ex: COPY ... TO

STDOUT).

+1

I cannot comment on the patch itself, but I welcome jsonb_compact() or

some

way to get JSON with no inserted whitespace.

As I mentioned to Sehrope on IRC, at least for my 2c, if you want a
compact JSON format to reduce the amount of traffic over the wire or to
do things with on the client side, we should probably come up with a
binary format, rather than just hack out the whitespace. It's not like
representing numbers using ASCII characters is terribly efficient
either.

Dave Cramer

davec@postgresintl.com
www.postgresintl.com

Merlin Moncure

mmoncure@gmail.com

over 9 years ago

In reply to: Stephen Frost (#6)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Tue, Apr 26, 2016 at 11:49 AM, Stephen Frost <sfrost@snowman.net> wrote:

* Ryan Pedela (rpedela@datalanche.com) wrote:

On Sun, Apr 24, 2016 at 4:02 PM, Sehrope Sarkuni <sehrope@jackdb.com> wrote:

The default text representation of jsonb adds whitespace in between
key/value pairs (after the colon ":") and after successive properties
(after the comma ","):

[...]

It'd be nice to have a stable text representation of a jsonb value with
minimal whitespace. The latter would also save a few bytes per record in
text output formats, on the wire, and in backups (ex: COPY ... TO STDOUT).

+1

I cannot comment on the patch itself, but I welcome jsonb_compact() or some
way to get JSON with no inserted whitespace.

As I mentioned to Sehrope on IRC, at least for my 2c, if you want a
compact JSON format to reduce the amount of traffic over the wire or to
do things with on the client side, we should probably come up with a
binary format, rather than just hack out the whitespace. It's not like
representing numbers using ASCII characters is terribly efficient
either.

-1

This will benefit pretty much nobody unless you are writing a hand
crafted C application that consumes and processes the data directly.
I'd venture to guess this is a tiny fraction of pg users these days.
I do not understand at all the objection to removing whitespace.
Extra whitespace does nothing but pad the document as humans will
always run the document through a prettifier tuned to their specific
requirements (generally starting with, intelligent placement of
newlines) if reading directly.

Also, binary formats are not always smaller than text formats.

Compression might be another option, though that's certainly less
flexible and only (easily) used in combination with SSL, today.

right, exactly.

merlin

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#10

Stephen Frost

sfrost@snowman.net

over 9 years ago

In reply to: Merlin Moncure (#9)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

* Merlin Moncure (mmoncure@gmail.com) wrote:

On Tue, Apr 26, 2016 at 11:49 AM, Stephen Frost <sfrost@snowman.net> wrote:

As I mentioned to Sehrope on IRC, at least for my 2c, if you want a
compact JSON format to reduce the amount of traffic over the wire or to
do things with on the client side, we should probably come up with a
binary format, rather than just hack out the whitespace. It's not like
representing numbers using ASCII characters is terribly efficient
either.

-1

This will benefit pretty much nobody unless you are writing a hand
crafted C application that consumes and processes the data directly.

That's not accurate. All that's needed is for the libraries which
either wrap libpq or re-implement it to be updated to understand the
format and then convert the data into whatever structure makes sense for
the language (or library that the language includes for working with
JSON data).

One of the unfortunate realities with JSON is that there isn't a
terribly good binary representation, afaik. As I understand it, BSON
doesn't support all the JSON structures that we do; if it did, I'd
suggest we provide a way to convert our structure into BSON.

I'd venture to guess this is a tiny fraction of pg users these days.
I do not understand at all the objection to removing whitespace.
Extra whitespace does nothing but pad the document as humans will
always run the document through a prettifier tuned to their specific
requirements (generally starting with, intelligent placement of
newlines) if reading directly.

The objection is that it's a terribly poor solution as it simply makes
things ugly for a pretty small amount of improvement. Looking at it
from the perspective of just "whitespace is bad!" it might look like a
good answer to just remove whitespace, but we should be looking at it
from the perspective of "how do we make this more efficient?". Under
that lense, removing whitespace appears to be minimally effective
whereas passing the data back in a binary structure looks likely to
greatly improve the efficiency on a number of levels.

Also, binary formats are not always smaller than text formats.

While true, I don't think that would be true in this case.

Of course, there's nothing like actually trying it and seeing.

Thanks!

Stephen

#11

David G. Johnston

david.g.johnston@gmail.com

over 9 years ago

In reply to: Sehrope Sarkuni (#1)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Sun, Apr 24, 2016 at 3:02 PM, Sehrope Sarkuni <sehrope@jackdb.com> wrote:

It'd be nice to have a stable text representation of a jsonb value with
minimal whitespace. The latter would also save a few bytes per record in
text output formats, on the wire, and in backups (ex: COPY ... TO STDOUT).

Attached is a *very* work in progress patch that adds a

jsonb_compact(jsonb)::text function. It generates a text representation
without extra whitespace but does not yet try to enforce a stable order of
the properties within a jsonb value.

This doesn't impact backups unless you do away with the function and change
the output i/o function for the type. In that scenario the function is no
longer necessary.

David J.

#12

David G. Johnston

david.g.johnston@gmail.com

over 9 years ago

In reply to: Sehrope Sarkuni (#1)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Sun, Apr 24, 2016 at 3:02 PM, Sehrope Sarkuni <sehrope@jackdb.com> wrote:

Attached is a *very* work in progress patch that adds a
jsonb_compact(jsonb)::text function. It generates a text representation
without extra whitespace but does not yet try to enforce a stable order of
the properties within a jsonb value.

I think that having a jsonb_compact function that complements the existing
jsonb_pretty function is a well scoped and acceptable feature. I do not
believe that it should also take on the role of canonicalization.

I'd suggest that any discussions regarding stability of jsonb output be
given its own thread.

That topic also seems separate from a discussion on how to implement a
binary transport protocol for jsonb.

Lastly would be whether we change our default text representation so that
users utilizing COPY get the added benefit of a maximally minimized text
representation.

As an aside on the last topic, has there ever been considered to have a way
to specify a serialization function to use for a given type (or maybe
column) specified in a copy command?

Something like: COPY [...] WITH (jsonb USING jsonb_compact)

I'm thinking this would hard and undesirable given the role copy plays and
limited intelligence that it has in order to maximize its efficiency in
fulfilling its role.

Backups get compressed already so bandwidth seems the bigger goal there.
Otherwise I'd say that we lack any kind of overwhelming evidence that
making such a change would be warranted.

While these are definitely related topics it doesn't seem like any are
pre-requisites for the others. I think this thread is going to become hard
to follow and trail off it continues to try and address all of these topics
randomly as people see fit to reply. And it will quickly become hard for
anyone to jump in and understand the topics at hand.

David J.

#13

Ryan Pedela

rpedela@datalanche.com

over 9 years ago

In reply to: Stephen Frost (#6)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Tue, Apr 26, 2016 at 10:49 AM, Stephen Frost <sfrost@snowman.net> wrote:

* Ryan Pedela (rpedela@datalanche.com) wrote:

On Sun, Apr 24, 2016 at 4:02 PM, Sehrope Sarkuni <sehrope@jackdb.com>

wrote:

The default text representation of jsonb adds whitespace in between
key/value pairs (after the colon ":") and after successive properties
(after the comma ","):

[...]

It'd be nice to have a stable text representation of a jsonb value with
minimal whitespace. The latter would also save a few bytes per record

in

text output formats, on the wire, and in backups (ex: COPY ... TO

STDOUT).

+1

I cannot comment on the patch itself, but I welcome jsonb_compact() or

some

way to get JSON with no inserted whitespace.

As I mentioned to Sehrope on IRC, at least for my 2c, if you want a
compact JSON format to reduce the amount of traffic over the wire or to
do things with on the client side, we should probably come up with a
binary format, rather than just hack out the whitespace. It's not like
representing numbers using ASCII characters is terribly efficient
either.

Why build a Ferrari when a skateboard would suffice? Besides, that doesn't
help one of the most common cases for JSONB: REST APIs.

Now that PG fully supports JSON, a user can use PG to construct the JSON
payload of a REST API request. Then the web server would simply be a
pass-through for the JSON payload. I personally have this use case, it is
not hypothetical. However currently, a user must parse the JSON string from
PG and re-stringify it to minimize the whitespace. Given that HTTP is
text-based, removing all extraneous whitespace is the best way to compress
it, and on top of that you can do gzip compression. Unless you are
suggesting that the binary format is just a gzipped version of the
minimized text format, I don't see how a binary format helps at all in the
REST API case.

In addition, every JSON implementation I have ever seen fully minimizes
JSON by default. PG appears to deviate from standard practice for no
apparent reason.

#14

David G. Johnston

david.g.johnston@gmail.com

over 9 years ago

In reply to: Ryan Pedela (#13)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Thu, Apr 28, 2016 at 10:00 AM, Ryan Pedela <rpedela@datalanche.com>
wrote:

On Tue, Apr 26, 2016 at 10:49 AM, Stephen Frost <sfrost@snowman.net>
wrote:

* Ryan Pedela (rpedela@datalanche.com) wrote:

On Sun, Apr 24, 2016 at 4:02 PM, Sehrope Sarkuni <sehrope@jackdb.com>

wrote:

The default text representation of jsonb adds whitespace in between
key/value pairs (after the colon ":") and after successive properties
(after the comma ","):

[...]

It'd be nice to have a stable text representation of a jsonb value

with

minimal whitespace. The latter would also save a few bytes per record

in

text output formats, on the wire, and in backups (ex: COPY ... TO

STDOUT).

+1

I cannot comment on the patch itself, but I welcome jsonb_compact() or

some

way to get JSON with no inserted whitespace.

As I mentioned to Sehrope on IRC, at least for my 2c, if you want a
compact JSON format to reduce the amount of traffic over the wire or to
do things with on the client side, we should probably come up with a
binary format, rather than just hack out the whitespace. It's not like
representing numbers using ASCII characters is terribly efficient
either.

Why build a Ferrari when a skateboard would suffice? Besides, that doesn't
help one of the most common cases for JSONB: REST APIs.

I'm agreeing with this sentiment. This isn't an either-or situation so
argue the white-space removal on its own merits. The fact that we might
implement a binary representation in the future doesn't, for me, influence
whether we make this white-space change now.

Now that PG fully supports JSON, a user can use PG to construct the JSON
payload of a REST API request. Then the web server would simply be a
pass-through for the JSON payload. I personally have this use case, it is
not hypothetical.

However currently, a user must parse the JSON string from PG and

re-stringify it to minimize the whitespace. Given that HTTP is text-based,
removing all extraneous whitespace is the best way to compress it, and on
top of that you can do gzip compression.

Can you clarify what you mean by "and on top of that you can do gzip
compression"?

Unless you are suggesting that the binary format is just a gzipped version

of the minimized text format, I don't see how a binary format helps at all
in the REST API case.

No, I'm pretty sure you still end up with uncompressed text in the
application layer.

In addition, every JSON implementation I have ever seen fully minimizes
JSON by default. PG appears to deviate from standard practice for no
apparent reason.

Sorry to nit-pick but that's called convention - the standard is likely
silent on this point. And its not like this was done in a vacuum - why is
this only coming up now and not, say, during the beta? Is it surprising
that this seemingly easy-to-overlook dynamic was not explicitly addressed
by the author and reviewer of the patch, especially when implementation of
said feature consisted of a lot more things of greater import and impact?

David J.

#15

David Fetter

david@fetter.org

over 9 years ago

In reply to: Stephen Frost (#10)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Wed, Apr 27, 2016 at 05:05:18PM -0400, Stephen Frost wrote:

* Merlin Moncure (mmoncure@gmail.com) wrote:

On Tue, Apr 26, 2016 at 11:49 AM, Stephen Frost <sfrost@snowman.net> wrote:

As I mentioned to Sehrope on IRC, at least for my 2c, if you want a
compact JSON format to reduce the amount of traffic over the wire or to
do things with on the client side, we should probably come up with a
binary format, rather than just hack out the whitespace. It's not like
representing numbers using ASCII characters is terribly efficient
either.

-1

This will benefit pretty much nobody unless you are writing a hand
crafted C application that consumes and processes the data directly.

That's not accurate. All that's needed is for the libraries which
either wrap libpq or re-implement it to be updated to understand the
format and then convert the data into whatever structure makes sense for
the language (or library that the language includes for working with
JSON data).

One of the unfortunate realities with JSON is that there isn't a
terribly good binary representation, afaik. As I understand it, BSON
doesn't support all the JSON structures that we do; if it did, I'd
suggest we provide a way to convert our structure into BSON.

How about MessagePack?
http://msgpack.org/index.html

Cheers,
David.
--
David Fetter <david@fetter.org> http://fetter.org/
Phone: +1 415 235 3778 AIM: dfetter666 Yahoo!: dfetter
Skype: davidfetter XMPP: david.fetter@gmail.com

Remember to vote!
Consider donating to Postgres: http://www.postgresql.org/about/donate

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#16

Alvaro Herrera

alvherre@2ndquadrant.com

over 9 years ago

In reply to: David G. Johnston (#14)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

David G. Johnston wrote:

On Thu, Apr 28, 2016 at 10:00 AM, Ryan Pedela <rpedela@datalanche.com>
wrote:

In addition, every JSON implementation I have ever seen fully minimizes
JSON by default. PG appears to deviate from standard practice for no
apparent reason.

Sorry to nit-pick but that's called convention - the standard is likely
silent on this point. And its not like this was done in a vacuum - why is
this only coming up now and not, say, during the beta? Is it surprising
that this seemingly easy-to-overlook dynamic was not explicitly addressed
by the author and reviewer of the patch, especially when implementation of
said feature consisted of a lot more things of greater import and impact?

Actually we did have someone come up with a patch to "normalize" how
JSON stuff was output, because our code seems to do it in three
different, inconsistent ways. And our response was for them to get the
heck outta here, because we're so much in love with our current
practice that we don't need to refactor the three implementations into a
single one.

Personally I don't see any reason we need to care one bit about how the
irrelevant whitespace is laid out, in other words why shouldn't we just
strip them all out? Surely there's no backwards compatibility argument
there. If somebody wants to see a nicely laid out structure they can
use the prettification function.

--
ï¿½lvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#17

Sehrope Sarkuni

sehrope@jackdb.com

over 9 years ago

In reply to: David G. Johnston (#12)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Wed, Apr 27, 2016 at 7:09 PM, David G. Johnston <
david.g.johnston@gmail.com> wrote:

On Sun, Apr 24, 2016 at 3:02 PM, Sehrope Sarkuni <sehrope@jackdb.com>
wrote:

Attached is a *very* work in progress patch that adds a
jsonb_compact(jsonb)::text function. It generates a text representation
without extra whitespace but does not yet try to enforce a stable order of
the properties within a jsonb value.

I think that having a jsonb_compact function that complements the
existing jsonb_pretty function is a well scoped and acceptable feature. I
do not believe that it should also take on the role of canonicalization.

I'd suggest that any discussions regarding stability of jsonb output be
given its own thread.

I'm fine with removing the stability aspect. I think it's nice-to-have but
it definitely complicates things and has longer term consequences.

That topic also seems separate from a discussion on how to implement a
binary transport protocol for jsonb.

Defining a binary format for jsonb is definitely out of scope.

Lastly would be whether we change our default text representation so that
users utilizing COPY get the added benefit of a maximally minimized text
representation.

I see this applying to both COPY and the text format on the wire. The
latter has the added benefit that it works with existing clients without
any driver changes.

Outside of being a bit more pleasant in psql, I don't see a point in the
added whitespace for jsonb::text. Even in psql it only helps with small
fields as anything big isn't really legible without indenting it via
jsonb_pretty(...).

As an aside on the last topic, has there ever been considered to have a
way to specify a serialization function to use for a given type (or maybe
column) specified in a copy command?

Something like: COPY [...] WITH (jsonb USING jsonb_compact)

I'm thinking this would hard and undesirable given the role copy plays and
limited intelligence that it has in order to maximize its efficiency in
fulfilling its role.

Backups get compressed already so bandwidth seems the bigger goal there.
Otherwise I'd say that we lack any kind of overwhelming evidence that
making such a change would be warranted.

While these are definitely related topics it doesn't seem like any are
pre-requisites for the others. I think this thread is going to become hard
to follow and trail off it continues to try and address all of these topics
randomly as people see fit to reply. And it will quickly become hard for
anyone to jump in and understand the topics at hand.

That's a really cool idea but agree it's way out of scope for this.

I had a related idea, maybe something similar could be done for psql to set
a jsonb output format. That way you could automatically prettify jsonb
fields client side.

Regards,
-- Sehrope Sarkuni
Founder & CEO | JackDB, Inc. | https://www.jackdb.com/

#18

Andrew Dunstan

andrew@dunslane.net

over 9 years ago

In reply to: Alvaro Herrera (#16)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On 04/28/2016 04:29 PM, Alvaro Herrera wrote:

David G. Johnston wrote:

On Thu, Apr 28, 2016 at 10:00 AM, Ryan Pedela <rpedela@datalanche.com>
wrote:

In addition, every JSON implementation I have ever seen fully minimizes
JSON by default. PG appears to deviate from standard practice for no
apparent reason.

Sorry to nit-pick but that's called convention - the standard is likely
silent on this point. And its not like this was done in a vacuum - why is
this only coming up now and not, say, during the beta? Is it surprising
that this seemingly easy-to-overlook dynamic was not explicitly addressed
by the author and reviewer of the patch, especially when implementation of
said feature consisted of a lot more things of greater import and impact?

Actually we did have someone come up with a patch to "normalize" how
JSON stuff was output, because our code seems to do it in three
different, inconsistent ways. And our response was for them to get the
heck outta here, because we're so much in love with our current
practice that we don't need to refactor the three implementations into a
single one.

That's a pretty bad mischaracterization of the discussion. What was
proposed was broken, as I pointed out to the OP, and then again later to
you when you asked about it. What he wanted to achieve simply couldn't
be done they way he was trying to achieve it.

Regarding the present proposal:

I wouldn't necessarily be opposed to us having one or more of the following:

a) suppressing whitespace addition in all json generation and text
output, possibly governed by a GUC setting so we could maintain
behaviour compatibility if required
b) a function to remove whitespace from json values, but otherwise
preserve things like key order
c) a function to pretty-print json similar to the output from jsonb, but
again preserving key order
d) a function to reorder keys in json so they were sorted according to
the relevant collation.

None of these things except possibly the last should be terribly
difficult to do.

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#19

Shulgin, Oleksandr

oleksandr.shulgin@zalando.de

over 9 years ago

In reply to: Andrew Dunstan (#18)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Fri, Apr 29, 2016 at 3:18 PM, Andrew Dunstan <andrew@dunslane.net> wrote:

On 04/28/2016 04:29 PM, Alvaro Herrera wrote:

Actually we did have someone come up with a patch to "normalize" how

JSON stuff was output, because our code seems to do it in three
different, inconsistent ways. And our response was for them to get the
heck outta here, because we're so much in love with our current
practice that we don't need to refactor the three implementations into a
single one.

That's a pretty bad mischaracterization of the discussion. What was
proposed was broken, as I pointed out to the OP, and then again later to
you when you asked about it. What he wanted to achieve simply couldn't be
done they way he was trying to achieve it.

Yeah, the original request was pretty invalid, but when I've proposed to
resubmit just the normalization of whitespace nobody has shown enthusiasm
about the idea either:

/messages/by-id/CACACo5QKOiZ-00Jf6W2Uf0Pst05qRekQ9UzssyBL0m9FGKdS2Q@mail.gmail.com

Regarding the present proposal:

I wouldn't necessarily be opposed to us having one or more of the
following:

a) suppressing whitespace addition in all json generation and text output,
possibly governed by a GUC setting so we could maintain behaviour
compatibility if required

I'm not thrilled about GUC that would silently break stuff. That being
said, if someone's code is dependent on exact placement of whitespace in
the JSON text, it's pretty broken already and it's just a matter of time
when they hit an issue there.

b) a function to remove whitespace from json values, but otherwise
preserve things like key order
c) a function to pretty-print json similar to the output from jsonb, but
again preserving key order
d) a function to reorder keys in json so they were sorted according to the
relevant collation.

None of these things except possibly the last should be terribly difficult
to do.

My vote goes to remove all optional whitespace by default and have a single
function to prettify it. Key reordering can then be handled with an
optional parameter to such prettifying function.

It would probably make sense model this function after Python's
"dump-to-JSON-string" function:
https://docs.python.org/2/library/json.html#json.dumps With the optional
parameters for sorting the keys, indentation size and punctuation. This
way all the prettiness enhancements could be contained in a single function
w/o the need for generalized interface used in many places.

How about that?

--
Alex

#20

Merlin Moncure

mmoncure@gmail.com

over 9 years ago

In reply to: Stephen Frost (#10)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Wed, Apr 27, 2016 at 4:05 PM, Stephen Frost <sfrost@snowman.net> wrote:

* Merlin Moncure (mmoncure@gmail.com) wrote:

On Tue, Apr 26, 2016 at 11:49 AM, Stephen Frost <sfrost@snowman.net> wrote:

As I mentioned to Sehrope on IRC, at least for my 2c, if you want a
compact JSON format to reduce the amount of traffic over the wire or to
do things with on the client side, we should probably come up with a
binary format, rather than just hack out the whitespace. It's not like
representing numbers using ASCII characters is terribly efficient
either.

-1

This will benefit pretty much nobody unless you are writing a hand
crafted C application that consumes and processes the data directly.

That's not accurate. All that's needed is for the libraries which
either wrap libpq or re-implement it to be updated to understand the
format and then convert the data into whatever structure makes sense for
the language (or library that the language includes for working with
JSON data).

Sure, that's pretty easy. Note, I cowrote the only libpq wrapper
that demystifies pg binary formats, libpqtypes. I can tell you that
binary formats are much faster than text formats in any cases where
parsing is non trivial -- geo types, timestamp types, containers etc.
However I would be very surprised if postgres binary format json
would replace language parsing of json in any popular language like
java for common usage.

I'll go further. Postgres json support has pretty much made our
binary formats obsolete. The main problem with text format data was
sending complex structured data to the client over our overlapping
escape mechanisms; client side parsing was slow and in certain
scenarios backslashes would proliferate exponentially. json support
eliminates all of those problems and the performance advantages of
binary support (mainly parsing of complex types) rarely justify the
development headaches. These days, for the vast majority of data
traffic to the application it's a single row, single column json
coming in and out of the database.

I'd venture to guess this is a tiny fraction of pg users these days.
I do not understand at all the objection to removing whitespace.
Extra whitespace does nothing but pad the document as humans will
always run the document through a prettifier tuned to their specific
requirements (generally starting with, intelligent placement of
newlines) if reading directly.

The objection is that it's a terribly poor solution as it simply makes
things ugly for a pretty small amount of improvement. Looking at it
from the perspective of just "whitespace is bad!"

Whitespace is bad, because it simply pads documents on every stage of
processing. You simply can't please everyone in terms of where it
should go so you don't and reserve that functionality for
prettification functions. json is for *data serialization*. We
should not inject extra characters for aesthetics in the general case;
reducing memory consumption by 10% on both the client and server
during parse is a feature.

Andrew mentions several solutions. I like them all except I would
prefer not to introduce a GUC for controlling the output format. I do
not think it's a good idea to set the expectation that clients can
rely on text out byte for byte for any type including json.

merlin

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#21

David G. Johnston

david.g.johnston@gmail.com

over 9 years ago

In reply to: Merlin Moncure (#20)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Fri, Apr 29, 2016 at 7:15 AM, Merlin Moncure <mmoncure@gmail.com> wrote:

Andrew mentions several solutions. I like them all except I would
prefer not to introduce a GUC for controlling the output format. I do
not think it's a good idea to set the expectation that clients can
rely on text out byte for byte for any type including json.

+1

I agree on the GUC point and on the general desirability of making jsonb
output not include insignificant whitespace.

There seems to be enough coders who agree to this principle: could one of
you please write a patch and start a new thread specifically for this
change. If we go that route the need for the subject of this thread
becomes moot.

Thanks!

David J.

#22

Alvaro Herrera

alvherre@2ndquadrant.com

over 9 years ago

In reply to: Andrew Dunstan (#18)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

Thanks Alex for finding the previous thread.

Andrew Dunstan wrote:

On 04/28/2016 04:29 PM, Alvaro Herrera wrote:

Actually we did have someone come up with a patch to "normalize" how
JSON stuff was output, because our code seems to do it in three
different, inconsistent ways. And our response was for them to get the
heck outta here, because we're so much in love with our current
practice that we don't need to refactor the three implementations into a
single one.

That's a pretty bad mischaracterization of the discussion.

Sorry, I don't agree with that.

What was proposed
was broken, as I pointed out to the OP, and then again later to you when you
asked about it.

I didn't find your argument convincing back then, but this doesn't have
enough value to me that I can spend much time arguing about it.

I wouldn't necessarily be opposed to us having one or more of the following:

a) suppressing whitespace addition in all json generation and text output,
possibly governed by a GUC setting so we could maintain behaviour
compatibility if required

Sounds great to me, because we can unify the code so that we have *one*
piece to convert json to text instead of N, and not worry about the
non-relevant whitespace.

--
ï¿½lvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#23

Merlin Moncure

mmoncure@gmail.com

over 9 years ago

In reply to: Alvaro Herrera (#22)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Fri, Apr 29, 2016 at 12:31 PM, Alvaro Herrera
<alvherre@2ndquadrant.com> wrote:

Thanks Alex for finding the previous thread.

Andrew Dunstan wrote:

On 04/28/2016 04:29 PM, Alvaro Herrera wrote:

Actually we did have someone come up with a patch to "normalize" how
JSON stuff was output, because our code seems to do it in three
different, inconsistent ways. And our response was for them to get the
heck outta here, because we're so much in love with our current
practice that we don't need to refactor the three implementations into a
single one.

That's a pretty bad mischaracterization of the discussion.

Sorry, I don't agree with that.

What was proposed
was broken, as I pointed out to the OP, and then again later to you when you
asked about it.

I didn't find your argument convincing back then, but this doesn't have
enough value to me that I can spend much time arguing about it.

I wouldn't necessarily be opposed to us having one or more of the following:

a) suppressing whitespace addition in all json generation and text output,
possibly governed by a GUC setting so we could maintain behaviour
compatibility if required

Sounds great to me, because we can unify the code so that we have *one*
piece to convert json to text instead of N, and not worry about the
non-relevant whitespace.

hurk -- no objection to unifying the text serialization code (if that
proves reasonable to do). However I think using GUC to control
output format is not a good idea. We did this for bytea and it did
not turn out well; much better to have code anticipating precise
formats to check the server version. This comes up over and over
again: the GUC is not a solution for backwards compatibility...in
fact, it's pandora's box (see:
https://dev.mysql.com/doc/refman/5.5/en/sql-mode.html) .

merlin

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#24

Andrew Dunstan

andrew@dunslane.net

over 9 years ago

In reply to: Merlin Moncure (#23)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On 04/29/2016 02:34 PM, Merlin Moncure wrote:

I wouldn't necessarily be opposed to us having one or more of the following:

a) suppressing whitespace addition in all json generation and text output,
possibly governed by a GUC setting so we could maintain behaviour
compatibility if required

Sounds great to me, because we can unify the code so that we have *one*
piece to convert json to text instead of N, and not worry about the
non-relevant whitespace.

hurk -- no objection to unifying the text serialization code (if that
proves reasonable to do). However I think using GUC to control
output format is not a good idea. We did this for bytea and it did
not turn out well; much better to have code anticipating precise
formats to check the server version. This comes up over and over
again: the GUC is not a solution for backwards compatibility...in
fact, it's pandora's box (see:
https://dev.mysql.com/doc/refman/5.5/en/sql-mode.html) .

OK, fine by me. It's trivial to do for jsonb - all the white space comes
from on function, AFAIK. For json it's a bit more spread out, but only
in one or two files. Here's a question: say we have this table:
mytable:(x text, y json). now we do: "select to_json(r) from mytable r;"
Now y is a json field, which preserves the whitespace of the input. Do
we squash the whitespace out or not when producing the output of this
query? I'm inclined to say yes we do, but it's not a slam-dunk no-brainer.

One other point: I think we really need most of these pieces - if we are
going to squash the whitespace we need functions to do that cleanly for
json and to pretty-print json.

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#25

Merlin Moncure

mmoncure@gmail.com

over 9 years ago

In reply to: Andrew Dunstan (#24)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Fri, Apr 29, 2016 at 4:06 PM, Andrew Dunstan <andrew@dunslane.net> wrote:

One other point: I think we really need most of these pieces - if we are
going to squash the whitespace we need functions to do that cleanly for json
and to pretty-print json.

I don't think it should be squashed per se -- we just don't *add* any
whitespace. So the current behavior of to_json,

postgres=# select to_json(q) from (select 1, '{"a" : "foo"}'::json) q;
to_json
───────────────────────────────────────
{"?column?":1,"json":{"a" : "foo"}}

...is correct to me on the premise that the user deliberately chose
the whitespace preserving json type and did not run compat on it.
However,
postgres=# select row_to_json(q) from (select 1, '{"a" : "foo"}'::jsonb) q;
row_to_json
─────────────────────────────────────
{"?column?":1,"jsonb":{"a": "foo"}}

really ought to render (note lack of space after "a"):
{"?column?":1,"jsonb":{"a":"foo"}}

This is a simple matter of removing spaces in the occasional C string
literal in the serialization routines and adding a json_pretty
function.

merlin

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#26

Andrew Dunstan

andrew@dunslane.net

over 9 years ago

In reply to: Merlin Moncure (#25)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On 04/29/2016 06:11 PM, Merlin Moncure wrote:

This is a simple matter of removing spaces in the occasional C string
literal in the serialization routines and adding a json_pretty
function.

I spent a few hours on this. See
<https://bitbucket.org/adunstan/pgdevel/commits/branch/jsonformat> for
WIP - there are three commits. No regression tests yet for the two new
functions (json_squash and json_pretty), Otherwise fairly complete.
Removing whitespace generation was pretty simple for both json and jsonb.

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#27

Shulgin, Oleksandr

oleksandr.shulgin@zalando.de

over 9 years ago

In reply to: Andrew Dunstan (#26)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Sun, May 1, 2016 at 3:22 AM, Andrew Dunstan <andrew@dunslane.net> wrote:

On 04/29/2016 06:11 PM, Merlin Moncure wrote:

This is a simple matter of removing spaces in the occasional C string

literal in the serialization routines and adding a json_pretty
function.

I spent a few hours on this. See <
https://bitbucket.org/adunstan/pgdevel/commits/branch/jsonformat> for WIP
- there are three commits. No regression tests yet for the two new
functions (json_squash and json_pretty), Otherwise fairly complete.
Removing whitespace generation was pretty simple for both json and jsonb.

Looks good, thank you!

It would make sense IMO to rename FormatState's `indent' field as `pretty':
it's being used to add whitespace between the punctuation, not only at
start of a line. I'd also move the "if (indent)" check out of
add_indent(): just don't call it if no indent is needed.

I'll try to play with the patch to produce some regression tests for the
new functions.

--
Alex

#28

Andrew Dunstan

andrew@dunslane.net

over 9 years ago

In reply to: Shulgin, Oleksandr (#27)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On 05/02/2016 04:56 AM, Shulgin, Oleksandr wrote:

On Sun, May 1, 2016 at 3:22 AM, Andrew Dunstan <andrew@dunslane.net
<mailto:andrew@dunslane.net>> wrote:

On 04/29/2016 06:11 PM, Merlin Moncure wrote:

This is a simple matter of removing spaces in the occasional C
string
literal in the serialization routines and adding a json_pretty
function.

I spent a few hours on this. See
<https://bitbucket.org/adunstan/pgdevel/commits/branch/jsonformat>
for WIP - there are three commits. No regression tests yet for the
two new functions (json_squash and json_pretty), Otherwise fairly
complete. Removing whitespace generation was pretty simple for
both json and jsonb.

Looks good, thank you!

It would make sense IMO to rename FormatState's `indent' field as
`pretty': it's being used to add whitespace between the punctuation,
not only at start of a line. I'd also move the "if (indent)" check
out of add_indent(): just don't call it if no indent is needed.

I'll try to play with the patch to produce some regression tests for
the new functions.

It was done the way it was to be as consistent as possible with how it's
done for jsonb (c.f. jsonb.c:JsonbToCStringWorker and jsonb.c::add_indent).

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#29

Oleksandr Shulgin

oleksandr.shulgin@zalando.de

over 9 years ago

In reply to: Andrew Dunstan (#28)

1 attachment(s)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On Mon, May 2, 2016 at 4:04 PM, Andrew Dunstan <andrew@dunslane.net> wrote:

On 05/02/2016 04:56 AM, Shulgin, Oleksandr wrote:

On Sun, May 1, 2016 at 3:22 AM, Andrew Dunstan <andrew@dunslane.net
<mailto:andrew@dunslane.net>> wrote:

On 04/29/2016 06:11 PM, Merlin Moncure wrote:

This is a simple matter of removing spaces in the occasional C
string
literal in the serialization routines and adding a json_pretty
function.

I spent a few hours on this. See
<https://bitbucket.org/adunstan/pgdevel/commits/branch/jsonformat>
for WIP - there are three commits. No regression tests yet for the
two new functions (json_squash and json_pretty), Otherwise fairly
complete. Removing whitespace generation was pretty simple for
both json and jsonb.

Looks good, thank you!

It would make sense IMO to rename FormatState's `indent' field as
`pretty': it's being used to add whitespace between the punctuation, not
only at start of a line. I'd also move the "if (indent)" check out of
add_indent(): just don't call it if no indent is needed.

I'll try to play with the patch to produce some regression tests for the
new functions.

It was done the way it was to be as consistent as possible with how it's
done for jsonb (c.f. jsonb.c:JsonbToCStringWorker and jsonb.c::add_indent).

Ah, I see.

Simply taking regression tests for jsonb_pretty() and using them against
json_pretty() revealed a bug with extra indent being added before every
array/object start. Attached patch fixes that and adds the regression
tests.

For json_squash() I've taken the same three test values, for lack of a
better idea at the moment. At least we are testing key order stability and
that no whitespace is spit out.

Regards,
--
Alex

Attachments:

json-format-add-regression-tests.difftext/plain; charset=US-ASCII; name=json-format-add-regression-tests.diffDownload

diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
new file mode 100644
index d82c9c8..77d8325
*** a/src/backend/utils/adt/jsonfuncs.c
--- b/src/backend/utils/adt/jsonfuncs.c
*************** fmt_object_field_start(void *state, char
*** 3394,3402 ****
  
  	escape_json(_state->strval, fname);
  
! 	appendStringInfoCharMacro(_state->strval, ':');
! 	if (_state->indent)
! 		appendStringInfoCharMacro(_state->strval, ' ');
  
  	_state->last_was_key = true;
  }
--- 3394,3400 ----
  
  	escape_json(_state->strval, fname);
  
! 	appendBinaryStringInfo(_state->strval, ": ", _state->indent ? 2 : 1);
  
  	_state->last_was_key = true;
  }
*************** fmt_array_element_start(void *state, boo
*** 3409,3417 ****
  	if (!_state->first)
  		appendStringInfoCharMacro(_state->strval, ',');
  	_state->first = false;
! 
! 	add_indent(_state->strval, _state->indent, _state->lex->lex_level);
! 
  }
  
  static void
--- 3407,3413 ----
  	if (!_state->first)
  		appendStringInfoCharMacro(_state->strval, ',');
  	_state->first = false;
! 	_state->last_was_key = false;
  }
  
  static void
*************** fmt_scalar(void *state, char *token, Jso
*** 3419,3424 ****
--- 3415,3424 ----
  {
  	FormatState *_state = (FormatState *) state;
  
+ 	if (_state->lex->lex_level > 0)
+ 		add_indent(_state->strval, _state->indent && !_state->last_was_key,
+ 				   _state->lex->lex_level);
+ 
  	if (tokentype == JSON_TOKEN_STRING)
  		escape_json(_state->strval, token);
  	else
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
new file mode 100644
index 0c45c64..7af4022
*** a/src/test/regress/expected/json.out
--- b/src/test/regress/expected/json.out
*************** select json_strip_nulls('{"a": {"b": nul
*** 1658,1660 ****
--- 1658,1736 ----
   {"a":{},"d":{}}
  (1 row)
  
+ select json_pretty('{"a": "test", "b": [1, 2, 3], "c": "test3", "d":{"dd": "test4", "dd2":{"ddd": "test5"}}}');
+         json_pretty         
+ ----------------------------
+  {                         +
+      "a": "test",          +
+      "b": [                +
+          1,                +
+          2,                +
+          3                 +
+      ],                    +
+      "c": "test3",         +
+      "d": {                +
+          "dd": "test4",    +
+          "dd2": {          +
+              "ddd": "test5"+
+          }                 +
+      }                     +
+  }
+ (1 row)
+ 
+ select json_pretty('[{"f1":1,"f2":null},2,null,[[{"x":true},6,7],8],3]');
+         json_pretty        
+ ---------------------------
+  [                        +
+      {                    +
+          "f1": 1,         +
+          "f2": null       +
+      },                   +
+      2,                   +
+      null,                +
+      [                    +
+          [                +
+              {            +
+                  "x": true+
+              },           +
+              6,           +
+              7            +
+          ],               +
+          8                +
+      ],                   +
+      3                    +
+  ]
+ (1 row)
+ 
+ select json_pretty('{"a":["b", "c"], "d": {"e":"f"}}');
+    json_pretty    
+ ------------------
+  {               +
+      "a": [      +
+          "b",    +
+          "c"     +
+      ],          +
+      "d": {      +
+          "e": "f"+
+      }           +
+  }
+ (1 row)
+ 
+ select json_squash('{"a": "test", "b": [1, 2, 3], "c": "test3", "d":{"dd": "test4", "dd2":{"ddd": "test5"}}}');
+                                   json_squash                                  
+ -------------------------------------------------------------------------------
+  {"a":"test","b":[1,2,3],"c":"test3","d":{"dd":"test4","dd2":{"ddd":"test5"}}}
+ (1 row)
+ 
+ select json_squash('[{"f1":1,"f2":null},2,null,[[{"x":true},6,7],8],3]');
+                     json_squash                     
+ ----------------------------------------------------
+  [{"f1":1,"f2":null},2,null,[[{"x":true},6,7],8],3]
+ (1 row)
+ 
+ select json_squash('{"a":["b", "c"], "d": {"e":"f"}}');
+           json_squash          
+ -------------------------------
+  {"a":["b","c"],"d":{"e":"f"}}
+ (1 row)
+ 
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
new file mode 100644
index 603288b..f13e0fa
*** a/src/test/regress/sql/json.sql
--- b/src/test/regress/sql/json.sql
*************** select json_strip_nulls('[1,{"a":1,"b":n
*** 545,547 ****
--- 545,556 ----
  
  -- an empty object is not null and should not be stripped
  select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }');
+ 
+ 
+ select json_pretty('{"a": "test", "b": [1, 2, 3], "c": "test3", "d":{"dd": "test4", "dd2":{"ddd": "test5"}}}');
+ select json_pretty('[{"f1":1,"f2":null},2,null,[[{"x":true},6,7],8],3]');
+ select json_pretty('{"a":["b", "c"], "d": {"e":"f"}}');
+ 
+ select json_squash('{"a": "test", "b": [1, 2, 3], "c": "test3", "d":{"dd": "test4", "dd2":{"ddd": "test5"}}}');
+ select json_squash('[{"f1":1,"f2":null},2,null,[[{"x":true},6,7],8],3]');
+ select json_squash('{"a":["b", "c"], "d": {"e":"f"}}');

#30

Jim Nasby

Jim.Nasby@BlueTreble.com

over 9 years ago

In reply to: Shulgin, Oleksandr (#19)

Re: Add jsonb_compact(...) for whitespace-free jsonb to text

On 4/29/16 8:56 AM, Shulgin, Oleksandr wrote:

It would probably make sense model this function after Python's
"dump-to-JSON-string"
function: https://docs.python.org/2/library/json.html#json.dumps With
the optional parameters for sorting the keys, indentation size and
punctuation. This way all the prettiness enhancements could be
contained in a single function w/o the need for generalized interface
used in many places.

+1. I've found the output functions of json.dumps to be very handy.
-- 
Jim Nasby, Data Architect, Blue Treble Consulting, Austin TX
Experts in Analytics, Data Architecture and PostgreSQL
Data in Trouble? Get it in Treble! http://BlueTreble.com
855-TREBLE2 (855-873-2532)   mobile: 512-569-9461

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers