Bug in error reporting for multi-line JSON

Started by Simon Riggsalmost 5 years ago11 messages

simon.riggs@enterprisedb.com

almost 5 years ago

2 attachment(s)

JSON parsing reports the line number and relevant context info
incorrectly when the JSON contains newlines. Current code mostly just
says "LINE 1" and is misleading for error correction. There were no
tests for this previously.

Proposed changes mean a JSON error such as this
{
"one": 1,
"two":,"two", <-- extra comma
"three": true
}

was previously reported as

CONTEXT: JSON data, line 1: {
"one": 1,
"two":,...

should be reported as

CONTEXT: JSON data, line 3: "two":,...

Attached patches:
HEAD: json_error_context.v3.patch - applies cleanly, passes make check
PG13: json_error_context.v3.patch - applies w minor fuzz, passes make check
PG12: json_error_context.v3.PG12.patch - applies cleanly, passes make check
PG11: json_error_context.v3.PG12.patch - applies cleanly, not tested
PG10: json_error_context.v3.PG12.patch - applies cleanly, not tested
PG9.6: json_error_context.v3.PG12.patch - applies cleanly, not tested
PG9.5: json_error_context.v3.PG12.patch - applies cleanly, not tested

--
Simon Riggs http://www.EnterpriseDB.com/

Attachments:

json_error_context.v3.PG12.patchapplication/octet-stream; name=json_error_context.v3.PG12.patchDownload

diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 26d293709a..94589d0cdb 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -312,6 +312,7 @@ makeJsonLexContextCstringLen(char *json, int len, bool need_escapes)
 
 	lex->input = lex->token_terminator = lex->line_start = json;
 	lex->line_number = 1;
+	lex->pos_last_linefeed = 0;
 	lex->input_length = len;
 	if (need_escapes)
 		lex->strval = makeStringInfo();
@@ -623,7 +624,10 @@ json_lex(JsonLexContext *lex)
 		   (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
 	{
 		if (*s == '\n')
+		{
 			++lex->line_number;
+			lex->pos_last_linefeed = len;
+		}
 		++s;
 		++len;
 	}
@@ -1264,17 +1268,15 @@ report_json_context(JsonLexContext *lex)
 	const char *context_start;
 	const char *context_end;
 	const char *line_start;
-	int			line_number;
 	char	   *ctxt;
 	int			ctxtlen;
 	const char *prefix;
 	const char *suffix;
 
 	/* Choose boundaries for the part of the input we will display */
-	context_start = lex->input;
+	context_start = lex->input + lex->pos_last_linefeed;
 	context_end = lex->token_terminator;
 	line_start = context_start;
-	line_number = 1;
 	for (;;)
 	{
 		/* Always advance over newlines */
@@ -1282,7 +1284,6 @@ report_json_context(JsonLexContext *lex)
 		{
 			context_start++;
 			line_start = context_start;
-			line_number++;
 			continue;
 		}
 		/* Otherwise, done as soon as we are close enough to context_end */
@@ -1317,7 +1318,7 @@ report_json_context(JsonLexContext *lex)
 	suffix = (lex->token_type != JSON_TOKEN_END && context_end - lex->input < lex->input_length && *context_end != '\n' && *context_end != '\r') ? "..." : "";
 
 	return errcontext("JSON data, line %d: %s%s%s",
-					  line_number, prefix, ctxt, suffix);
+					  lex->line_number, prefix, ctxt, suffix);
 }
 
 /*
diff --git a/src/include/utils/jsonapi.h b/src/include/utils/jsonapi.h
index 5f4d479a7b..3b47fae7e7 100644
--- a/src/include/utils/jsonapi.h
+++ b/src/include/utils/jsonapi.h
@@ -57,6 +57,7 @@ typedef struct JsonLexContext
 	char	   *prev_token_terminator;
 	JsonTokenType token_type;
 	int			lex_level;
+	int			pos_last_linefeed;
 	int			line_number;
 	char	   *line_start;
 	StringInfo	strval;
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index 69b0a6ee6d..b772db7ef8 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -272,6 +272,42 @@ LINE 1: SELECT '    '::json;
                ^
 DETAIL:  The input string ended unexpectedly.
 CONTEXT:  JSON data, line 1:     
+-- Multi-line JSON input to check ERROR reporting
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":"two",
+		"three":
+			true}'::jsonb); -- OK
+   jsonb_pretty    
+-------------------
+ {                +
+     "one": 1,    +
+     "two": "two",+
+     "three": true+
+ }
+(1 row)
+
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":,"two", -- ERROR extraneous comma before field "two" value, line 3
+		"three":
+			true}'::jsonb);
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT jsonb_pretty('{
+                            ^
+DETAIL:  Expected JSON value, but found ",".
+CONTEXT:  JSON data, line 3: 		"two":,...
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":"two",
+		"three":
+			}'::jsonb); -- ERROR missing value for field "three", line 5
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT jsonb_pretty('{
+                            ^
+DETAIL:  Expected JSON value, but found "}".
+CONTEXT:  JSON data, line 5: 			}
+-- ERROR is on line 5 because a LF \n is legal whitespace at end of line 4, so the missing syntax is on line 5
 --constructors
 -- array_to_json
 SELECT array_to_json(array(select 1 as a));
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index e021664592..f7187a7a0b 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -59,6 +59,24 @@ SELECT 'trues'::json;			-- ERROR, not a keyword
 SELECT ''::json;				-- ERROR, no value
 SELECT '    '::json;			-- ERROR, no value
 
+-- Multi-line JSON input to check ERROR reporting
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":"two",
+		"three":
+			true}'::jsonb); -- OK
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":,"two", -- ERROR extraneous comma before field "two" value, line 3
+		"three":
+			true}'::jsonb);
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":"two",
+		"three":
+			}'::jsonb); -- ERROR missing value for field "three", line 5
+-- ERROR is on line 5 because a LF \n is legal whitespace at end of line 4, so the missing syntax is on line 5
+
 --constructors
 -- array_to_json

json_error_context.v3.patchapplication/octet-stream; name=json_error_context.v3.patchDownload

commit 156d4fea7a379ad72bcc4ad84faba14c70c21355
Author: Simon Riggs <simon.riggs@enterprisedb.com>
Date:   Tue Jan 19 20:25:55 2021 +0000

Fix and improve context reporting for JSON parsing errors.

Use the original input line number to report JSON parsing errors
and reduce the amount of extraneous context for mult-line issues.

Add tests for invalid multi-line JSON to illustrate problem.

These changes mean a JSON error such as this

{
 "one": 1,
 "two":,"two",		<-- extra comma
 "three": true 
}

which was previously reported as

CONTEXT:  JSON data, line 1: {
"one": 1,
"two":,...

but is now reported as

CONTEXT:  JSON data, line 3: "two":,...

Backpatch to all supported versions, with suitable modifications.

diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index a794a7df84..2ff0159ad3 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -638,17 +638,15 @@ report_json_context(JsonLexContext *lex)
 	const char *context_start;
 	const char *context_end;
 	const char *line_start;
-	int			line_number;
 	char	   *ctxt;
 	int			ctxtlen;
 	const char *prefix;
 	const char *suffix;
 
 	/* Choose boundaries for the part of the input we will display */
-	context_start = lex->input;
+	context_start = lex->input + lex->pos_last_linefeed;
 	context_end = lex->token_terminator;
 	line_start = context_start;
-	line_number = 1;
 	for (;;)
 	{
 		/* Always advance over newlines */
@@ -656,7 +654,6 @@ report_json_context(JsonLexContext *lex)
 		{
 			context_start++;
 			line_start = context_start;
-			line_number++;
 			continue;
 		}
 		/* Otherwise, done as soon as we are close enough to context_end */
@@ -691,7 +688,7 @@ report_json_context(JsonLexContext *lex)
 	suffix = (lex->token_type != JSON_TOKEN_END && context_end - lex->input < lex->input_length && *context_end != '\n' && *context_end != '\r') ? "..." : "";
 
 	return errcontext("JSON data, line %d: %s%s%s",
-					  line_number, prefix, ctxt, suffix);
+					  lex->line_number, prefix, ctxt, suffix);
 }
 
 
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index 831a44a2da..648e85cbe2 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -158,6 +158,7 @@ makeJsonLexContextCstringLen(char *json, int len, int encoding, bool need_escape
 
 	lex->input = lex->token_terminator = lex->line_start = json;
 	lex->line_number = 1;
+	lex->pos_last_linefeed = 0;
 	lex->input_length = len;
 	lex->input_encoding = encoding;
 	if (need_escapes)
@@ -536,7 +537,10 @@ json_lex(JsonLexContext *lex)
 		   (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
 	{
 		if (*s == '\n')
+		{
 			++lex->line_number;
+			lex->pos_last_linefeed = len;
+		}
 		++s;
 		++len;
 	}
diff --git a/src/include/common/jsonapi.h b/src/include/common/jsonapi.h
index 03331f6d13..3baef0411c 100644
--- a/src/include/common/jsonapi.h
+++ b/src/include/common/jsonapi.h
@@ -79,6 +79,7 @@ typedef struct JsonLexContext
 	char	   *prev_token_terminator;
 	JsonTokenType token_type;
 	int			lex_level;
+	int			pos_last_linefeed;
 	int			line_number;
 	char	   *line_start;
 	StringInfo	strval;
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index c4156cf2a6..ff15acf3c0 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -272,6 +272,42 @@ LINE 1: SELECT '    '::json;
                ^
 DETAIL:  The input string ended unexpectedly.
 CONTEXT:  JSON data, line 1:     
+-- Multi-line JSON input to check ERROR reporting
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":"two",
+		"three":
+			true}'::jsonb); -- OK
+   jsonb_pretty    
+-------------------
+ {                +
+     "one": 1,    +
+     "two": "two",+
+     "three": true+
+ }
+(1 row)
+
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":,"two", -- ERROR extraneous comma before field "two" value, line 3
+		"three":
+			true}'::jsonb);
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT jsonb_pretty('{
+                            ^
+DETAIL:  Expected JSON value, but found ",".
+CONTEXT:  JSON data, line 3: 		"two":,...
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":"two",
+		"three":
+			}'::jsonb); -- ERROR missing value for field "three", line 5
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT jsonb_pretty('{
+                            ^
+DETAIL:  Expected JSON value, but found "}".
+CONTEXT:  JSON data, line 5: 			}
+-- ERROR is on line 5 because a LF \n is legal whitespace at end of line 4, so the missing syntax is on line 5
 --constructors
 -- array_to_json
 SELECT array_to_json(array(select 1 as a));
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 20354f04e3..e15697c95a 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -59,6 +59,24 @@ SELECT 'trues'::json;			-- ERROR, not a keyword
 SELECT ''::json;				-- ERROR, no value
 SELECT '    '::json;			-- ERROR, no value
 
+-- Multi-line JSON input to check ERROR reporting
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":"two",
+		"three":
+			true}'::jsonb); -- OK
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":,"two", -- ERROR extraneous comma before field "two" value, line 3
+		"three":
+			true}'::jsonb);
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":"two",
+		"three":
+			}'::jsonb); -- ERROR missing value for field "three", line 5
+-- ERROR is on line 5 because a LF \n is legal whitespace at end of line 4, so the missing syntax is on line 5
+
 --constructors
 -- array_to_json

Tom Lane

tgl@sss.pgh.pa.us

almost 5 years ago

In reply to: Simon Riggs (#1)

Re: Bug in error reporting for multi-line JSON

Simon Riggs <simon.riggs@enterprisedb.com> writes:

JSON parsing reports the line number and relevant context info
incorrectly when the JSON contains newlines. Current code mostly just
says "LINE 1" and is misleading for error correction. There were no
tests for this previously.

Couple thoughts:

* I think you are wrong to have removed the line number bump that
happened when report_json_context advances context_start over a
newline. The case is likely harder to get to now, but it can still
happen can't it? If it can't, we should remove that whole stanza.

* I'd suggest naming the new JsonLexContext field "pos_last_newline";
"linefeed" is not usually the word we use for this concept. (Although
actually, it might work better if you make that point to the char
*after* the newline, in which case "last_linestart" might be the
right name.)

* I'm not enthused about back-patching. This behavior seems like an
improvement, but that doesn't mean people will appreciate changing it
in stable branches.

regards, tom lane

Simon Riggs

simon.riggs@enterprisedb.com

almost 5 years ago

In reply to: Tom Lane (#2)

Re: Bug in error reporting for multi-line JSON

On Thu, Jan 21, 2021 at 6:08 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Simon Riggs <simon.riggs@enterprisedb.com> writes:

JSON parsing reports the line number and relevant context info
incorrectly when the JSON contains newlines. Current code mostly just
says "LINE 1" and is misleading for error correction. There were no
tests for this previously.

Couple thoughts:

* I think you are wrong to have removed the line number bump that
happened when report_json_context advances context_start over a
newline. The case is likely harder to get to now, but it can still
happen can't it? If it can't, we should remove that whole stanza.

OK, I'm playing around with this to see what is needed.

* I'd suggest naming the new JsonLexContext field "pos_last_newline";
"linefeed" is not usually the word we use for this concept. (Although
actually, it might work better if you make that point to the char
*after* the newline, in which case "last_linestart" might be the
right name.)

Yes, OK

* I'm not enthused about back-patching. This behavior seems like an
improvement, but that doesn't mean people will appreciate changing it
in stable branches.

OK, as you wish.

Thanks for the review, will post again soon with an updated patch.

--
Simon Riggs http://www.EnterpriseDB.com/

Hamid Akhtar

hamid.akhtar@gmail.com

almost 5 years ago

In reply to: Simon Riggs (#3)

Re: Bug in error reporting for multi-line JSON

On Mon, Jan 25, 2021 at 6:08 PM Simon Riggs <simon.riggs@enterprisedb.com>
wrote:

On Thu, Jan 21, 2021 at 6:08 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Simon Riggs <simon.riggs@enterprisedb.com> writes:

JSON parsing reports the line number and relevant context info
incorrectly when the JSON contains newlines. Current code mostly just
says "LINE 1" and is misleading for error correction. There were no
tests for this previously.

Couple thoughts:

* I think you are wrong to have removed the line number bump that
happened when report_json_context advances context_start over a
newline. The case is likely harder to get to now, but it can still
happen can't it? If it can't, we should remove that whole stanza.

OK, I'm playing around with this to see what is needed.

* I'd suggest naming the new JsonLexContext field "pos_last_newline";
"linefeed" is not usually the word we use for this concept. (Although
actually, it might work better if you make that point to the char
*after* the newline, in which case "last_linestart" might be the
right name.)

Yes, OK

* I'm not enthused about back-patching. This behavior seems like an
improvement, but that doesn't mean people will appreciate changing it
in stable branches.

OK, as you wish.

Thanks for the review, will post again soon with an updated patch.

I agree with Tom's feedback.. Whether you change pos_last_linefeed to point
to the character after the linefeed or not, we can still simplify the for
loop within the "report_json_context" function to:

=================
context_start = lex->input + lex->pos_last_linefeed;
context_start += (*context_start == '\n'); /* Let's move beyond the
linefeed */
context_end = lex->token_terminator;
line_start = context_start;
while (context_end - context_start >= 50 && context_start < context_end)
{
/* Advance to next multibyte character */
if (IS_HIGHBIT_SET(*context_start))
context_start += pg_mblen(context_start);
else
context_start++;
}
=================

IMHO, this should work as pos_last_linefeed points to the position of the
last linefeed before the error occurred, hence we can safely skip it and
move the start_context forward.

--
Simon Riggs http://www.EnterpriseDB.com/

--
Highgo Software (Canada/China/Pakistan)
URL : www.highgo.ca
ADDR: 10318 WHALLEY BLVD, Surrey, BC
CELL：+923335449950 EMAIL: mailto:hamid.akhtar@highgo.ca
SKYPE: engineeredvirus

Hamid Akhtar

hamid.akhtar@gmail.com

almost 5 years ago

In reply to: Hamid Akhtar (#4)

1 attachment(s)

Re: Bug in error reporting for multi-line JSON

On Tue, Jan 26, 2021 at 2:07 PM Hamid Akhtar <hamid.akhtar@gmail.com> wrote:

On Mon, Jan 25, 2021 at 6:08 PM Simon Riggs <simon.riggs@enterprisedb.com>
wrote:

On Thu, Jan 21, 2021 at 6:08 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Simon Riggs <simon.riggs@enterprisedb.com> writes:

JSON parsing reports the line number and relevant context info
incorrectly when the JSON contains newlines. Current code mostly just
says "LINE 1" and is misleading for error correction. There were no
tests for this previously.

Couple thoughts:

* I think you are wrong to have removed the line number bump that
happened when report_json_context advances context_start over a
newline. The case is likely harder to get to now, but it can still
happen can't it? If it can't, we should remove that whole stanza.

OK, I'm playing around with this to see what is needed.

* I'd suggest naming the new JsonLexContext field "pos_last_newline";
"linefeed" is not usually the word we use for this concept. (Although
actually, it might work better if you make that point to the char
*after* the newline, in which case "last_linestart" might be the
right name.)

Yes, OK

* I'm not enthused about back-patching. This behavior seems like an
improvement, but that doesn't mean people will appreciate changing it
in stable branches.

OK, as you wish.

Thanks for the review, will post again soon with an updated patch.

I agree with Tom's feedback.. Whether you change pos_last_linefeed to
point to the character after the linefeed or not, we can still simplify the
for loop within the "report_json_context" function to:

=================
context_start = lex->input + lex->pos_last_linefeed;
context_start += (*context_start == '\n'); /* Let's move beyond the
linefeed */
context_end = lex->token_terminator;
line_start = context_start;
while (context_end - context_start >= 50 && context_start < context_end)
{
/* Advance to next multibyte character */
if (IS_HIGHBIT_SET(*context_start))
context_start += pg_mblen(context_start);
else
context_start++;
}
=================

IMHO, this should work as pos_last_linefeed points to the position of the
last linefeed before the error occurred, hence we can safely skip it and
move the start_context forward.

This thread has been inactive for more than a month now.

So, I have reworked Simon's patch and incorporated Tom's feedback. The
changes include:
- Changing the variable name from "pos_last_linefeed" to "last_linestart"
as it now points to the character after the newline character,
- The "for" loop in report_json_context function has been significantly
simplified and uses a while loop.

The attached patch is created against the current master branch.

Show quoted text

--
Simon Riggs http://www.EnterpriseDB.com/

--
Highgo Software (Canada/China/Pakistan)
URL : www.highgo.ca
ADDR: 10318 WHALLEY BLVD, Surrey, BC
CELL：+923335449950 EMAIL: mailto:hamid.akhtar@highgo.ca
SKYPE: engineeredvirus

Attachments:

json_error_context.v4.patchapplication/octet-stream; name=json_error_context.v4.patchDownload

diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index f194ff911b..a35cd63e56 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -641,30 +641,19 @@ report_json_context(JsonLexContext *lex)
 	const char *context_start;
 	const char *context_end;
 	const char *line_start;
-	int			line_number;
 	char	   *ctxt;
 	int			ctxtlen;
 	const char *prefix;
 	const char *suffix;
 
 	/* Choose boundaries for the part of the input we will display */
-	context_start = lex->input;
+	context_start = lex->input + lex->last_linestart;
 	context_end = lex->token_terminator;
 	line_start = context_start;
-	line_number = 1;
-	for (;;)
+
+	/* Done as soon as we are close enough to context_end */
+	while (context_end - context_start >= 50 && context_start < context_end)
 	{
-		/* Always advance over newlines */
-		if (context_start < context_end && *context_start == '\n')
-		{
-			context_start++;
-			line_start = context_start;
-			line_number++;
-			continue;
-		}
-		/* Otherwise, done as soon as we are close enough to context_end */
-		if (context_end - context_start < 50)
-			break;
 		/* Advance to next multibyte character */
 		if (IS_HIGHBIT_SET(*context_start))
 			context_start += pg_mblen(context_start);
@@ -694,7 +683,7 @@ report_json_context(JsonLexContext *lex)
 	suffix = (lex->token_type != JSON_TOKEN_END && context_end - lex->input < lex->input_length && *context_end != '\n' && *context_end != '\r') ? "..." : "";
 
 	return errcontext("JSON data, line %d: %s%s%s",
-					  line_number, prefix, ctxt, suffix);
+					  lex->line_number, prefix, ctxt, suffix);
 }
 
 
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index 831a44a2da..049785c264 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -158,6 +158,7 @@ makeJsonLexContextCstringLen(char *json, int len, int encoding, bool need_escape
 
 	lex->input = lex->token_terminator = lex->line_start = json;
 	lex->line_number = 1;
+	lex->last_linestart = 0;
 	lex->input_length = len;
 	lex->input_encoding = encoding;
 	if (need_escapes)
@@ -536,7 +537,10 @@ json_lex(JsonLexContext *lex)
 		   (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
 	{
 		if (*s == '\n')
+		{
 			++lex->line_number;
+			lex->last_linestart = len + 1;
+		}
 		++s;
 		++len;
 	}
diff --git a/src/include/common/jsonapi.h b/src/include/common/jsonapi.h
index 03331f6d13..6e4cb12275 100644
--- a/src/include/common/jsonapi.h
+++ b/src/include/common/jsonapi.h
@@ -79,6 +79,7 @@ typedef struct JsonLexContext
 	char	   *prev_token_terminator;
 	JsonTokenType token_type;
 	int			lex_level;
+	int			last_linestart;
 	int			line_number;
 	char	   *line_start;
 	StringInfo	strval;
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index c4156cf2a6..ff15acf3c0 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -272,6 +272,42 @@ LINE 1: SELECT '    '::json;
                ^
 DETAIL:  The input string ended unexpectedly.
 CONTEXT:  JSON data, line 1:     
+-- Multi-line JSON input to check ERROR reporting
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":"two",
+		"three":
+			true}'::jsonb); -- OK
+   jsonb_pretty    
+-------------------
+ {                +
+     "one": 1,    +
+     "two": "two",+
+     "three": true+
+ }
+(1 row)
+
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":,"two", -- ERROR extraneous comma before field "two" value, line 3
+		"three":
+			true}'::jsonb);
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT jsonb_pretty('{
+                            ^
+DETAIL:  Expected JSON value, but found ",".
+CONTEXT:  JSON data, line 3: 		"two":,...
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":"two",
+		"three":
+			}'::jsonb); -- ERROR missing value for field "three", line 5
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT jsonb_pretty('{
+                            ^
+DETAIL:  Expected JSON value, but found "}".
+CONTEXT:  JSON data, line 5: 			}
+-- ERROR is on line 5 because a LF \n is legal whitespace at end of line 4, so the missing syntax is on line 5
 --constructors
 -- array_to_json
 SELECT array_to_json(array(select 1 as a));
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 20354f04e3..e15697c95a 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -59,6 +59,24 @@ SELECT 'trues'::json;			-- ERROR, not a keyword
 SELECT ''::json;				-- ERROR, no value
 SELECT '    '::json;			-- ERROR, no value
 
+-- Multi-line JSON input to check ERROR reporting
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":"two",
+		"three":
+			true}'::jsonb); -- OK
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":,"two", -- ERROR extraneous comma before field "two" value, line 3
+		"three":
+			true}'::jsonb);
+SELECT jsonb_pretty('{
+		"one": 1,
+		"two":"two",
+		"three":
+			}'::jsonb); -- ERROR missing value for field "three", line 5
+-- ERROR is on line 5 because a LF \n is legal whitespace at end of line 4, so the missing syntax is on line 5
+
 --constructors
 -- array_to_json

Hamid Akhtar

hamid.akhtar@gmail.com

almost 5 years ago

In reply to: Hamid Akhtar (#5)

Re: Bug in error reporting for multi-line JSON

Updated the patch based on feedback.

The new status of this patch is: Needs review

Daniel Gustafsson

daniel@yesql.se

almost 5 years ago

In reply to: Hamid Akhtar (#5)

Re: Bug in error reporting for multi-line JSON

On 27 Feb 2021, at 22:19, Hamid Akhtar <hamid.akhtar@gmail.com> wrote:

This thread has been inactive for more than a month now.

So, I have reworked Simon's patch and incorporated Tom's feedback. The changes include:
- Changing the variable name from "pos_last_linefeed" to "last_linestart" as it now points to the character after the newline character,
- The "for" loop in report_json_context function has been significantly simplified and uses a while loop.

The attached patch is created against the current master branch.

The updated version address the review comments, and pass all tests with no
documentation updates required. Playing around with I was also unable to break
it.

I'm changing the status of this patch to Ready for Committer.

--
Daniel Gustafsson https://vmware.com/

Tom Lane

tgl@sss.pgh.pa.us

almost 5 years ago

In reply to: Daniel Gustafsson (#7)

Re: Bug in error reporting for multi-line JSON

Daniel Gustafsson <daniel@yesql.se> writes:

I'm changing the status of this patch to Ready for Committer.

I reviewed this and pushed it, with some changes.

I noted that there was a basically unused "line_start" field in
JsonLexContext, which seems clearly to have been meant to track
what the new field was going to track. So we can fix this without
any new field by updating that at the right times.

I thought putting jsonb tests into json.sql was a bit poorly
thought out. I ended up adding parallel tests to both json.sql
and jsonb.sql ... maybe that's overkill, but a lot of the rest
of those scripts is duplicative too. The tests weren't exercising
the dots-at-start-of-line behavior, either.

regards, tom lane

tanghy.fnst@fujitsu.com

over 4 years ago

In reply to: Tom Lane (#8)

RE: Bug in error reporting for multi-line JSON

On Tuesday, March 2, 2021 6:50 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Daniel Gustafsson <daniel@yesql.se> writes:

I'm changing the status of this patch to Ready for Committer.

I reviewed this and pushed it, with some changes.

I think the while condition "context_start < context_end" added in commit ffd3944ab9 is useless. Thoughts?

code added in ffd3944ab9
+ while (context_end - context_start >= 50 && context_start < context_end)

Regards,
Tang

#10

Daniel Gustafsson

daniel@yesql.se

over 4 years ago

In reply to: tanghy.fnst@fujitsu.com (#9)

Re: Bug in error reporting for multi-line JSON

On 25 Aug 2021, at 10:22, tanghy.fnst@fujitsu.com wrote:

On Tuesday, March 2, 2021 6:50 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Daniel Gustafsson <daniel@yesql.se> writes:

I'm changing the status of this patch to Ready for Committer.

I reviewed this and pushed it, with some changes.

I think the while condition "context_start < context_end" added in commit ffd3944ab9 is useless. Thoughts?

code added in ffd3944ab9
+ while (context_end - context_start >= 50 && context_start < context_end)

Judging by the diff it’s likely a leftover from the previous coding. I don’t
see a case for when it would hit, but it also doesn’t seem to do any harm apart
from potentially causing static analyzers to get angry.

--
Daniel Gustafsson https://vmware.com/

#11

Tom Lane

tgl@sss.pgh.pa.us

over 4 years ago

In reply to: Daniel Gustafsson (#10)

Re: Bug in error reporting for multi-line JSON

Daniel Gustafsson <daniel@yesql.se> writes:

On 25 Aug 2021, at 10:22, tanghy.fnst@fujitsu.com wrote:
I think the while condition "context_start < context_end" added in commit ffd3944ab9 is useless. Thoughts?

Judging by the diff it’s likely a leftover from the previous coding. I don’t
see a case for when it would hit, but it also doesn’t seem to do any harm apart
from potentially causing static analyzers to get angry.

Yeah. I think that while reviewing this patch I read the while-condition
as a range check on context_start, but it isn't --- both inequalities
are in the same direction. I suppose there could be some quibble
about what happens if context_end - context_start is so large as to
overflow an integer, but that's never gonna happen (and if it did,
we'd have other issues, for instance the lack of any check-for-interrupt
in this loop).

Will fix.

regards, tom lane