From fac98cad7a8dcb1354dd305c55372699d34659fe Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.gluhov@postgrespro.ru>
Date: Fri, 22 Mar 2019 15:15:38 +0300
Subject: [PATCH] Fix parsing of identifiers in jsonpath

---
 src/backend/utils/adt/jsonpath.c       |  11 +-
 src/backend/utils/adt/jsonpath_gram.y  |   6 +-
 src/backend/utils/adt/jsonpath_scan.l  | 146 +++++++++++-----------
 src/test/regress/expected/jsonpath.out | 162 ++++++++++++++++++++++---
 src/test/regress/sql/jsonpath.sql      |  27 +++++
 5 files changed, 252 insertions(+), 100 deletions(-)

diff --git a/src/backend/utils/adt/jsonpath.c b/src/backend/utils/adt/jsonpath.c
index e683cbef7c6..20c400a43cd 100644
--- a/src/backend/utils/adt/jsonpath.c
+++ b/src/backend/utils/adt/jsonpath.c
@@ -496,9 +496,14 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 			escape_json(buf, jspGetString(v, NULL));
 			break;
 		case jpiVariable:
-			appendStringInfoChar(buf, '$');
-			escape_json(buf, jspGetString(v, NULL));
-			break;
+			{
+				int32		len;
+				char	   *name = jspGetString(v, &len);
+
+				appendStringInfoChar(buf, '$');
+				appendBinaryStringInfo(buf, name, len);
+				break;
+			}
 		case jpiNumeric:
 			appendStringInfoString(buf,
 								   DatumGetCString(DirectFunctionCall1(numeric_out,
diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y
index 252f7051f65..ca47b96cb0e 100644
--- a/src/backend/utils/adt/jsonpath_gram.y
+++ b/src/backend/utils/adt/jsonpath_gram.y
@@ -348,8 +348,10 @@ makeItemVariable(JsonPathString *s)
 	JsonPathParseItem  *v;
 
 	v = makeItemType(jpiVariable);
-	v->value.string.val = s->val;
-	v->value.string.len = s->len;
+
+	/* skip leading '$' */
+	v->value.string.val = &s->val[1];
+	v->value.string.len = s->len - 1;
 
 	return v;
 }
diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l
index 9650226f507..65d3626d9ac 100644
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@@ -20,6 +20,8 @@
 #include "mb/pg_wchar.h"
 #include "nodes/pg_list.h"
 
+#define JSONPATH_SPECIAL_CHARS "?%.[]{}()|&!=<>@#,*:-+/~`;\\\"' \b\f\n\r\t\v"
+
 static JsonPathString scanstring;
 
 /* Handles to the buffer that the lexer uses internally */
@@ -63,20 +65,21 @@ fprintf_to_ereport(const char *fmt, const char *msg)
  * quoted variable names and C-style comments.
  * Exclusive states:
  *  <xq> - quoted strings
- *  <xnq> - non-quoted strings
- *  <xvq> - quoted variable names
+ *  <xnq> - non-quoted identifiers
  *  <xc> - C-style comment
  */
 
 %x xq
 %x xnq
-%x xvq
 %x xc
 
-special		[\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
-blank		[ \t\n\r\f]
-/* "other" means anything that's not special, blank, or '\' or '"' */
-other		[^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\" \t\n\r\f]
+special		[\?\%\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\~\`\;\']
+blank		[ \b\f\n\r\t\v]
+/* "id_start" means anything that's not special, blank, digit, or '\', or '"' */
+id_start	[^\?\%\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\~\`\;\'\\\" \b\f\n\r\t\v(0-9)]
+
+id_cont		({id_start}|[0-9])
+id			{id_start}{id_cont}*
 
 digit		[0-9]
 integer		(0|[1-9]{digit}*)
@@ -94,79 +97,65 @@ hex_fail	\\x{hex_dig}{0,1}
 
 %%
 
-<xnq>{other}+					{
-									addstring(false, yytext, yyleng);
-								}
-
-<xnq>{blank}+					{
-									yylval->str = scanstring;
-									BEGIN INITIAL;
-									return checkKeyword();
-								}
-
-<xnq>\/\*						{
-									yylval->str = scanstring;
-									BEGIN xc;
-								}
-
-<xnq>({special}|\")				{
-									yylval->str = scanstring;
-									yyless(0);
-									BEGIN INITIAL;
-									return checkKeyword();
-								}
-
-<xnq><<EOF>>					{
-									yylval->str = scanstring;
-									BEGIN INITIAL;
-									return checkKeyword();
-								}
+<xq>\\b							{ addchar(false, '\b'); }
 
-<xnq,xq,xvq>\\b				{ addchar(false, '\b'); }
+<xq>\\f							{ addchar(false, '\f'); }
 
-<xnq,xq,xvq>\\f				{ addchar(false, '\f'); }
+<xq>\\n							{ addchar(false, '\n'); }
 
-<xnq,xq,xvq>\\n				{ addchar(false, '\n'); }
+<xq>\\r							{ addchar(false, '\r'); }
 
-<xnq,xq,xvq>\\r				{ addchar(false, '\r'); }
+<xq>\\t							{ addchar(false, '\t'); }
 
-<xnq,xq,xvq>\\t				{ addchar(false, '\t'); }
+<xq>\\v							{ addchar(false, '\v'); }
 
-<xnq,xq,xvq>\\v				{ addchar(false, '\v'); }
+<xq>{hex_char}					{ parseHexChar(yytext); }
 
-<xnq,xq,xvq>{unicode}+		{ parseUnicode(yytext, yyleng); }
+<xq>{hex_fail}					{ yyerror(NULL, "invalid hex character sequence"); }
 
-<xnq,xq,xvq>{hex_char}		{ parseHexChar(yytext); }
+<xnq,xq>{unicode}+				{ parseUnicode(yytext, yyleng); }
 
-<xnq,xq,xvq>{unicode}*{unicodefail}	{ yyerror(NULL, "invalid unicode sequence"); }
+<INITIAL,xq,xnq>{unicode}*{unicodefail}	{ yyerror(NULL, "invalid unicode sequence"); }
 
-<xnq,xq,xvq>{hex_fail}		{ yyerror(NULL, "invalid hex character sequence"); }
+<INITIAL,xq,xnq>{unicode}+\\	{
+									/* throw back the \\, and treat as unicode */
+									yyless(yyleng - 1);
+									parseUnicode(yytext, yyleng);
+								}
 
-<xnq,xq,xvq>{unicode}+\\	{
-								/* throw back the \\, and treat as unicode */
-								yyless(yyleng - 1);
-								parseUnicode(yytext, yyleng);
-							}
+<xq>\\.							{ addchar(false, yytext[1]); }
 
-<xnq,xq,xvq>\\.				{ addchar(false, yytext[1]); }
+<xnq>\\.?						{ yyerror(NULL, "escape sequence is invalid"); }
 
-<xnq,xq,xvq>\\				{ yyerror(NULL, "unexpected end after backslash"); }
+<xnq>{id_cont}+					{ addstring(false, yytext, yyleng); }
 
-<xq,xvq><<EOF>>				{ yyerror(NULL, "unexpected end of quoted string"); }
+<xnq>({special}|{blank}|\")	{
+									/* throw back the special symbol and return id */
+									yyless(yyleng - 1);
+									addchar(false, '\0');
+									yylval->str = scanstring;
+									BEGIN INITIAL;
+									return checkKeyword();
+								}
 
-<xq>\"							{
+<xnq><<EOF>>					{
+									addchar(false, '\0');
 									yylval->str = scanstring;
 									BEGIN INITIAL;
-									return STRING_P;
+									return checkKeyword();
 								}
 
-<xvq>\"							{
+<xq>\"							{
 									yylval->str = scanstring;
 									BEGIN INITIAL;
-									return VARIABLE_P;
+									return STRING_P;
 								}
 
-<xq,xvq>[^\\\"]+				{ addstring(false, yytext, yyleng); }
+<xq>[^\\\"]+					{ addstring(false, yytext, yyleng); }
+
+<xq>\\							{ yyerror(NULL, "unexpected end of quoted string"); }
+<xq><<EOF>>						{ yyerror(NULL, "unexpected end of quoted string"); }
+
 
 <xc>\*\/						{ BEGIN INITIAL; }
 
@@ -176,6 +165,7 @@ hex_fail	\\x{hex_dig}{0,1}
 
 <xc><<EOF>>						{ yyerror(NULL, "unexpected end of comment"); }
 
+
 \&\&							{ return AND_P; }
 
 \|\|							{ return OR_P; }
@@ -198,17 +188,7 @@ hex_fail	\\x{hex_dig}{0,1}
 
 \>								{ return GREATER_P; }
 
-\${other}+						{
-									addstring(true, yytext + 1, yyleng - 1);
-									addchar(false, '\0');
-									yylval->str = scanstring;
-									return VARIABLE_P;
-								}
-
-\$\"							{
-									addchar(true, '\0');
-									BEGIN xvq;
-								}
+\'								{ yyerror(NULL, "unexpected single quote character"); }
 
 {special}						{ return *yytext; }
 
@@ -251,22 +231,25 @@ hex_fail	\\x{hex_dig}{0,1}
 
 ({realfail1}|{realfail2})		{ yyerror(NULL, "invalid floating point number"); }
 
-\"								{
-									addchar(true, '\0');
-									BEGIN xq;
-								}
 
-\\								{
-									yyless(0);
-									addchar(true, '\0');
+{id_start}{id_cont}*			{
+									addstring(true, yytext, yyleng);
 									BEGIN xnq;
 								}
 
-{other}+						{
-									addstring(true, yytext, yyleng);
+{unicode}+						{
+									addstring(true, "", 0);
+									parseUnicode(yytext, yyleng);
 									BEGIN xnq;
 								}
 
+\"								{
+									addchar(true, '\0');
+									BEGIN xq;
+								}
+
+\\								{ yyerror(NULL, "invalid escape sequence"); }
+
 <<EOF>>							{ yyterminate(); }
 
 %%
@@ -338,6 +321,12 @@ checkKeyword()
 						   *StopHigh = keywords + lengthof(keywords),
 						   *StopMiddle;
 
+	if (strcspn(scanstring.val, JSONPATH_SPECIAL_CHARS) < scanstring.len)
+		jsonpath_yyerror(NULL, "invalid characters in identifier");
+
+	if (scanstring.val[0] == '$')
+		return scanstring.len == 1 ? '$' : VARIABLE_P;
+
 	if (scanstring.len > keywords[lengthof(keywords) - 1].len)
 		return res;
 
@@ -588,6 +577,9 @@ parseUnicode(char *s, int l)
 			while (s[++i] != '}' && i < l)
 				ch = (ch << 4) | hexval(s[i]);
 			i++;	/* skip '}' */
+
+			if (ch > 0x10FFFF)
+				jsonpath_yyerror(NULL, "invalid Unicode escape value");
 		}
 		else		/* parse '\uXXXX' */
 		{
diff --git a/src/test/regress/expected/jsonpath.out b/src/test/regress/expected/jsonpath.out
index e399fa96312..ae6baed9bf0 100644
--- a/src/test/regress/expected/jsonpath.out
+++ b/src/test/regress/expected/jsonpath.out
@@ -178,17 +178,101 @@ select '"\x50\u0067\u{53}\u{051}\u{00004C}"'::jsonpath;
 (1 row)
 
 select '$.foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar'::jsonpath;
+ERROR:  escape sequence is invalid at or near "\x" of jsonpath input
+LINE 1: select '$.foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar'::json...
+               ^
+select '"\z"'::jsonpath;  -- unrecognized escape is just the literal char
+ jsonpath 
+----------
+ "z"
+(1 row)
+
+select '$."foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar"'::jsonpath;
       jsonpath       
 ---------------------
  $."fooPgSQL\t\"bar"
 (1 row)
 
-select '"\z"'::jsonpath;  -- unrecognized escape is just the literal char
+select '$.\u12345.a\u1234.b\u{12}34'::jsonpath;
+        jsonpath         
+-------------------------
+ $."á´5"."aá´"."b\u001234"
+(1 row)
+
+select '$.\u123'::jsonpath;
+ERROR:  invalid unicode sequence at or near "\u123" of jsonpath input
+LINE 1: select '$.\u123'::jsonpath;
+               ^
+select '$.\u{}'::jsonpath;
+ERROR:  invalid unicode sequence at or near "\u{" of jsonpath input
+LINE 1: select '$.\u{}'::jsonpath;
+               ^
+select '$.\u{1}'::jsonpath;
+  jsonpath  
+------------
+ $."\u0001"
+(1 row)
+
+select '$.\u{20}'::jsonpath;
+ERROR:  invalid characters in identifier at end of jsonpath input
+LINE 1: select '$.\u{20}'::jsonpath;
+               ^
+select '$."\u{20}"'::jsonpath;
  jsonpath 
 ----------
- "z"
+ $." "
+(1 row)
+
+select '$.\u{21}'::jsonpath;
+ERROR:  invalid characters in identifier at end of jsonpath input
+LINE 1: select '$.\u{21}'::jsonpath;
+               ^
+select '$."\u{21}"'::jsonpath;
+ jsonpath 
+----------
+ $."!"
+(1 row)
+
+select '$.\u{12345}'::jsonpath;
+ jsonpath 
+----------
+ $."ð"
+(1 row)
+
+select '$.\u{10FFFF}'::jsonpath;
+ jsonpath 
+----------
+ $.""
 (1 row)
 
+select '$.\u{110000}'::jsonpath;
+ERROR:  invalid Unicode escape value at or near "\u{110000}" of jsonpath input
+LINE 1: select '$.\u{110000}'::jsonpath;
+               ^
+select '$.\x123'::jsonpath;
+ERROR:  invalid escape sequence at or near "\" of jsonpath input
+LINE 1: select '$.\x123'::jsonpath;
+               ^
+select '$.foo\'::jsonpath;
+ERROR:  escape sequence is invalid at or near "\" of jsonpath input
+LINE 1: select '$.foo\'::jsonpath;
+               ^
+select '"foo'::jsonpath;
+ERROR:  unexpected end of quoted string at end of jsonpath input
+LINE 1: select '"foo'::jsonpath;
+               ^
+select '$.foo"bar'::jsonpath;
+ERROR:  unexpected end of quoted string at end of jsonpath input
+LINE 1: select '$.foo"bar'::jsonpath;
+               ^
+select '"foo\'::jsonpath;
+ERROR:  unexpected end of quoted string at or near "\" of jsonpath input
+LINE 1: select '"foo\'::jsonpath;
+               ^
+select '"foo\\'::jsonpath;
+ERROR:  unexpected end of quoted string at end of jsonpath input
+LINE 1: select '"foo\\'::jsonpath;
+               ^
 select '$.g ? ($.a == 1)'::jsonpath;
       jsonpath      
 --------------------
@@ -276,25 +360,59 @@ select '$.g ? (+@.x >= +-(+@.a + 2))'::jsonpath;
 select '$a'::jsonpath;
  jsonpath 
 ----------
- $"a"
+ $a
+(1 row)
+
+select '$_'::jsonpath;
+ jsonpath 
+----------
+ $_
+(1 row)
+
+select '$123'::jsonpath;
+ jsonpath 
+----------
+ $123
+(1 row)
+
+select '$$$'::jsonpath;
+ jsonpath 
+----------
+ $$$
+(1 row)
+
+select '$_$$1_3a'::jsonpath;
+ jsonpath 
+----------
+ $_$$1_3a
+(1 row)
+
+select '$\u12345'::jsonpath;
+ jsonpath 
+----------
+ $á´5
 (1 row)
 
+select '$\u{20}'::jsonpath;
+ERROR:  invalid characters in identifier at end of jsonpath input
+LINE 1: select '$\u{20}'::jsonpath;
+               ^
 select '$a.b'::jsonpath;
  jsonpath 
 ----------
- $"a"."b"
+ $a."b"
 (1 row)
 
 select '$a[*]'::jsonpath;
  jsonpath 
 ----------
- $"a"[*]
+ $a[*]
 (1 row)
 
 select '$.g ? (@.zip == $zip)'::jsonpath;
-         jsonpath          
----------------------------
- $."g"?(@."zip" == $"zip")
+        jsonpath         
+-------------------------
+ $."g"?(@."zip" == $zip)
 (1 row)
 
 select '$.a[1,2, 3 to 16]'::jsonpath;
@@ -304,9 +422,9 @@ select '$.a[1,2, 3 to 16]'::jsonpath;
 (1 row)
 
 select '$.a[$a + 1, ($b[*]) to -($[0] * 2)]'::jsonpath;
-                jsonpath                
-----------------------------------------
- $."a"[$"a" + 1,$"b"[*] to -($[0] * 2)]
+              jsonpath              
+------------------------------------
+ $."a"[$a + 1,$b[*] to -($[0] * 2)]
 (1 row)
 
 select '$.a[$.a.size() - 3]'::jsonpath;
@@ -414,9 +532,9 @@ select '$ ? (@ starts with "abc")'::jsonpath;
 (1 row)
 
 select '$ ? (@ starts with $var)'::jsonpath;
-         jsonpath         
---------------------------
- $?(@ starts with $"var")
+        jsonpath        
+------------------------
+ $?(@ starts with $var)
 (1 row)
 
 select '$ ? (@ like_regex "(invalid pattern")'::jsonpath;
@@ -487,9 +605,9 @@ select '$ < 1'::jsonpath;
 (1 row)
 
 select '($ < 1) || $.a.b <= $x'::jsonpath;
-           jsonpath           
-------------------------------
- ($ < 1 || $."a"."b" <= $"x")
+          jsonpath          
+----------------------------
+ ($ < 1 || $."a"."b" <= $x)
 (1 row)
 
 select '@ + 1'::jsonpath;
@@ -821,9 +939,17 @@ select '0'::jsonpath;
 (1 row)
 
 select '00'::jsonpath;
-ERROR:  syntax error, unexpected IDENT_P at end of jsonpath input
+ERROR:  syntax error, unexpected INT_P, expecting $end at or near "0" of jsonpath input
 LINE 1: select '00'::jsonpath;
                ^
+select '$.00'::jsonpath;
+ERROR:  syntax error, unexpected INT_P at or near "0" of jsonpath input
+LINE 1: select '$.00'::jsonpath;
+               ^
+select '$.0a'::jsonpath;
+ERROR:  syntax error, unexpected INT_P at or near "0" of jsonpath input
+LINE 1: select '$.0a'::jsonpath;
+               ^
 select '0.0'::jsonpath;
  jsonpath 
 ----------
diff --git a/src/test/regress/sql/jsonpath.sql b/src/test/regress/sql/jsonpath.sql
index 17ab7757831..f8782378cb6 100644
--- a/src/test/regress/sql/jsonpath.sql
+++ b/src/test/regress/sql/jsonpath.sql
@@ -33,6 +33,25 @@ select '"\b\f\r\n\t\v\"\''\\"'::jsonpath;
 select '"\x50\u0067\u{53}\u{051}\u{00004C}"'::jsonpath;
 select '$.foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar'::jsonpath;
 select '"\z"'::jsonpath;  -- unrecognized escape is just the literal char
+select '$."foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar"'::jsonpath;
+select '$.\u12345.a\u1234.b\u{12}34'::jsonpath;
+select '$.\u123'::jsonpath;
+select '$.\u{}'::jsonpath;
+select '$.\u{1}'::jsonpath;
+select '$.\u{20}'::jsonpath;
+select '$."\u{20}"'::jsonpath;
+select '$.\u{21}'::jsonpath;
+select '$."\u{21}"'::jsonpath;
+select '$.\u{12345}'::jsonpath;
+select '$.\u{10FFFF}'::jsonpath;
+select '$.\u{110000}'::jsonpath;
+select '$.\x123'::jsonpath;
+select '$.foo\'::jsonpath;
+
+select '"foo'::jsonpath;
+select '$.foo"bar'::jsonpath;
+select '"foo\'::jsonpath;
+select '"foo\\'::jsonpath;
 
 select '$.g ? ($.a == 1)'::jsonpath;
 select '$.g ? (@ == 1)'::jsonpath;
@@ -50,6 +69,12 @@ select '$.g ? ((@.x >= 123 || @.a == 4) && exists (@.x ? (@ == 14)))'::jsonpath;
 select '$.g ? (+@.x >= +-(+@.a + 2))'::jsonpath;
 
 select '$a'::jsonpath;
+select '$_'::jsonpath;
+select '$123'::jsonpath;
+select '$$$'::jsonpath;
+select '$_$$1_3a'::jsonpath;
+select '$\u12345'::jsonpath;
+select '$\u{20}'::jsonpath;
 select '$a.b'::jsonpath;
 select '$a[*]'::jsonpath;
 select '$.g ? (@.zip == $zip)'::jsonpath;
@@ -154,6 +179,8 @@ select '$ ? (@.a < +10.1e+1)'::jsonpath;
 
 select '0'::jsonpath;
 select '00'::jsonpath;
+select '$.00'::jsonpath;
+select '$.0a'::jsonpath;
 select '0.0'::jsonpath;
 select '0.000'::jsonpath;
 select '0.000e1'::jsonpath;
-- 
2.17.1

