From ea278dee92616f546bb9636bc4c0f7e36fba8136 Mon Sep 17 00:00:00 2001 From: Alexandra Wang Date: Thu, 15 Aug 2024 02:11:33 -0700 Subject: [PATCH v4] Add JSON/JSONB simplified accessor This patch implements JSON/JSONB member accessor and JSON/JSONB array accessor as specified in SQL 2023. Specifically, the following syntaxes are added: 1. Simple dot-notation access to JSON and JSONB object fields 2. Subscripting for indexed access to JSON array elements Examples: -- Setup create table t(x int, y json); insert into t select 1, '{"a": 1, "b": 42}'::json; insert into t select 1, '{"a": 2, "b": {"c": 42}}'::json; insert into t select 1, '{"a": 3, "b": {"c": "42"}, "d":[11, 12]}'::json; -- Existing syntax that predates the SQL standard: select (t.y)->'b' from t; select (t.y)->'b'->'c' from t; select (t.y)->'d'->0 from t; -- JSON simplified accessor specified by the SQL standard: select (t.y).b from t; select (t.y).b.c from t; select (t.y).d[0] from t; The SQL standard states that simplified access is equivalent to: JSON_QUERY (VEP, 'lax $.JC' WITH CONDITIONAL ARRAY WRAPPER NULL ON EMPTY NULL ON ERROR) where VEP is the <value expression primary> and JC is the <JSON simplified accessor op chain>. For example, the JSON_QUERY equivalents of the above queries are: select json_query(y, 'lax $.b' WITH CONDITIONAL ARRAY WRAPPER NULL ON EMPTY NULL ON ERROR) from t; select json_query(y, 'lax $.b.c' WITH CONDITIONAL ARRAY WRAPPER NULL ON EMPTY NULL ON ERROR) from t; select json_query(y, 'lax $.d[0]' WITH CONDITIONAL ARRAY WRAPPER NULL ON EMPTY NULL ON ERROR) from t; This implementation enables dot-notation access to JSON/JSONB objects by providing syntactic sugar for the json_object_field "->" operator in ParseFuncOrColumn() for an argument of JSON/JSONB type. Similarly, JSON array access via subscripting is enabled by creating an OpExpr for the existing "->" operator. 
Note that the JSON subscripting implementation is different from the JSONB subscripting counterpart, as the former leverages the "->" operator directly, while the latter uses the more generic SubscriptingRef interface. --- src/backend/parser/parse_expr.c | 8 ++- src/backend/parser/parse_func.c | 98 +++++++++++++++++++++++-- src/include/catalog/pg_operator.dat | 6 +- src/include/parser/parse_func.h | 3 + src/include/parser/parse_type.h | 1 + src/test/regress/expected/json.out | 107 ++++++++++++++++++++++++++++ src/test/regress/expected/jsonb.out | 86 ++++++++++++++++++++++ src/test/regress/sql/json.sql | 25 +++++++ src/test/regress/sql/jsonb.sql | 22 ++++++ 9 files changed, 347 insertions(+), 9 deletions(-) diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 36c1b7a88f..c55582cde5 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -453,7 +453,13 @@ transformIndirection(ParseState *pstate, A_Indirection *ind) Node *n = lfirst(i); if (IsA(n, A_Indices)) - subscripts = lappend(subscripts, n); + if (exprType(result) == JSONOID) + result = ParseJsonSimplifiedAccessorArrayElement(pstate, + castNode(A_Indices, n), + result, + location); + else + subscripts = lappend(subscripts, n); else if (IsA(n, A_Star)) { ereport(ERROR, diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c index 9b23344a3b..a13c001dd4 100644 --- a/src/backend/parser/parse_func.c +++ b/src/backend/parser/parse_func.c @@ -33,6 +33,8 @@ #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/syscache.h" +#include "parser/parse_oper.h" +#include "catalog/pg_operator_d.h" /* Possible error codes from LookupFuncNameInternal */ @@ -48,6 +50,8 @@ static void unify_hypothetical_args(ParseState *pstate, static Oid FuncNameAsType(List *funcname); static Node *ParseComplexProjection(ParseState *pstate, const char *funcname, Node *first_arg, int location); +static Node 
*ParseJsonSimplifiedAccessorObjectField(ParseState *pstate, const char *funcname, + Node *first_arg, int location); static Oid LookupFuncNameInternal(ObjectType objtype, List *funcname, int nargs, const Oid *argtypes, bool include_out_arguments, bool missing_ok, @@ -226,17 +230,24 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, !func_variadic && argnames == NIL && list_length(funcname) == 1 && (actual_arg_types[0] == RECORDOID || - ISCOMPLEX(actual_arg_types[0]))); + ISCOMPLEX(actual_arg_types[0]) || + ISJSON(actual_arg_types[0]))); /* * If it's column syntax, check for column projection case first. */ if (could_be_projection && is_column) { - retval = ParseComplexProjection(pstate, - strVal(linitial(funcname)), - first_arg, - location); + if (ISJSON(actual_arg_types[0])) + retval = ParseJsonSimplifiedAccessorObjectField(pstate, + strVal(linitial(funcname)), + first_arg, + location); + else + retval = ParseComplexProjection(pstate, + strVal(linitial(funcname)), + first_arg, + location); if (retval) return retval; @@ -1902,6 +1913,83 @@ FuncNameAsType(List *funcname) return result; } +/* + * ParseJsonSimplifiedAccessorArrayElement - + * transform json subscript into json_array_element operator. 
+ */ +Node * +ParseJsonSimplifiedAccessorArrayElement(ParseState *pstate, A_Indices *subscript, + Node *first_arg, int location) +{ + OpExpr *result; + Node *index; + + Assert(exprType(first_arg) == JSONOID); + + if (subscript->is_slice) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("json subscript does not support slices")), + parser_errposition(pstate, location)); + + index = transformExpr(pstate, subscript->uidx, pstate->p_expr_kind); + if (!IsA(index, Const) || + castNode(Const, index)->consttype != INT4OID) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("json subscript must be coercible to integer")), + parser_errposition(pstate, exprLocation(index))); + + result = makeNode(OpExpr); + result->opno = OID_JSON_ARRAY_ELEMENT_OP; + result->opresulttype = JSONOID; + result->opfuncid = get_opcode(result->opno); + result->args = list_make2(first_arg, index); + result->location = exprLocation(index); + + return (Node *) result; +} + +/* + * ParseJsonSimplifiedAccessorObjectField - + * handles function calls with a single argument that is of json type. + * If the function call is actually a column projection, return a suitably + * transformed expression tree. If not, return NULL. 
+ */ +Node * +ParseJsonSimplifiedAccessorObjectField(ParseState *pstate, const char *funcname, + Node *first_arg, int location) +{ + OpExpr *result; + Node *rexpr; + + rexpr = (Node *) makeConst( + TEXTOID, + -1, + InvalidOid, + -1, + CStringGetTextDatum(funcname), + false, + false); + + result = makeNode(OpExpr); + if (exprType(first_arg) == JSONOID) + { + result->opno = OID_JSON_OBJECT_FIELD_OP; + result->opresulttype = JSONOID; + } + else + { + Assert(exprType(first_arg) == JSONBOID); + result->opno = OID_JSONB_OBJECT_FIELD_OP; + result->opresulttype = JSONBOID; + } + result->opfuncid = get_opcode(result->opno); + result->args = list_make2(first_arg, rexpr); + result->location = location; + return (Node *) result; +} + /* * ParseComplexProjection - * handles function calls with a single argument that is of complex type. diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat index 0e7511dde1..e375c49252 100644 --- a/src/include/catalog/pg_operator.dat +++ b/src/include/catalog/pg_operator.dat @@ -3154,13 +3154,13 @@ oprname => '*', oprleft => 'anyrange', oprright => 'anyrange', oprresult => 'anyrange', oprcom => '*(anyrange,anyrange)', oprcode => 'range_intersect' }, -{ oid => '3962', descr => 'get json object field', +{ oid => '3962', oid_symbol => 'OID_JSON_OBJECT_FIELD_OP', descr => 'get json object field', oprname => '->', oprleft => 'json', oprright => 'text', oprresult => 'json', oprcode => 'json_object_field' }, { oid => '3963', descr => 'get json object field as text', oprname => '->>', oprleft => 'json', oprright => 'text', oprresult => 'text', oprcode => 'json_object_field_text' }, -{ oid => '3964', descr => 'get json array element', +{ oid => '3964', oid_symbol => 'OID_JSON_ARRAY_ELEMENT_OP', descr => 'get json array element', oprname => '->', oprleft => 'json', oprright => 'int4', oprresult => 'json', oprcode => 'json_array_element' }, { oid => '3965', descr => 'get json array element as text', @@ -3172,7 +3172,7 @@ { 
oid => '3967', descr => 'get value from json as text with path elements', oprname => '#>>', oprleft => 'json', oprright => '_text', oprresult => 'text', oprcode => 'json_extract_path_text' }, -{ oid => '3211', descr => 'get jsonb object field', +{ oid => '3211', oid_symbol => 'OID_JSONB_OBJECT_FIELD_OP', descr => 'get jsonb object field', oprname => '->', oprleft => 'jsonb', oprright => 'text', oprresult => 'jsonb', oprcode => 'jsonb_object_field' }, { oid => '3477', descr => 'get jsonb object field as text', diff --git a/src/include/parser/parse_func.h b/src/include/parser/parse_func.h index c7ba99dee7..6b7759cbc7 100644 --- a/src/include/parser/parse_func.h +++ b/src/include/parser/parse_func.h @@ -35,6 +35,9 @@ extern Node *ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, Node *last_srf, FuncCall *fn, bool proc_call, int location); +extern Node *ParseJsonSimplifiedAccessorArrayElement(ParseState *pstate, A_Indices *subscript, + Node *first_arg, int location); + extern FuncDetailCode func_get_detail(List *funcname, List *fargs, List *fargnames, int nargs, Oid *argtypes, diff --git a/src/include/parser/parse_type.h b/src/include/parser/parse_type.h index b62e7a6ce9..9c8b3bfb2f 100644 --- a/src/include/parser/parse_type.h +++ b/src/include/parser/parse_type.h @@ -57,5 +57,6 @@ extern bool parseTypeString(const char *str, Oid *typeid_p, int32 *typmod_p, /* true if typeid is composite, or domain over composite, but not RECORD */ #define ISCOMPLEX(typeid) (typeOrDomainTypeRelid(typeid) != InvalidOid) +#define ISJSON(typeid) (typeid == JSONOID || typeid == JSONBOID) #endif /* PARSE_TYPE_H */ diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out index 96c40911cb..f6b7af3ecd 100644 --- a/src/test/regress/expected/json.out +++ b/src/test/regress/expected/json.out @@ -2716,3 +2716,110 @@ select ts_headline('[]'::json, tsquery('aaa & bbb')); [] (1 row) +-- simple dot notation +drop table if exists test_json_dot; +NOTICE: 
table "test_json_dot" does not exist, skipping +create table test_json_dot(id int, test_json json); +insert into test_json_dot select 1, '{"a": 1, "b": 42}'::json; +insert into test_json_dot select 1, '{"a": 2, "b": {"c": 42}}'::json; +insert into test_json_dot select 1, '{"a": 3, "b": {"c": "42"}, "d":[11, 12]}'::json; +insert into test_json_dot select 1, '{"a": 3, "b": {"c": "42"}, "d":[{"x": [11, 12]}, {"y": [21, 22]}]}'::json; +-- member object access +select (test_json_dot.test_json).b, json_query(test_json, 'lax $.b' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; + b | expected +-------------+------------- + 42 | 42 + {"c": 42} | {"c": 42} + {"c": "42"} | {"c": "42"} + {"c": "42"} | {"c": "42"} +(4 rows) + +select (test_json_dot.test_json).b.c, json_query(test_json, 'lax $.b.c' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; + c | expected +------+---------- + | + 42 | 42 + "42" | "42" + "42" | "42" +(4 rows) + +select (test_json_dot.test_json).d, json_query(test_json, 'lax $.d' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; + d | expected +------------------------------------+------------------------------------ + | + | + [11, 12] | [11, 12] + [{"x": [11, 12]}, {"y": [21, 22]}] | [{"x": [11, 12]}, {"y": [21, 22]}] +(4 rows) + +select (test_json_dot.test_json)."d", json_query(test_json, 'lax $.d' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; + d | expected +------------------------------------+------------------------------------ + | + | + [11, 12] | [11, 12] + [{"x": [11, 12]}, {"y": [21, 22]}] | [{"x": [11, 12]}, {"y": [21, 22]}] +(4 rows) + +select (test_json_dot.test_json).'d' from test_json_dot; +ERROR: syntax error at or near "'d'" +LINE 1: select (test_json_dot.test_json).'d' from test_json_dot; + ^ +select (test_json_dot.test_json)['d'] from test_json_dot; +ERROR: json subscript must be coercible 
to integer +LINE 1: select (test_json_dot.test_json)['d'] from test_json_dot; + ^ +-- array element access +select (test_json_dot.test_json).d->0 from test_json_dot; + ?column? +----------------- + + + 11 + {"x": [11, 12]} +(4 rows) + +select (test_json_dot.test_json).d[0], json_query(test_json, 'lax $.d[0]' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; + d | expected +-----------------+----------------- + | + | + 11 | 11 + {"x": [11, 12]} | {"x": [11, 12]} +(4 rows) + +select (test_json_dot.test_json).d[1], json_query(test_json, 'lax $.d[1]' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; + d | expected +-----------------+----------------- + | + | + 12 | 12 + {"y": [21, 22]} | {"y": [21, 22]} +(4 rows) + +select (test_json_dot.test_json).d[0:] from test_json_dot; +ERROR: json subscript does not support slices +LINE 1: select (test_json_dot.test_json).d[0:] from test_json_dot; + ^ +select (test_json_dot.test_json).d[0::int] from test_json_dot; + d +----------------- + + + 11 + {"x": [11, 12]} +(4 rows) + +select (test_json_dot.test_json).d[0::float] from test_json_dot; +ERROR: json subscript must be coercible to integer +LINE 1: select (test_json_dot.test_json).d[0::float] from test_json_... 
+ ^ +select (test_json_dot.test_json).d[0].x[1], json_query(test_json, 'lax $.d[0].x[1]' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; + x | expected +----+---------- + | + | + | + 12 | 12 +(4 rows) + diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index 7d163a156e..954efe67b6 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -5715,3 +5715,89 @@ select '12345.0000000000000000000000000000000000000000000005'::jsonb::int8; 12345 (1 row) +-- simple dot notation +drop table if exists test_jsonb_dot; +NOTICE: table "test_jsonb_dot" does not exist, skipping +create table test_jsonb_dot(id int, test_jsonb jsonb); +insert into test_jsonb_dot select 1, '{"a": 1, "b": 42}'::json; +insert into test_jsonb_dot select 1, '{"a": 2, "b": {"c": 42}}'::json; +insert into test_jsonb_dot select 1, '{"a": 3, "b": {"c": "42"}, "d":[11, 12]}'::json; +-- member object access +select (test_jsonb_dot.test_jsonb).b from test_jsonb_dot; + b +------------- + 42 + {"c": 42} + {"c": "42"} +(3 rows) + +select (test_jsonb_dot.test_jsonb).b.c from test_jsonb_dot; + c +------ + + 42 + "42" +(3 rows) + +select (test_jsonb_dot.test_jsonb).d from test_jsonb_dot; + d +---------- + + + [11, 12] +(3 rows) + +select (test_jsonb_dot.test_jsonb)."d" from test_jsonb_dot; + d +---------- + + + [11, 12] +(3 rows) + +select (test_jsonb_dot.test_jsonb).'d' from test_jsonb_dot; +ERROR: syntax error at or near "'d'" +LINE 1: select (test_jsonb_dot.test_jsonb).'d' from test_jsonb_dot; + ^ +select (test_jsonb_dot.test_jsonb)['d'] from test_jsonb_dot; + test_jsonb +------------ + + + [11, 12] +(3 rows) + +-- array element access +select (test_jsonb_dot.test_jsonb).d[0] from test_jsonb_dot; + d +---- + + + 11 +(3 rows) + +select (test_jsonb_dot.test_jsonb).d[0:] from test_jsonb_dot; +ERROR: jsonb subscript does not support slices +LINE 1: select (test_jsonb_dot.test_jsonb).d[0:] from 
test_jsonb_dot... + ^ +select (test_jsonb_dot.test_jsonb).d[0::int] from test_jsonb_dot; + d +---- + + + 11 +(3 rows) + +select (test_jsonb_dot.test_jsonb).d[0::float] from test_jsonb_dot; +ERROR: subscript type double precision is not supported +LINE 1: select (test_jsonb_dot.test_jsonb).d[0::float] from test_jso... + ^ +HINT: jsonb subscript must be coercible to either integer or text. +select (test_jsonb_dot.test_jsonb).d[0].x[1] from test_jsonb_dot; + x +--- + + + +(3 rows) + diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql index 8251f4f400..21450c4991 100644 --- a/src/test/regress/sql/json.sql +++ b/src/test/regress/sql/json.sql @@ -869,3 +869,28 @@ select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": select ts_headline('null'::json, tsquery('aaa & bbb')); select ts_headline('{}'::json, tsquery('aaa & bbb')); select ts_headline('[]'::json, tsquery('aaa & bbb')); + +-- simple dot notation +drop table if exists test_json_dot; +create table test_json_dot(id int, test_json json); +insert into test_json_dot select 1, '{"a": 1, "b": 42}'::json; +insert into test_json_dot select 1, '{"a": 2, "b": {"c": 42}}'::json; +insert into test_json_dot select 1, '{"a": 3, "b": {"c": "42"}, "d":[11, 12]}'::json; +insert into test_json_dot select 1, '{"a": 3, "b": {"c": "42"}, "d":[{"x": [11, 12]}, {"y": [21, 22]}]}'::json; + +-- member object access +select (test_json_dot.test_json).b, json_query(test_json, 'lax $.b' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; +select (test_json_dot.test_json).b.c, json_query(test_json, 'lax $.b.c' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; +select (test_json_dot.test_json).d, json_query(test_json, 'lax $.d' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; +select (test_json_dot.test_json)."d", json_query(test_json, 'lax $.d' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON 
ERROR) as expected from test_json_dot; +select (test_json_dot.test_json).'d' from test_json_dot; +select (test_json_dot.test_json)['d'] from test_json_dot; + +-- array element access +select (test_json_dot.test_json).d->0 from test_json_dot; +select (test_json_dot.test_json).d[0], json_query(test_json, 'lax $.d[0]' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; +select (test_json_dot.test_json).d[1], json_query(test_json, 'lax $.d[1]' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; +select (test_json_dot.test_json).d[0:] from test_json_dot; +select (test_json_dot.test_json).d[0::int] from test_json_dot; +select (test_json_dot.test_json).d[0::float] from test_json_dot; +select (test_json_dot.test_json).d[0].x[1], json_query(test_json, 'lax $.d[0].x[1]' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index 5f0190d5a2..f095dc2bbe 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -1559,3 +1559,25 @@ select '12345.0000000000000000000000000000000000000000000005'::jsonb::float8; select '12345.0000000000000000000000000000000000000000000005'::jsonb::int2; select '12345.0000000000000000000000000000000000000000000005'::jsonb::int4; select '12345.0000000000000000000000000000000000000000000005'::jsonb::int8; + +-- simple dot notation +drop table if exists test_jsonb_dot; +create table test_jsonb_dot(id int, test_jsonb jsonb); +insert into test_jsonb_dot select 1, '{"a": 1, "b": 42}'::json; +insert into test_jsonb_dot select 1, '{"a": 2, "b": {"c": 42}}'::json; +insert into test_jsonb_dot select 1, '{"a": 3, "b": {"c": "42"}, "d":[11, 12]}'::json; + +-- member object access +select (test_jsonb_dot.test_jsonb).b from test_jsonb_dot; +select (test_jsonb_dot.test_jsonb).b.c from test_jsonb_dot; +select (test_jsonb_dot.test_jsonb).d from test_jsonb_dot; +select 
(test_jsonb_dot.test_jsonb)."d" from test_jsonb_dot; +select (test_jsonb_dot.test_jsonb).'d' from test_jsonb_dot; +select (test_jsonb_dot.test_jsonb)['d'] from test_jsonb_dot; + +-- array element access +select (test_jsonb_dot.test_jsonb).d[0] from test_jsonb_dot; +select (test_jsonb_dot.test_jsonb).d[0:] from test_jsonb_dot; +select (test_jsonb_dot.test_jsonb).d[0::int] from test_jsonb_dot; +select (test_jsonb_dot.test_jsonb).d[0::float] from test_jsonb_dot; +select (test_jsonb_dot.test_jsonb).d[0].x[1] from test_jsonb_dot; -- 2.39.5 (Apple Git-154)