From 38ff6429e456fe7bfac9664d49529c819fce7c87 Mon Sep 17 00:00:00 2001 From: Alexandra Wang Date: Thu, 15 Aug 2024 02:11:33 -0700 Subject: [PATCH v1] Add JSON/JSONB simplified accessor This patch implements JSON/JSONB member accessor and JSON/JSONB array accessor as specified in SQL 2023. Specifically, the following sytaxes are added: 1. Simple dot-notation access to JSON and JSONB object fields 2. Subscripting for indexed access to JSON array elements Examples: -- Setup create table t(x int, y json); insert into t select 1, '{"a": 1, "b": 42}'::json; insert into t select 1, '{"a": 2, "b": {"c": 42}}'::json; insert into t select 1, '{"a": 3, "b": {"c": "42"}, "d":[11, 12]}'::json; -- Existing syntax predates the SQL standard: select (t.y)->'b' from t; select (t.y)->'b'->'c' from t; select (t.y)->'d'->0 from t; -- JSON simplified accessor specified by the SQL standard: select (t.y).b from t; select (t.y).b.c from t; select (t.y).d[0] from t; The SQL standard states that simplified access is equivalent to: JSON_QUERY (VEP, 'lax $.JC' WITH CONDITIONAL ARRAY WRAPPER NULL ON EMPTY NULL ON ERROR) where VEP is the and JC is the . For example, the JSON_QUERY equalalence of the above queries is: select json_query(y, 'lax $.b' WITH CONDITIONAL ARRAY WRAPPER NULL ON EMPTY NULL ON ERROR) from t; select json_query(y, 'lax $.b.c' WITH CONDITIONAL ARRAY WRAPPER NULL ON EMPTY NULL ON ERROR) from t; select json_query(y, 'lax $.d[0]' WITH CONDITIONAL ARRAY WRAPPER NULL ON EMPTY NULL ON ERROR) from t; This implementation converts the "." and "[]" indirections into the corresponding JSON_QUERY during transformIndirection(). Note that the return type of JSON_QUERY is always JSONB, regardless of whether the original Var type was JSON or JSONB. Therefore, the simplified accessor currently behaves the same as JSON_QUERY. --- src/backend/parser/parse_expr.c | 69 +++++++++++++++++++++- src/backend/parser/parse_func.c | 2 + src/test/regress/expected/json.out | 90 +++++++++++++++++++++++++++++ src/test/regress/expected/jsonb.out | 68 ++++++++++++++++++++++ src/test/regress/sql/json.sql | 29 ++++++++++ src/test/regress/sql/jsonb.sql | 26 +++++++++ 6 files changed, 283 insertions(+), 1 deletion(-) diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 56e413da9f..2385e6e072 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -442,6 +442,8 @@ transformIndirection(ParseState *pstate, A_Indirection *ind) List *subscripts = NIL; int location = exprLocation(result); ListCell *i; + bool json_accessor = false; + StringInfoData jsonpath; /* * We have to split any field-selection operations apart from @@ -452,8 +454,37 @@ transformIndirection(ParseState *pstate, A_Indirection *ind) { Node *n = lfirst(i); - if (IsA(n, A_Indices)) + if (IsA(n, A_Indices)) { + if (!json_accessor && (exprType(result) == JSONOID)) { + json_accessor = true; + initStringInfo(&jsonpath); + appendStringInfoString(&jsonpath, " lax $"); + } + if (json_accessor) { + Node *subExpr; + + if (((A_Indices *)n)->is_slice || ((A_Indices *)n)->lidx) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("json simplified accessor does not support slices"), + parser_errposition(pstate, location))); + + Assert(((A_Indices *)n)->uidx); + subExpr = transformExpr(pstate, ((A_Indices *) n)->uidx, pstate->p_expr_kind); + if (exprType(subExpr) != INT4OID) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("json simplified accessor does not support subscripting of non-int4 type"), + parser_errposition(pstate, location))); + + appendStringInfoString(&jsonpath, "["); + appendStringInfo(&jsonpath, "%d", DatumGetInt32(((Const *) subExpr)->constvalue)); + appendStringInfoString(&jsonpath, "]"); + continue; + } + subscripts = lappend(subscripts, n); + } else if (IsA(n, A_Star)) { ereport(ERROR, @@ -465,6 +496,13 @@ transformIndirection(ParseState *pstate, A_Indirection *ind) { Node *newresult; + if (!json_accessor && ((exprType(result) == JSONOID) || + exprType(result) == JSONBOID)) { + json_accessor = true; + initStringInfo(&jsonpath); + appendStringInfoString(&jsonpath, " lax $"); + } + Assert(IsA(n, String)); /* process subscripts before this field selection */ @@ -477,6 +515,12 @@ transformIndirection(ParseState *pstate, A_Indirection *ind) false); subscripts = NIL; + if (json_accessor) { + appendStringInfoString(&jsonpath, "."); + appendStringInfoString(&jsonpath, strVal(n)); + continue; + } + newresult = ParseFuncOrColumn(pstate, list_make1(n), list_make1(result), @@ -484,11 +528,34 @@ transformIndirection(ParseState *pstate, A_Indirection *ind) NULL, false, location); + if (newresult == NULL) unknown_attribute(pstate, result, strVal(n), location); result = newresult; } } + + if (json_accessor) { + JsonFuncExpr *jfe; + JsonValueExpr *jve; + + jve = makeJsonValueExpr((Expr *) result, NULL, + makeJsonFormat(JS_FORMAT_DEFAULT, + JS_ENC_DEFAULT, + -1)); + jfe = makeNode(JsonFuncExpr); + jfe->op = JSON_QUERY_OP; + jfe->context_item = jve; + jfe->pathspec = makeStringConst(jsonpath.data, -1); + jfe->passing = NULL; + jfe->on_empty = makeJsonBehavior(JSON_BEHAVIOR_NULL, NULL, location); + jfe->on_error = makeJsonBehavior(JSON_BEHAVIOR_NULL, NULL, location); + jfe->wrapper = JSW_CONDITIONAL; + jfe->location = location; + + result = transformJsonFuncExpr(pstate, jfe); + } + /* process trailing subscripts, if any */ if (subscripts) result = (Node *) transformContainerSubscripts(pstate, diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c index 9b23344a3b..8d8f615e6d 100644 --- a/src/backend/parser/parse_func.c +++ b/src/backend/parser/parse_func.c @@ -33,6 +33,8 @@ #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/syscache.h" +#include "parser/parse_oper.h" +#include "utils/json.h" /* Possible error codes from LookupFuncNameInternal */ diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out index 7df11c2f38..d8cf5ce273 100644 --- a/src/test/regress/expected/json.out +++ b/src/test/regress/expected/json.out @@ -2713,3 +2713,93 @@ select ts_headline('[]'::json, tsquery('aaa & bbb')); [] (1 row) +-- simple dot notation +drop table if exists test_json_dot; +NOTICE: table "test_json_dot" does not exist, skipping +create table test_json_dot(id int, test_json json); +insert into test_json_dot select 1, '{"a": 1, "b": 42}'::json; +insert into test_json_dot select 1, '{"a": 2, "b": {"c": 42}}'::json; +insert into test_json_dot select 1, '{"a": 3, "b": {"c": "42"}, "d":[11, 12]}'::json; +insert into test_json_dot select 1, '{"a": 3, "b": {"c": "42"}, "d":[{"x": [11, 12]}, {"y": [21, 22]}]}'::json; +-- member object access +select (test_json_dot.test_json).b, json_query(test_json, 'lax $.b' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; + b | expected +-------------+------------- + [42] | [42] + {"c": 42} | {"c": 42} + {"c": "42"} | {"c": "42"} + {"c": "42"} | {"c": "42"} +(4 rows) + +select (test_json_dot.test_json).b.c, json_query(test_json, 'lax $.b.c' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; + c | expected +--------+---------- + | + [42] | [42] + ["42"] | ["42"] + ["42"] | ["42"] +(4 rows) + +select (test_json_dot.test_json).d, json_query(test_json, 'lax $.d' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; + d | expected +------------------------------------+------------------------------------ + | + | + [11, 12] | [11, 12] + [{"x": [11, 12]}, {"y": [21, 22]}] | [{"x": [11, 12]}, {"y": [21, 22]}] +(4 rows) + +select (test_json_dot.test_json)."d", json_query(test_json, 'lax $.d' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; + d | expected +------------------------------------+------------------------------------ + | + | + [11, 12] | [11, 12] + [{"x": [11, 12]}, {"y": [21, 22]}] | [{"x": [11, 12]}, {"y": [21, 22]}] +(4 rows) + +select (test_json_dot.test_json).'d' from test_json_dot; +ERROR: syntax error at or near "'d'" +LINE 1: select (test_json_dot.test_json).'d' from test_json_dot; + ^ +-- array element access +select (test_json_dot.test_json).d->0 from test_json_dot; + ?column? +----------------- + + + 11 + {"x": [11, 12]} +(4 rows) + +select (test_json_dot.test_json).d[0], json_query(test_json, 'lax $.d[0]' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; + d | expected +-----------------+----------------- + | + | + [11] | [11] + {"x": [11, 12]} | {"x": [11, 12]} +(4 rows) + +select (test_json_dot.test_json).d[1], json_query(test_json, 'lax $.d[1]' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; + d | expected +-----------------+----------------- + | + | + [12] | [12] + {"y": [21, 22]} | {"y": [21, 22]} +(4 rows) + +-- wildcard member access +select (test_json_dot.test_json).b.* from test_json_dot; +ERROR: type jsonb is not composite +-- wildcard array access +select (test_json_dot.test_json).d[*] from test_json_dot; +ERROR: syntax error at or near "*" +LINE 1: select (test_json_dot.test_json).d[*] from test_json_dot; + ^ +-- item method +select (test_json_dot.test_json).d.size() from test_json_dot; +ERROR: syntax error at or near "(" +LINE 1: select (test_json_dot.test_json).d.size() from test_json_dot... + ^ diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index 7d163a156e..7a732a7b0f 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -5715,3 +5715,71 @@ select '12345.0000000000000000000000000000000000000000000005'::jsonb::int8; 12345 (1 row) +-- simple dot notation +drop table if exists test_jsonb_dot; +NOTICE: table "test_jsonb_dot" does not exist, skipping +create table test_jsonb_dot(id int, test_jsonb jsonb); +insert into test_jsonb_dot select 1, '{"a": 1, "b": 42}'::json; +insert into test_jsonb_dot select 1, '{"a": 2, "b": {"c": 42}}'::json; +insert into test_jsonb_dot select 1, '{"a": 3, "b": {"c": "42"}, "d":[11, 12]}'::json; +-- member object access +select (test_jsonb_dot.test_jsonb).b from test_jsonb_dot; + b +------------- + [42] + {"c": 42} + {"c": "42"} +(3 rows) + +select (test_jsonb_dot.test_jsonb).b.c from test_jsonb_dot; + c +-------- + + [42] + ["42"] +(3 rows) + +select (test_json_dot.test_json).d from test_json_dot; + d +------------------------------------ + + + [11, 12] + [{"x": [11, 12]}, {"y": [21, 22]}] +(4 rows) + +select (test_json_dot.test_json)."d" from test_json_dot; + d +------------------------------------ + + + [11, 12] + [{"x": [11, 12]}, {"y": [21, 22]}] +(4 rows) + +select (test_json_dot.test_json).'d' from test_json_dot; +ERROR: syntax error at or near "'d'" +LINE 1: select (test_json_dot.test_json).'d' from test_json_dot; + ^ +-- array element access +select (test_jsonb_dot.test_jsonb).d[0] from test_jsonb_dot; + d +------ + + + [11] +(3 rows) + +-- wildcard member access +select (test_jsonb_dot.test_jsonb).b.* from test_jsonb_dot; +ERROR: type jsonb is not composite +-- wildcard array access +select (test_jsonb_dot.test_jsonb).d[*] from test_jsonb_dot; +ERROR: syntax error at or near "*" +LINE 1: select (test_jsonb_dot.test_jsonb).d[*] from test_jsonb_dot; + ^ +-- item method +select (test_jsonb_dot.test_jsonb).d.size() from test_jsonb_dot; +ERROR: syntax error at or near "(" +LINE 1: select (test_jsonb_dot.test_jsonb).d.size() from test_jsonb_... + ^ diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql index 5c886cd6b3..15154e51dc 100644 --- a/src/test/regress/sql/json.sql +++ b/src/test/regress/sql/json.sql @@ -867,3 +867,32 @@ select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": select ts_headline('null'::json, tsquery('aaa & bbb')); select ts_headline('{}'::json, tsquery('aaa & bbb')); select ts_headline('[]'::json, tsquery('aaa & bbb')); + +-- simple dot notation +drop table if exists test_json_dot; +create table test_json_dot(id int, test_json json); +insert into test_json_dot select 1, '{"a": 1, "b": 42}'::json; +insert into test_json_dot select 1, '{"a": 2, "b": {"c": 42}}'::json; +insert into test_json_dot select 1, '{"a": 3, "b": {"c": "42"}, "d":[11, 12]}'::json; +insert into test_json_dot select 1, '{"a": 3, "b": {"c": "42"}, "d":[{"x": [11, 12]}, {"y": [21, 22]}]}'::json; + +-- member object access +select (test_json_dot.test_json).b, json_query(test_json, 'lax $.b' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; +select (test_json_dot.test_json).b.c, json_query(test_json, 'lax $.b.c' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; +select (test_json_dot.test_json).d, json_query(test_json, 'lax $.d' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; +select (test_json_dot.test_json)."d", json_query(test_json, 'lax $.d' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; +select (test_json_dot.test_json).'d' from test_json_dot; + +-- array element access +select (test_json_dot.test_json).d->0 from test_json_dot; +select (test_json_dot.test_json).d[0], json_query(test_json, 'lax $.d[0]' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; +select (test_json_dot.test_json).d[1], json_query(test_json, 'lax $.d[1]' WITH CONDITIONAL WRAPPER NULL ON EMPTY NULL ON ERROR) as expected from test_json_dot; + +-- wildcard member access +select (test_json_dot.test_json).b.* from test_json_dot; + +-- wildcard array access +select (test_json_dot.test_json).d[*] from test_json_dot; + +-- item method +select (test_json_dot.test_json).d.size() from test_json_dot; diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index 5f0190d5a2..85153f8155 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -1559,3 +1559,29 @@ select '12345.0000000000000000000000000000000000000000000005'::jsonb::float8; select '12345.0000000000000000000000000000000000000000000005'::jsonb::int2; select '12345.0000000000000000000000000000000000000000000005'::jsonb::int4; select '12345.0000000000000000000000000000000000000000000005'::jsonb::int8; + +-- simple dot notation +drop table if exists test_jsonb_dot; +create table test_jsonb_dot(id int, test_jsonb jsonb); +insert into test_jsonb_dot select 1, '{"a": 1, "b": 42}'::json; +insert into test_jsonb_dot select 1, '{"a": 2, "b": {"c": 42}}'::json; +insert into test_jsonb_dot select 1, '{"a": 3, "b": {"c": "42"}, "d":[11, 12]}'::json; + +-- member object access +select (test_jsonb_dot.test_jsonb).b from test_jsonb_dot; +select (test_jsonb_dot.test_jsonb).b.c from test_jsonb_dot; +select (test_json_dot.test_json).d from test_json_dot; +select (test_json_dot.test_json)."d" from test_json_dot; +select (test_json_dot.test_json).'d' from test_json_dot; + +-- array element access +select (test_jsonb_dot.test_jsonb).d[0] from test_jsonb_dot; + +-- wildcard member access +select (test_jsonb_dot.test_jsonb).b.* from test_jsonb_dot; + +-- wildcard array access +select (test_jsonb_dot.test_jsonb).d[*] from test_jsonb_dot; + +-- item method +select (test_jsonb_dot.test_jsonb).d.size() from test_jsonb_dot; -- 2.39.3 (Apple Git-146)