From 1107e739c01a72a45ebfb79cb0c09aa9fce4df83 Mon Sep 17 00:00:00 2001 From: Florents Tselai Date: Sun, 12 Apr 2026 22:15:27 +0300 Subject: [PATCH v1 2/3] Add $.split(delimiter [, null_string]) jsonpath method --- doc/src/sgml/func/func-json.sgml | 20 ++++ src/backend/utils/adt/jsonpath.c | 52 ++++++++++- src/backend/utils/adt/jsonpath_exec.c | 93 ++++++++++++++++++- src/backend/utils/adt/jsonpath_gram.y | 7 +- src/backend/utils/adt/jsonpath_scan.l | 1 + src/include/utils/jsonpath.h | 1 + src/test/regress/expected/jsonb_jsonpath.out | 20 ++++ src/test/regress/expected/jsonpath.out | 28 ++++++ .../regress/expected/sqljson_queryfuncs.out | 1 + src/test/regress/sql/jsonb_jsonpath.sql | 6 ++ src/test/regress/sql/jsonpath.sql | 6 ++ src/test/regress/sql/sqljson_queryfuncs.sql | 1 + 12 files changed, 233 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/func/func-json.sgml b/doc/src/sgml/func/func-json.sgml index ad1210fe965..7aad2d69a88 100644 --- a/doc/src/sgml/func/func-json.sgml +++ b/doc/src/sgml/func/func-json.sgml @@ -2879,6 +2879,26 @@ ERROR: jsonpath member accessor can only be applied to an object + + + string . split(delimiter , null_string) + array + + + Splits the string at occurrences of delimiter and forms + the resulting fields into a JSON array. If null_string + is supplied, fields matching that string are replaced by JSON null values. + + + jsonb_path_query('"a,b,c"', '$.split(",")') + ["a", "b", "c"] + + + jsonb_path_query('"xx~~yy~~zz"', '$.split("~~", "yy")') + ["xx", null, "zz"] + + + string . ltrim([ characters ]) diff --git a/src/backend/utils/adt/jsonpath.c b/src/backend/utils/adt/jsonpath.c index 6416d3c376e..d14fe450b6b 100644 --- a/src/backend/utils/adt/jsonpath.c +++ b/src/backend/utils/adt/jsonpath.c @@ -329,6 +329,37 @@ flattenJsonPathParseItem(StringInfo buf, int *result, struct Node *escontext, *(int32 *) (buf->data + right) = chld - pos; } break; + case jpiStrSplit: + { + /* Reserve space for left and right arg positions */ + int32 left = reserveSpaceForItemPointer(buf); + int32 right = reserveSpaceForItemPointer(buf); + + /* Flatten the required left argument (the delimiter) */ + if (!flattenJsonPathParseItem(buf, &chld, escontext, + item->value.args.left, + nestingLevel, + insideArraySubscript)) + return false; + *(int32 *) (buf->data + left) = chld - pos; + + /* Flatten the optional right argument only if provided */ + if (item->value.args.right != NULL) + { + if (!flattenJsonPathParseItem(buf, &chld, escontext, + item->value.args.right, + nestingLevel, + insideArraySubscript)) + return false; + *(int32 *) (buf->data + right) = chld - pos; + } + else + { + /* Default to 0 if missing */ + *(int32 *) (buf->data + right) = 0; + } + } + break; case jpiLikeRegex: { int32 offs; @@ -864,6 +895,18 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey, printJsonPathItem(buf, &elem, false, false); appendStringInfoChar(buf, ')'); break; + case jpiStrSplit: + appendStringInfoString(buf, ".split("); + jspGetLeftArg(v, &elem); + printJsonPathItem(buf, &elem, false, false); + if (v->content.args.right != 0) + { + appendStringInfoChar(buf, ','); + jspGetRightArg(v, &elem); + printJsonPathItem(buf, &elem, false, false); + } + appendStringInfoChar(buf, ')'); + break; case jpiStrLtrim: appendStringInfoString(buf, ".ltrim("); if (v->content.arg) @@ -1004,6 +1047,8 @@ jspOperationName(JsonPathItemType type) return "split_part"; case jpiStrTranslate: return "translate"; + case jpiStrSplit: + return "split"; default: elog(ERROR, "unrecognized jsonpath item type: %d", type); return NULL; @@ -1136,6 +1181,7 @@ jspInitByBuffer(JsonPathItem *v, char *base, int32 pos) case jpiDecimal: case jpiStrReplace: case jpiStrTranslate: + case jpiStrSplit: case jpiStrSplitPart: read_int32(v->content.args.left, base, pos); read_int32(v->content.args.right, base, pos); @@ -1263,6 +1309,7 @@ jspGetNext(JsonPathItem *v, JsonPathItem *a) v->type == jpiStrBtrim || v->type == jpiStrInitcap || v->type == jpiStrSplitPart || + v->type == jpiStrSplit || v->type == jpiStrTranslate); if (a) @@ -1293,6 +1340,7 @@ jspGetLeftArg(JsonPathItem *v, JsonPathItem *a) v->type == jpiDecimal || v->type == jpiStrReplace || v->type == jpiStrTranslate || + v->type == jpiStrSplit || v->type == jpiStrSplitPart); jspInitByBuffer(a, v->base, v->content.args.left); @@ -1318,7 +1366,8 @@ jspGetRightArg(JsonPathItem *v, JsonPathItem *a) v->type == jpiDecimal || v->type == jpiStrReplace || v->type == jpiStrTranslate || - v->type == jpiStrSplitPart); + v->type == jpiStrSplitPart || + v->type == jpiStrSplit); jspInitByBuffer(a, v->base, v->content.args.right); } @@ -1626,6 +1675,7 @@ jspIsMutableWalker(JsonPathItem *jpi, struct JsonPathMutableContext *cxt) case jpiStrBtrim: case jpiStrInitcap: case jpiStrSplitPart: + case jpiStrSplit: case jpiStrTranslate: status = jpdsNonDateTime; break; diff --git a/src/backend/utils/adt/jsonpath_exec.c b/src/backend/utils/adt/jsonpath_exec.c index 687415a04b7..93f31b40b0b 100644 --- a/src/backend/utils/adt/jsonpath_exec.c +++ b/src/backend/utils/adt/jsonpath_exec.c @@ -1690,6 +1690,7 @@ executeItemOptUnwrapTarget(JsonPathExecContext *cxt, JsonPathItem *jsp, case jpiStrBtrim: case jpiStrInitcap: case jpiStrSplitPart: + case jpiStrSplit: case jpiStrTranslate: { if (unwrap && JsonbType(jb) == jbvArray) @@ -2923,7 +2924,8 @@ executeStringInternalMethod(JsonPathExecContext *cxt, JsonPathItem *jsp, jsp->type == jpiStrBtrim || jsp->type == jpiStrInitcap || jsp->type == jpiStrTranslate || - jsp->type == jpiStrSplitPart); + jsp->type == jpiStrSplitPart || + jsp->type == jpiStrSplit); if (!(jb = getScalar(jb, jbvString))) RETURN_ERROR(ereport(ERROR, @@ -3050,6 +3052,95 @@ executeStringInternalMethod(JsonPathExecContext *cxt, JsonPathItem *jsp, DirectFunctionCall1(numeric_int4, NumericGetDatum(n)))); break; } + case jpiStrSplit: + { + char *delim_str; + char *null_str = NULL; + Datum arr_datum; + ArrayType *arr; + Datum *elems; + bool *nulls; + int nelems; + JsonbInState state = {0}; + + /* Extract the delimiter */ + jspGetLeftArg(jsp, &elem); + if (elem.type != jpiString) + elog(ERROR, "invalid jsonpath item type for .split() delimiter"); + + delim_str = jspGetString(&elem, NULL); + + /* Extract the optional null_string */ + if (jsp->content.args.right != 0) + { + jspGetRightArg(jsp, &elem); + if (elem.type != jpiString) + elog(ERROR, "invalid jsonpath item type for .split() null_string"); + + null_str = jspGetString(&elem, NULL); + } + + /* forward the execution to internal text_to_array functions */ + if (null_str) + { + arr_datum = DirectFunctionCall3Coll(text_to_array_null, + DEFAULT_COLLATION_OID, + str, + CStringGetTextDatum(delim_str), + CStringGetTextDatum(null_str)); + } + else + { + arr_datum = DirectFunctionCall2Coll(text_to_array, + DEFAULT_COLLATION_OID, + str, + CStringGetTextDatum(delim_str)); + } + + arr = DatumGetArrayTypeP(arr_datum); + deconstruct_array_builtin(arr, TEXTOID, &elems, &nulls, &nelems); + + pushJsonbValue(&state, WJB_BEGIN_ARRAY, NULL); + + for (int i = 0; i < nelems; i++) + { + JsonbValue v; + + if (nulls[i]) + { + v.type = jbvNull; + } + else + { + char *val = TextDatumGetCString(elems[i]); + + v.type = jbvString; + v.val.string.val = val; + v.val.string.len = strlen(val); + } + pushJsonbValue(&state, WJB_ELEM, &v); + } + + pushJsonbValue(&state, WJB_END_ARRAY, NULL); + + { + Jsonb *jb_result = JsonbValueToJsonb(state.result); + JsonbValue binary_jbv; + + binary_jbv.type = jbvBinary; + binary_jbv.val.binary.data = &jb_result->root; + binary_jbv.val.binary.len = VARSIZE(jb_result); + + res = jperOk; + hasNext = jspGetNext(jsp, &elem); + + if (!hasNext && !found) + return res; + + return executeNextItem(cxt, jsp, &elem, &binary_jbv, found); + } + } + break; default: elog(ERROR, "unsupported jsonpath item type: %d", jsp->type); } diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y index 2b55350df23..fe30e8a6147 100644 --- a/src/backend/utils/adt/jsonpath_gram.y +++ b/src/backend/utils/adt/jsonpath_gram.y @@ -87,7 +87,7 @@ static bool makeItemLikeRegex(JsonPathParseItem *expr, %token BIGINT_P BOOLEAN_P DATE_P DECIMAL_P INTEGER_P NUMBER_P %token STRINGFUNC_P TIME_P TIME_TZ_P TIMESTAMP_P TIMESTAMP_TZ_P %token STR_REPLACE_P STR_LOWER_P STR_UPPER_P STR_LTRIM_P STR_RTRIM_P STR_BTRIM_P - STR_INITCAP_P STR_SPLIT_PART_P STR_TRANSLATE_P + STR_INITCAP_P STR_SPLIT_P STR_SPLIT_PART_P STR_TRANSLATE_P %type result @@ -286,6 +286,10 @@ accessor_op: { $$ = makeItemBinary(jpiStrTranslate, linitial($4), lsecond($4)); } | '.' STR_SPLIT_PART_P '(' str_int_args ')' { $$ = makeItemBinary(jpiStrSplitPart, linitial($4), lsecond($4)); } + | '.' STR_SPLIT_P '(' str_elem ')' + { $$ = makeItemBinary(jpiStrSplit, $4, NULL); } + | '.' STR_SPLIT_P '(' str_str_args ')' + { $$ = makeItemBinary(jpiStrSplit, linitial($4), lsecond($4)); } | '.' STR_LTRIM_P '(' opt_str_arg ')' { $$ = makeItemUnary(jpiStrLtrim, $4); } | '.' STR_RTRIM_P '(' opt_str_arg ')' @@ -385,6 +389,7 @@ key_name: | STR_REPLACE_P | STR_TRANSLATE_P | STR_SPLIT_PART_P + | STR_SPLIT_P | STR_LTRIM_P | STR_RTRIM_P | STR_BTRIM_P diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l index f94074fe342..b06d0abb64a 100644 --- a/src/backend/utils/adt/jsonpath_scan.l +++ b/src/backend/utils/adt/jsonpath_scan.l @@ -419,6 +419,7 @@ static const JsonPathKeyword keywords[] = { {5, false, STR_LOWER_P, "lower"}, {5, false, STR_LTRIM_P, "ltrim"}, {5, false, STR_RTRIM_P, "rtrim"}, + {5, false, STR_SPLIT_P, "split"}, {5, false, STR_UPPER_P, "upper"}, {6, false, BIGINT_P, "bigint"}, {6, false, DOUBLE_P, "double"}, diff --git a/src/include/utils/jsonpath.h b/src/include/utils/jsonpath.h index c2c95a9b8a5..c1c7812a36a 100644 --- a/src/include/utils/jsonpath.h +++ b/src/include/utils/jsonpath.h @@ -123,6 +123,7 @@ typedef enum JsonPathItemType jpiStrBtrim, /* .btrim() item method */ jpiStrInitcap, /* .initcap() item method */ jpiStrSplitPart, /* .split_part() item method */ + jpiStrSplit, /* .split() item method */ jpiStrTranslate, /* .translate() item method */ } JsonPathItemType; diff --git a/src/test/regress/expected/jsonb_jsonpath.out b/src/test/regress/expected/jsonb_jsonpath.out index b04b01f716e..8fc530201a6 100644 --- a/src/test/regress/expected/jsonb_jsonpath.out +++ b/src/test/regress/expected/jsonb_jsonpath.out @@ -3115,6 +3115,26 @@ select jsonb_path_query('"abc,def,ghi,jkl"', '$.split_part(",", -2)'); "ghi" (1 row) +-- Test .split() +select jsonb_path_query('"a,b,c"', '$.split(",")'); + jsonb_path_query +------------------ + ["a", "b", "c"] +(1 row) + +select jsonb_path_query('"a,,c"', '$.split(",", "")'); + jsonb_path_query +------------------ + ["a", null, "c"] +(1 row) + +-- proving the output is a real, indexable JSON array +select jsonb_path_query('"a,b,c"', '$.split(",")[1]'); + jsonb_path_query +------------------ + "b" +(1 row) + -- Test string methods play nicely together select jsonb_path_query('"hello world"', '$.replace("hello","bye").upper()'); jsonb_path_query diff --git a/src/test/regress/expected/jsonpath.out b/src/test/regress/expected/jsonpath.out index 86a10ff3eab..a9ef9cefced 100644 --- a/src/test/regress/expected/jsonpath.out +++ b/src/test/regress/expected/jsonpath.out @@ -513,6 +513,12 @@ select '$.split_part("~@~", 2)'::jsonpath; $.split_part("~@~",2) (1 row) +select '$.split(",")'::jsonpath; + jsonpath +-------------- + $.split(",") +(1 row) + -- Parse errors select '$.replace("hello")'::jsonpath; ERROR: syntax error at or near ")" of jsonpath input @@ -542,6 +548,22 @@ select '$.split_part("~@~", 2, "extra")'::jsonpath; ERROR: syntax error at or near "," of jsonpath input LINE 1: select '$.split_part("~@~", 2, "extra")'::jsonpath; ^ +select '$.split()'::jsonpath; +ERROR: syntax error at or near ")" of jsonpath input +LINE 1: select '$.split()'::jsonpath; + ^ +select '$.split(",", "null", "extra")'::jsonpath; +ERROR: syntax error at or near "," of jsonpath input +LINE 1: select '$.split(",", "null", "extra")'::jsonpath; + ^ +select '$.split(1)'::jsonpath; +ERROR: syntax error at or near "1" of jsonpath input +LINE 1: select '$.split(1)'::jsonpath; + ^ +select '$.split(",", 1)'::jsonpath; +ERROR: syntax error at or near "1" of jsonpath input +LINE 1: select '$.split(",", 1)'::jsonpath; + ^ select '$.lower("hi")'::jsonpath; ERROR: syntax error at or near """ of jsonpath input LINE 1: select '$.lower("hi")'::jsonpath; @@ -609,6 +631,12 @@ select '$.split_part'::jsonpath; $."split_part" (1 row) +select '$.split'::jsonpath; + jsonpath +----------- + $."split" +(1 row) + select '$.ltrim'::jsonpath; jsonpath ----------- diff --git a/src/test/regress/expected/sqljson_queryfuncs.out b/src/test/regress/expected/sqljson_queryfuncs.out index 7940faa58ba..c3fc890a9c4 100644 --- a/src/test/regress/expected/sqljson_queryfuncs.out +++ b/src/test/regress/expected/sqljson_queryfuncs.out @@ -1281,6 +1281,7 @@ CREATE INDEX ON test_jsonb_mutability (JSON_QUERY(js, '$.initcap()')); CREATE INDEX ON test_jsonb_mutability (JSON_QUERY(js, '$.replace("hello", "bye")')); CREATE INDEX ON test_jsonb_mutability (JSON_QUERY(js, '$.translate("hello", "bye")')); CREATE INDEX ON test_jsonb_mutability (JSON_QUERY(js, '$.split_part(",", 2)')); +CREATE INDEX ON test_jsonb_mutability (JSON_QUERY(js, '$.split(",")')); -- DEFAULT expression CREATE OR REPLACE FUNCTION ret_setint() RETURNS SETOF integer AS $$ diff --git a/src/test/regress/sql/jsonb_jsonpath.sql b/src/test/regress/sql/jsonb_jsonpath.sql index 5938b819284..784fbc577f2 100644 --- a/src/test/regress/sql/jsonb_jsonpath.sql +++ b/src/test/regress/sql/jsonb_jsonpath.sql @@ -733,6 +733,12 @@ select jsonb_path_query('"hello world"', '$.translate("hello","bye") starts with select jsonb_path_query('"abc~@~def~@~ghi"', '$.split_part("~@~", 2)'); select jsonb_path_query('"abc,def,ghi,jkl"', '$.split_part(",", -2)'); +-- Test .split() +select jsonb_path_query('"a,b,c"', '$.split(",")'); +select jsonb_path_query('"a,,c"', '$.split(",", "")'); +-- proving the output is a real, indexable JSON array +select jsonb_path_query('"a,b,c"', '$.split(",")[1]'); + -- Test string methods play nicely together select jsonb_path_query('"hello world"', '$.replace("hello","bye").upper()'); select jsonb_path_query('"hElLo WorlD"', '$.lower().upper().lower().replace("hello","bye")'); diff --git a/src/test/regress/sql/jsonpath.sql b/src/test/regress/sql/jsonpath.sql index bce435a6301..ef27c8e7bb6 100644 --- a/src/test/regress/sql/jsonpath.sql +++ b/src/test/regress/sql/jsonpath.sql @@ -91,6 +91,7 @@ select '$.btrim()'::jsonpath; select '$.btrim("xyz")'::jsonpath; select '$.initcap()'::jsonpath; select '$.split_part("~@~", 2)'::jsonpath; +select '$.split(",")'::jsonpath; -- Parse errors select '$.replace("hello")'::jsonpath; @@ -100,6 +101,10 @@ select '$.split_part("~@~")'::jsonpath; select '$.split_part()'::jsonpath; select '$.split_part("~@~", "hi")'::jsonpath; select '$.split_part("~@~", 2, "extra")'::jsonpath; +select '$.split()'::jsonpath; +select '$.split(",", "null", "extra")'::jsonpath; +select '$.split(1)'::jsonpath; +select '$.split(",", 1)'::jsonpath; select '$.lower("hi")'::jsonpath; select '$.upper("hi")'::jsonpath; select '$.initcap("hi")'::jsonpath; @@ -116,6 +121,7 @@ select '$.upper'::jsonpath; select '$.initcap'::jsonpath; select '$.replace'::jsonpath; select '$.split_part'::jsonpath; +select '$.split'::jsonpath; select '$.ltrim'::jsonpath; select '$.rtrim'::jsonpath; select '$.btrim'::jsonpath; diff --git a/src/test/regress/sql/sqljson_queryfuncs.sql b/src/test/regress/sql/sqljson_queryfuncs.sql index 7521b37dae5..83ddfc8bc83 100644 --- a/src/test/regress/sql/sqljson_queryfuncs.sql +++ b/src/test/regress/sql/sqljson_queryfuncs.sql @@ -411,6 +411,7 @@ CREATE INDEX ON test_jsonb_mutability (JSON_QUERY(js, '$.initcap()')); CREATE INDEX ON test_jsonb_mutability (JSON_QUERY(js, '$.replace("hello", "bye")')); CREATE INDEX ON test_jsonb_mutability (JSON_QUERY(js, '$.translate("hello", "bye")')); CREATE INDEX ON test_jsonb_mutability (JSON_QUERY(js, '$.split_part(",", 2)')); +CREATE INDEX ON test_jsonb_mutability (JSON_QUERY(js, '$.split(",")')); -- DEFAULT expression CREATE OR REPLACE FUNCTION ret_setint() RETURNS SETOF integer AS -- 2.53.0