json(b)_to_tsvector with numeric values
Hi,
We've just noticed, that current implementation of `json(b)_to_tsvector` can be
confusing sometimes, if the target document contains numeric values.
In this case
we just drop them, and only string values will contribute to the result:
select to_tsvector('english', '{"a": "The Fat Rats", "b": 123}'::jsonb);
to_tsvector
-----------------
'fat':2 'rat':3
(1 row)
The result would be less surprising if all values, that can be converted to
string representation (so, strings and numeric values, nothing to do for null &
boolean), will take part in it:
select to_tsvector('english', '{"a": "The Fat Rats", "b": 123}'::jsonb);
to_tsvector
-------------------------
'123':5 'fat':2 'rat':3
(1 row)
Attached patch contains small fix that's necessary to get the described
behavior. This patch doesn't touch `ts_headline` though, because following the
same approach it would require changing the type of element in the resulting
json(b).
Any opinions about this suggestion? Can it be considered as a bug fix and
included into this release?
Attachments:
jsonb_to_tsvector_numeric_v1.patchapplication/octet-stream; name=jsonb_to_tsvector_numeric_v1.patchDownload
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 5abb1c46fb..73524a756d 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9688,7 +9688,7 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
</entry>
<entry><type>tsvector</type></entry>
<entry>
- reduce each string value in the document to a <type>tsvector</type>, and then
+ reduce each string or numeric value in the document to a <type>tsvector</type>, and then
concatenate those in document order to produce a single <type>tsvector</type>
</entry>
<entry><literal>to_tsvector('english', '{"a": "The Fat Rats"}'::json)</literal></entry>
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index fa78451613..99c474ef2e 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -4939,8 +4939,8 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
}
/*
- * Iterate over jsonb string values or elements, and pass them together with an
- * iteration state to a specified JsonIterateStringValuesAction.
+ * Iterate over jsonb string/numeric values or elements, and pass them together
+ * with an iteration state to a specified JsonIterateStringValuesAction.
*/
void
iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action)
@@ -4953,15 +4953,23 @@ iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesActio
while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
{
- if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+ if (type == WJB_VALUE || type == WJB_ELEM)
{
- action(state, v.val.string.val, v.val.string.len);
+ if (v.type == jbvString)
+ action(state, v.val.string.val, v.val.string.len);
+
+ if (v.type == jbvNumeric)
+ {
+ char *val = DatumGetCString(DirectFunctionCall1(numeric_out,
+ PointerGetDatum(v.val.numeric)));
+ action(state, val, strlen(val));
+ }
}
}
}
/*
- * Iterate over json string values or elements, and pass them together with an
+ * Iterate over json string/numeric values or elements, and pass them together with an
* iteration state to a specified JsonIterateStringValuesAction.
*/
void
@@ -4990,7 +4998,7 @@ iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
{
IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state;
- if (tokentype == JSON_TOKEN_STRING)
+ if (tokentype == JSON_TOKEN_STRING || tokentype == JSON_TOKEN_NUMBER)
_state->action(_state->action_state, token, strlen(token));
}
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index 06c728e363..5948cbe82b 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -2324,6 +2324,13 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg
'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
(1 row)
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
+ to_tsvector
+-------------------------------------------------
+ '123':7 '456':9 'aaa':1 'bbb':3 'ccc':5 'ddd':4
+(1 row)
+
-- ts_vector corner cases
select to_tsvector('""'::json);
to_tsvector
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index f8d6e6f7cc..ce262421c5 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -4122,6 +4122,13 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg
'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
(1 row)
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
+ to_tsvector
+-------------------------------------------------
+ '123':7 '456':9 'aaa':1 'bbb':3 'ccc':5 'ddd':4
+(1 row)
+
-- ts_vector corner cases
select to_tsvector('""'::jsonb);
to_tsvector
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 256652c41f..d0d3a0dd1c 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -763,6 +763,9 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c"
-- json to tsvector with stop words
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
+
-- ts_vector corner cases
select to_tsvector('""'::json);
select to_tsvector('{}'::json);
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index 2439f949bd..9f09eff8cb 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -1089,6 +1089,9 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c"
-- jsonb to tsvector with stop words
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
+
-- ts_vector corner cases
select to_tsvector('""'::jsonb);
select to_tsvector('{}'::jsonb);
Hello Dmitry,
Dmitry Dolgov <9erthalion6@gmail.com> wrote:
Any opinions about this suggestion? Can it be considered as a bug fix and
included into this release?
I think there is no chance to include it into v11. You can add the patch to
the 2018-09 commitfest.
--
Arthur Zakirov
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
On Mon, Apr 2, 2018 at 9:45 AM, Arthur Zakirov <a.zakirov@postgrespro.ru> wrote:
Hello Dmitry,
Dmitry Dolgov <9erthalion6@gmail.com> wrote:
Any opinions about this suggestion? Can it be considered as a bug fix and
included into this release?I think there is no chance to include it into v11. You can add the patch to
the 2018-09 commitfest.
I found this bug, when working on presentation about FTS and it looked
annoying, since it validates
the consistency of FTS.I think this is a bug, which needs to be fixed,
else inconsistency with existing full text search will be gets
deeper.
The fix looks trivial, but needs a review, of course.
Show quoted text
--
Arthur Zakirov
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
On Mon, Apr 02, 2018 at 11:41:12AM +0300, Oleg Bartunov wrote:
On Mon, Apr 2, 2018 at 9:45 AM, Arthur Zakirov <a.zakirov@postgrespro.ru> wrote:
I found this bug, when working on presentation about FTS and it looked
annoying, since it validates
the consistency of FTS.I think this is a bug, which needs to be fixed,
else inconsistency with existing full text search will be gets
deeper.The fix looks trivial, but needs a review, of course.
Oh, I understood. The code looks good, tests passed. But maybe it is
better to use NumericGetDatum() instead of PointerGetDatum()?
--
Arthur Zakirov
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
On 2 April 2018 at 11:27, Arthur Zakirov <a.zakirov@postgrespro.ru> wrote:
On Mon, Apr 02, 2018 at 11:41:12AM +0300, Oleg Bartunov wrote:On Mon, Apr 2, 2018 at 9:45 AM, Arthur Zakirov <a.zakirov@postgrespro.ru> wrote:
I found this bug, when working on presentation about FTS and it looked
annoying, since it validates
the consistency of FTS.I think this is a bug, which needs to be fixed,
else inconsistency with existing full text search will be gets
deeper.The fix looks trivial, but needs a review, of course.
Oh, I understood. The code looks good, tests passed. But maybe it is
better to use NumericGetDatum() instead of PointerGetDatum()?
Well, technically speaking they're the same, but yes, NumericGetDatum would be
more precise. I've modified it in the attached patch.
Attachments:
jsonb_to_tsvector_numeric_v2.patchapplication/octet-stream; name=jsonb_to_tsvector_numeric_v2.patchDownload
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 5abb1c46fb..73524a756d 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9688,7 +9688,7 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
</entry>
<entry><type>tsvector</type></entry>
<entry>
- reduce each string value in the document to a <type>tsvector</type>, and then
+ reduce each string or numeric value in the document to a <type>tsvector</type>, and then
concatenate those in document order to produce a single <type>tsvector</type>
</entry>
<entry><literal>to_tsvector('english', '{"a": "The Fat Rats"}'::json)</literal></entry>
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index fa78451613..cab0011d62 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -4939,8 +4939,8 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
}
/*
- * Iterate over jsonb string values or elements, and pass them together with an
- * iteration state to a specified JsonIterateStringValuesAction.
+ * Iterate over jsonb string/numeric values or elements, and pass them together
+ * with an iteration state to a specified JsonIterateStringValuesAction.
*/
void
iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action)
@@ -4953,15 +4953,23 @@ iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesActio
while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
{
- if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+ if (type == WJB_VALUE || type == WJB_ELEM)
{
- action(state, v.val.string.val, v.val.string.len);
+ if (v.type == jbvString)
+ action(state, v.val.string.val, v.val.string.len);
+
+ if (v.type == jbvNumeric)
+ {
+ char *val = DatumGetCString(DirectFunctionCall1(numeric_out,
+ NumericGetDatum(v.val.numeric)));
+ action(state, val, strlen(val));
+ }
}
}
}
/*
- * Iterate over json string values or elements, and pass them together with an
+ * Iterate over json string/numeric values or elements, and pass them together with an
* iteration state to a specified JsonIterateStringValuesAction.
*/
void
@@ -4990,7 +4998,7 @@ iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
{
IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state;
- if (tokentype == JSON_TOKEN_STRING)
+ if (tokentype == JSON_TOKEN_STRING || tokentype == JSON_TOKEN_NUMBER)
_state->action(_state->action_state, token, strlen(token));
}
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index 06c728e363..5948cbe82b 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -2324,6 +2324,13 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg
'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
(1 row)
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
+ to_tsvector
+-------------------------------------------------
+ '123':7 '456':9 'aaa':1 'bbb':3 'ccc':5 'ddd':4
+(1 row)
+
-- ts_vector corner cases
select to_tsvector('""'::json);
to_tsvector
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index f8d6e6f7cc..ce262421c5 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -4122,6 +4122,13 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg
'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
(1 row)
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
+ to_tsvector
+-------------------------------------------------
+ '123':7 '456':9 'aaa':1 'bbb':3 'ccc':5 'ddd':4
+(1 row)
+
-- ts_vector corner cases
select to_tsvector('""'::jsonb);
to_tsvector
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 256652c41f..d0d3a0dd1c 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -763,6 +763,9 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c"
-- json to tsvector with stop words
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
+
-- ts_vector corner cases
select to_tsvector('""'::json);
select to_tsvector('{}'::json);
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index 2439f949bd..9f09eff8cb 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -1089,6 +1089,9 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c"
-- jsonb to tsvector with stop words
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
+
-- ts_vector corner cases
select to_tsvector('""'::jsonb);
select to_tsvector('{}'::jsonb);
the consistency of FTS.I think this is a bug, which needs to be fixed,
else inconsistency with existing full text search will be gets
deeper.
Hm, seems, it's useful feature, but I suggest to make separate function
jsonb_any_to_tsvector and add support for boolean too (if you know better name
for function, do not hide it). Changing behavior of existing function is not
obvious for users and, seems, should not backpatched.
--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/
On 4 April 2018 at 11:52, Teodor Sigaev <teodor@sigaev.ru> wrote:
the consistency of FTS.I think this is a bug, which needs to be fixed,
else inconsistency with existing full text search will be gets
deeper.Hm, seems, it's useful feature, but I suggest to make separate function
jsonb_any_to_tsvector and add support for boolean too (if you know better
name for function, do not hide it). Changing behavior of existing function
is not obvious for users and, seems, should not backpatched.
What do you think about having not a separate function, but a flag argument to
the existing one (like `create` in `jsonb_set`), that will have false as
default value? The result would be the same, but without an extra function with
almost the same implementation.
Hm, seems, it's useful feature, but I suggest to make separate function
jsonb_any_to_tsvector and add support for boolean too (if you know better
name for function, do not hide it). Changing behavior of existing function
is not obvious for users and, seems, should not backpatched.What do you think about having not a separate function, but a flag argument to
the existing one (like `create` in `jsonb_set`), that will have false as
default value? The result would be the same, but without an extra function with
almost the same implementation.
tsvector jsonb_to_tsvector(jsonb[, bool]) ?
Agreed. Second arg should be optional.
--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/
On 4 April 2018 at 16:09, Teodor Sigaev <teodor@sigaev.ru> wrote:
Hm, seems, it's useful feature, but I suggest to make separate function
jsonb_any_to_tsvector and add support for boolean too (if you know better
name for function, do not hide it). Changing behavior of existing
function
is not obvious for users and, seems, should not backpatched.What do you think about having not a separate function, but a flag
argument to
the existing one (like `create` in `jsonb_set`), that will have false as
default value? The result would be the same, but without an extra function
with
almost the same implementation.tsvector jsonb_to_tsvector(jsonb[, bool]) ?
Agreed. Second arg should be optional.
Unfortunately, this idea with a flag argument can't be implemented easily
(related discussion is here [1]). So I've modified the patch accordingly to
your original suggestion about having separate functions
`json(b)_all_to_tsvector`.
1: /messages/by-id/CA+q6zcVJ+Wx+-=kkN5UC0T-LtsJWnx0g9S0xSnn3jUWkriufDA@mail.gmail.com
Attachments:
jsonb_to_tsvector_numeric_v3.patchtext/x-patch; charset=US-ASCII; name=jsonb_to_tsvector_numeric_v3.patchDownload
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 5abb1c4..895b60a 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9696,6 +9696,18 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
</row>
<row>
<entry>
+ <literal><function>json(b)_all_to_tsvector(<optional> <replaceable class="parameter">config</replaceable> <type>regconfig</type> , </optional> <replaceable class="parameter">document</replaceable> <type>json(b)</type>)</function></literal>
+ </entry>
+ <entry><type>tsvector</type></entry>
+ <entry>
+ reduce each string, numeric or boolean value in the document to a <type>tsvector</type>,
+ and then concatenate those in document order to produce a single <type>tsvector</type>
+ </entry>
+ <entry><literal>json_all_to_tsvector('english', '{"a": "The Fat Rats", "b": 123}'::json)</literal></entry>
+ <entry><literal>'123':5 'fat':2 'rat':3</literal></entry>
+ </row>
+ <row>
+ <entry>
<indexterm>
<primary>ts_delete</primary>
</indexterm>
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index ea5947a..02c2b00 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -267,12 +267,12 @@ to_tsvector(PG_FUNCTION_ARGS)
PointerGetDatum(in)));
}
-Datum
-jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
+/*
+ * Worker function for jsonb(_all)_to_tsvector(_byid)
+ */
+static TSVector
+jsonb_to_tsvector_worker(Oid cfgId, Jsonb *jb, bool allTypes)
{
- Oid cfgId = PG_GETARG_OID(0);
- Jsonb *jb = PG_GETARG_JSONB_P(1);
- TSVector result;
TSVectorBuildState state;
ParsedText prs;
@@ -281,11 +281,24 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
state.prs = &prs;
state.cfgId = cfgId;
- iterate_jsonb_string_values(jb, &state, add_to_tsvector);
+ if (allTypes)
+ iterate_jsonb_all_values(jb, &state, add_to_tsvector);
+ else
+ iterate_jsonb_string_values(jb, &state, add_to_tsvector);
- PG_FREE_IF_COPY(jb, 1);
- result = make_tsvector(&prs);
+ return make_tsvector(&prs);
+}
+
+Datum
+jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ Jsonb *jb = PG_GETARG_JSONB_P(1);
+ TSVector result;
+
+ result = jsonb_to_tsvector_worker(cfgId, jb, false);
+ PG_FREE_IF_COPY(jb, 1);
PG_RETURN_TSVECTOR(result);
}
@@ -295,19 +308,48 @@ jsonb_to_tsvector(PG_FUNCTION_ARGS)
{
Jsonb *jb = PG_GETARG_JSONB_P(0);
Oid cfgId;
+ TSVector result;
cfgId = getTSCurrentConfig(true);
- PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid,
- ObjectIdGetDatum(cfgId),
- JsonbPGetDatum(jb)));
+ result = jsonb_to_tsvector_worker(cfgId, jb, false);
+ PG_FREE_IF_COPY(jb, 1);
+
+ PG_RETURN_TSVECTOR(result);
}
Datum
-json_to_tsvector_byid(PG_FUNCTION_ARGS)
+jsonb_all_to_tsvector_byid(PG_FUNCTION_ARGS)
{
Oid cfgId = PG_GETARG_OID(0);
- text *json = PG_GETARG_TEXT_P(1);
+ Jsonb *jb = PG_GETARG_JSONB_P(1);
TSVector result;
+
+ result = jsonb_to_tsvector_worker(cfgId, jb, true);
+ PG_FREE_IF_COPY(jb, 1);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+Datum
+jsonb_all_to_tsvector(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ Oid cfgId;
+ TSVector result;
+
+ cfgId = getTSCurrentConfig(true);
+ result = jsonb_to_tsvector_worker(cfgId, jb, true);
+ PG_FREE_IF_COPY(jb, 1);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+/*
+ * Worker function for json(_all)_to_tsvector(_byid)
+ */
+static TSVector
+json_to_tsvector_worker(Oid cfgId, text *json, bool allTypes)
+{
TSVectorBuildState state;
ParsedText prs;
@@ -316,11 +358,20 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS)
state.prs = &prs;
state.cfgId = cfgId;
- iterate_json_string_values(json, &state, add_to_tsvector);
+ iterate_json_values(json, allTypes, &state, add_to_tsvector);
- PG_FREE_IF_COPY(json, 1);
+ return make_tsvector(&prs);
+}
+
+Datum
+json_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ text *json = PG_GETARG_TEXT_P(1);
+ TSVector result;
- result = make_tsvector(&prs);
+ result = json_to_tsvector_worker(cfgId, json, false);
+ PG_FREE_IF_COPY(json, 1);
PG_RETURN_TSVECTOR(result);
}
@@ -330,11 +381,40 @@ json_to_tsvector(PG_FUNCTION_ARGS)
{
text *json = PG_GETARG_TEXT_P(0);
Oid cfgId;
+ TSVector result;
cfgId = getTSCurrentConfig(true);
- PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid,
- ObjectIdGetDatum(cfgId),
- PointerGetDatum(json)));
+ result = json_to_tsvector_worker(cfgId, json, false);
+ PG_FREE_IF_COPY(json, 1);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+Datum
+json_all_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ text *json = PG_GETARG_TEXT_P(1);
+ TSVector result;
+
+ result = json_to_tsvector_worker(cfgId, json, true);
+ PG_FREE_IF_COPY(json, 1);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+Datum
+json_all_to_tsvector(PG_FUNCTION_ARGS)
+{
+ text *json = PG_GETARG_TEXT_P(0);
+ Oid cfgId;
+ TSVector result;
+
+ cfgId = getTSCurrentConfig(true);
+ result = json_to_tsvector_worker(cfgId, json, true);
+ PG_FREE_IF_COPY(json, 1);
+
+ PG_RETURN_TSVECTOR(result);
}
/*
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index fa78451..624185e 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -474,8 +474,9 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems,
int level, Jsonb *newval, uint32 nelems, int op_type);
static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
-/* function supporting iterate_json_string_values */
+/* function supporting iterate_json_values */
static void iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
+static void iterate_all_values_scalar(void *state, char *token, JsonTokenType tokentype);
/* functions supporting transform_json_string_values */
static void transform_string_values_object_start(void *state);
@@ -4939,8 +4940,8 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
}
/*
- * Iterate over jsonb string values or elements, and pass them together with an
- * iteration state to a specified JsonIterateStringValuesAction.
+ * Iterate over jsonb string values or elements, and pass them together
+ * with an iteration state to a specified JsonIterateStringValuesAction.
*/
void
iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action)
@@ -4961,11 +4962,50 @@ iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesActio
}
/*
- * Iterate over json string values or elements, and pass them together with an
+ * Iterate over jsonb string/numeric/boolean values or elements, and pass them
+ * together with an iteration state to a specified JsonIterateStringValuesAction.
+ */
+void
+iterate_jsonb_all_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action)
+{
+ JsonbIterator *it;
+ JsonbValue v;
+ JsonbIteratorToken type;
+ char *val;
+
+ it = JsonbIteratorInit(&jb->root);
+
+ while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+ {
+ if (type == WJB_VALUE || type == WJB_ELEM)
+ {
+
+ if (v.type == jbvString)
+ action(state, v.val.string.val, v.val.string.len);
+
+ if (v.type == jbvNumeric)
+ {
+ val = DatumGetCString(DirectFunctionCall1(numeric_out,
+ NumericGetDatum(v.val.numeric)));
+ action(state, val, strlen(val));
+ }
+
+ if (v.type == jbvNumeric)
+ {
+ val = v.val.boolean ? "true" : "false";
+ action(state, val, strlen(val));
+ }
+ }
+ }
+}
+
+/*
+ * Iterate over json values and elements, and pass them together with an
* iteration state to a specified JsonIterateStringValuesAction.
*/
void
-iterate_json_string_values(text *json, void *action_state, JsonIterateStringValuesAction action)
+iterate_json_values(text *json, bool all_types, void *action_state,
+ JsonIterateStringValuesAction action)
{
JsonLexContext *lex = makeJsonLexContext(json, true);
JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
@@ -4976,14 +5016,17 @@ iterate_json_string_values(text *json, void *action_state, JsonIterateStringValu
state->action_state = action_state;
sem->semstate = (void *) state;
- sem->scalar = iterate_string_values_scalar;
+ if (all_types)
+ sem->scalar = iterate_all_values_scalar;
+ else
+ sem->scalar = iterate_string_values_scalar;
pg_parse_json(lex, sem);
}
/*
- * An auxiliary function for iterate_json_string_values to invoke a specified
- * JsonIterateStringValuesAction.
+ * An auxiliary function for iterate_json_values to invoke a specified
+ * JsonIterateStringValuesAction for string values.
*/
static void
iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
@@ -4995,6 +5038,20 @@ iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
}
/*
+ * An auxiliary function for iterate_json_values to invoke a specified
+ * JsonIterateStringValuesAction for string/numeric/boolean values.
+ */
+static void
+iterate_all_values_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+ IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state;
+
+ if (tokentype == JSON_TOKEN_STRING || tokentype == JSON_TOKEN_NUMBER ||
+ tokentype == JSON_TOKEN_TRUE || tokentype == JSON_TOKEN_FALSE)
+ _state->action(_state->action_state, token, strlen(token));
+}
+
+/*
* Iterate over a jsonb, and apply a specified JsonTransformStringValuesAction
* to every string value or element. Any necessary context for a
* JsonTransformStringValuesAction can be passed in the action_state variable.
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index ec50afc..8b1a80e 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4975,13 +4975,21 @@ DESCR("transform to tsquery");
DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
DESCR("transform to tsquery");
DATA(insert OID = 4209 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
-DESCR("transform jsonb to tsvector");
+DESCR("transform string values from jsonb to tsvector");
+DATA(insert OID = 4213 ( jsonb_all_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_all_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform all values from jsonb to tsvector");
DATA(insert OID = 4210 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
-DESCR("transform json to tsvector");
+DESCR("transform string values from json to tsvector");
+DATA(insert OID = 4215 ( json_all_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_all_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform all values from json to tsvector");
DATA(insert OID = 4211 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector_byid _null_ _null_ _null_ ));
DESCR("transform jsonb to tsvector");
+DATA(insert OID = 4214 ( jsonb_all_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 3802" _null_ _null_ _null_ _null_ _null_ jsonb_all_to_tsvector_byid _null_ _null_ _null_ ));
+DESCR("transform jsonb to tsvector");
DATA(insert OID = 4212 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector_byid _null_ _null_ _null_ ));
-DESCR("transform json to tsvector");
+DESCR("transform string values from json to tsvector");
+DATA(insert OID = 4216 ( json_all_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 114" _null_ _null_ _null_ _null_ _null_ json_all_to_tsvector_byid _null_ _null_ _null_ ));
+DESCR("transform all values from json to tsvector");
DATA(insert OID = 3752 ( tsvector_update_trigger PGNSP PGUID 12 1 0 0 0 f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ ));
DESCR("trigger for automatic update of tsvector column");
diff --git a/src/include/utils/jsonapi.h b/src/include/utils/jsonapi.h
index e39572e..6f5fb85 100644
--- a/src/include/utils/jsonapi.h
+++ b/src/include/utils/jsonapi.h
@@ -140,7 +140,9 @@ typedef text *(*JsonTransformStringValuesAction) (void *state, char *elem_value,
extern void iterate_jsonb_string_values(Jsonb *jb, void *state,
JsonIterateStringValuesAction action);
-extern void iterate_json_string_values(text *json, void *action_state,
+extern void iterate_jsonb_all_values(Jsonb *jb, void *state,
+ JsonIterateStringValuesAction action);
+extern void iterate_json_values(text *json, bool allTypes, void *action_state,
JsonIterateStringValuesAction action);
extern Jsonb *transform_jsonb_string_values(Jsonb *jsonb, void *action_state,
JsonTransformStringValuesAction transform_action);
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index 06c728e..8ce7874 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -2324,6 +2324,19 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg
'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
(1 row)
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
+ to_tsvector
+---------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4
+(1 row)
+
+select json_all_to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
+ json_all_to_tsvector
+-------------------------------------------------
+ '123':7 '456':9 'aaa':1 'bbb':3 'ccc':5 'ddd':4
+(1 row)
+
-- ts_vector corner cases
select to_tsvector('""'::json);
to_tsvector
@@ -2331,24 +2344,48 @@ select to_tsvector('""'::json);
(1 row)
+select json_all_to_tsvector('""'::json);
+ json_all_to_tsvector
+----------------------
+
+(1 row)
+
select to_tsvector('{}'::json);
to_tsvector
-------------
(1 row)
+select json_all_to_tsvector('{}'::json);
+ json_all_to_tsvector
+----------------------
+
+(1 row)
+
select to_tsvector('[]'::json);
to_tsvector
-------------
(1 row)
+select json_all_to_tsvector('[]'::json);
+ json_all_to_tsvector
+----------------------
+
+(1 row)
+
select to_tsvector('null'::json);
to_tsvector
-------------
(1 row)
+select json_all_to_tsvector('null'::json);
+ json_all_to_tsvector
+----------------------
+
+(1 row)
+
-- ts_headline for json
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
ts_headline
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index f8d6e6f..25cce08 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -4122,6 +4122,19 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg
'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
(1 row)
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
+ to_tsvector
+---------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4
+(1 row)
+
+select jsonb_all_to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
+ jsonb_all_to_tsvector
+--------------------------------------------------------------
+ '123':7 '456':11 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'true':9,13
+(1 row)
+
-- ts_vector corner cases
select to_tsvector('""'::jsonb);
to_tsvector
@@ -4129,24 +4142,48 @@ select to_tsvector('""'::jsonb);
(1 row)
+select jsonb_all_to_tsvector('""'::jsonb);
+ jsonb_all_to_tsvector
+-----------------------
+
+(1 row)
+
select to_tsvector('{}'::jsonb);
to_tsvector
-------------
(1 row)
+select jsonb_all_to_tsvector('{}'::jsonb);
+ jsonb_all_to_tsvector
+-----------------------
+
+(1 row)
+
select to_tsvector('[]'::jsonb);
to_tsvector
-------------
(1 row)
+select jsonb_all_to_tsvector('[]'::jsonb);
+ jsonb_all_to_tsvector
+-----------------------
+
+(1 row)
+
select to_tsvector('null'::jsonb);
to_tsvector
-------------
(1 row)
+select jsonb_all_to_tsvector('null'::jsonb);
+ jsonb_all_to_tsvector
+-----------------------
+
+(1 row)
+
-- ts_headline for jsonb
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
ts_headline
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 256652c..f22c0db 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -763,11 +763,19 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c"
-- json to tsvector with stop words
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
+select json_all_to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
+
-- ts_vector corner cases
select to_tsvector('""'::json);
+select json_all_to_tsvector('""'::json);
select to_tsvector('{}'::json);
+select json_all_to_tsvector('{}'::json);
select to_tsvector('[]'::json);
+select json_all_to_tsvector('[]'::json);
select to_tsvector('null'::json);
+select json_all_to_tsvector('null'::json);
-- ts_headline for json
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index 2439f94..e16e7b4 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -1089,11 +1089,19 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c"
-- jsonb to tsvector with stop words
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
+select jsonb_all_to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
+
-- ts_vector corner cases
select to_tsvector('""'::jsonb);
+select jsonb_all_to_tsvector('""'::jsonb);
select to_tsvector('{}'::jsonb);
+select jsonb_all_to_tsvector('{}'::jsonb);
select to_tsvector('[]'::jsonb);
+select jsonb_all_to_tsvector('[]'::jsonb);
select to_tsvector('null'::jsonb);
+select jsonb_all_to_tsvector('null'::jsonb);
-- ts_headline for jsonb
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
1) I don't like jsonb_all_to_tsvector too.. What if we will accept new variant
to index? Let me suggest:
tsvector jsonb_to_tsvector([regclass,] jsonb, text[])
where text[] arg is actually a flags, array contains any combination of literals
'numeric', 'string', 'boolean' (and even 'key' to index keys_ to point which
types should be indexed. More than it, may be, it should a jsonb type for
possible improvements in future. For now, it shouldbe a jsonb array type with
string elements described above, example:
select jsonb_to_tsvector('{"a": "aaa in bbb ddd ccc", "b":123}',
'["numeric", "boolean"]');
Form jsonb_to_tsvector('...', '["string"]) is effectively the same as current
to_tsvector(jsonb)
2)
Now it fails, and I see something strange in resuling tsvector: 'true':9,13 and
'fals':9,13 - I don't see any bool keys in input json.
% more /home/teodor/pgsql/src/test/regress/regression.diffs
*** /home/teodor/pgsql/src/test/regress/expected/jsonb.out 2018-04-06
16:34:59.424481000 +0300
--- /home/teodor/pgsql/src/test/regress/results/jsonb.out 2018-04-06
16:36:48.095411000 +0300
***************
*** 4132,4138 ****
select jsonb_all_to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b":
123, "c": 456}'::jsonb);
jsonb_all_to_tsvector
--------------------------------------------------------------
! '123':7 '456':11 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'true':9,13
(1 row)
-- ts_vector corner cases
--- 4132,4138 ----
select jsonb_all_to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b":
123, "c": 456}'::jsonb);
jsonb_all_to_tsvector
--------------------------------------------------------------
! '123':7 '456':11 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'fals':9,13
(1 row)
-- ts_vector corner cases
Dmitry Dolgov wrote:
On 4 April 2018 at 16:09, Teodor Sigaev <teodor@sigaev.ru> wrote:
Hm, seems, it's useful feature, but I suggest to make separate function
jsonb_any_to_tsvector and add support for boolean too (if you know better
name for function, do not hide it). Changing behavior of existing
function
is not obvious for users and, seems, should not backpatched.What do you think about having not a separate function, but a flag
argument to
the existing one (like `create` in `jsonb_set`), that will have false as
default value? The result would be the same, but without an extra function
with
almost the same implementation.tsvector jsonb_to_tsvector(jsonb[, bool]) ?
Agreed. Second arg should be optional.Unfortunately, this idea with a flag argument can't be implemented easily
(related discussion is here [1]). So I've modified the patch accordingly to
your original suggestion about having separate functions
`json(b)_all_to_tsvector`.1: /messages/by-id/CA+q6zcVJ+Wx+-=kkN5UC0T-LtsJWnx0g9S0xSnn3jUWkriufDA@mail.gmail.com
--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/
On 6 April 2018 at 16:25, Teodor Sigaev <teodor@sigaev.ru> wrote:
1) I don't like jsonb_all_to_tsvector too.. What if we will accept new
variant to index? Let me suggest:tsvector jsonb_to_tsvector([regclass,] jsonb, text[])
where text[] arg is actually a flags, array contains any combination of
literals 'numeric', 'string', 'boolean' (and even 'key' to index keys_ to
point which types should be indexed. More than it, may be, it should a jsonb
type for possible improvements in future. For now, it shouldbe a jsonb array
type with string elements described above, example:select jsonb_to_tsvector('{"a": "aaa in bbb ddd ccc", "b":123}',
'["numeric", "boolean"]');Form jsonb_to_tsvector('...', '["string"]) is effectively the same as
current to_tsvector(jsonb)
Thank you for the suggestion, this sounds appealing. But I have two questions:
* why it should be a jsonb array, not a regular array?
* it would introduce the idea of jsonb element type expressed in text format,
so "string", "numeric", "boolean" etc - are there any consequences of that?
As far as I understand so far there was only jsonb_typeof.
2) Now it fails, and I see something strange in resuling tsvector
Oh, sorry, stupid copy-paste mistake in the condition. Just for the records,
I've attached fixed version of the previous patch (without any changes about an
array instead of a boolean flag).
Attachments:
jsonb_to_tsvector_numeric_v4.patchtext/x-patch; charset=US-ASCII; name=jsonb_to_tsvector_numeric_v4.patchDownload
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 5abb1c4..895b60a 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9696,6 +9696,18 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
</row>
<row>
<entry>
+ <literal><function>json(b)_all_to_tsvector(<optional> <replaceable class="parameter">config</replaceable> <type>regconfig</type> , </optional> <replaceable class="parameter">document</replaceable> <type>json(b)</type>)</function></literal>
+ </entry>
+ <entry><type>tsvector</type></entry>
+ <entry>
+ reduce each string, numeric or boolean value in the document to a <type>tsvector</type>,
+ and then concatenate those in document order to produce a single <type>tsvector</type>
+ </entry>
+ <entry><literal>json_all_to_tsvector('english', '{"a": "The Fat Rats", "b": 123}'::json)</literal></entry>
+ <entry><literal>'123':5 'fat':2 'rat':3</literal></entry>
+ </row>
+ <row>
+ <entry>
<indexterm>
<primary>ts_delete</primary>
</indexterm>
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index ea5947a..02c2b00 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -267,12 +267,12 @@ to_tsvector(PG_FUNCTION_ARGS)
PointerGetDatum(in)));
}
-Datum
-jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
+/*
+ * Worker function for jsonb(_all)_to_tsvector(_byid)
+ */
+static TSVector
+jsonb_to_tsvector_worker(Oid cfgId, Jsonb *jb, bool allTypes)
{
- Oid cfgId = PG_GETARG_OID(0);
- Jsonb *jb = PG_GETARG_JSONB_P(1);
- TSVector result;
TSVectorBuildState state;
ParsedText prs;
@@ -281,11 +281,24 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
state.prs = &prs;
state.cfgId = cfgId;
- iterate_jsonb_string_values(jb, &state, add_to_tsvector);
+ if (allTypes)
+ iterate_jsonb_all_values(jb, &state, add_to_tsvector);
+ else
+ iterate_jsonb_string_values(jb, &state, add_to_tsvector);
- PG_FREE_IF_COPY(jb, 1);
- result = make_tsvector(&prs);
+ return make_tsvector(&prs);
+}
+
+Datum
+jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ Jsonb *jb = PG_GETARG_JSONB_P(1);
+ TSVector result;
+
+ result = jsonb_to_tsvector_worker(cfgId, jb, false);
+ PG_FREE_IF_COPY(jb, 1);
PG_RETURN_TSVECTOR(result);
}
@@ -295,19 +308,48 @@ jsonb_to_tsvector(PG_FUNCTION_ARGS)
{
Jsonb *jb = PG_GETARG_JSONB_P(0);
Oid cfgId;
+ TSVector result;
cfgId = getTSCurrentConfig(true);
- PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid,
- ObjectIdGetDatum(cfgId),
- JsonbPGetDatum(jb)));
+ result = jsonb_to_tsvector_worker(cfgId, jb, false);
+ PG_FREE_IF_COPY(jb, 1);
+
+ PG_RETURN_TSVECTOR(result);
}
Datum
-json_to_tsvector_byid(PG_FUNCTION_ARGS)
+jsonb_all_to_tsvector_byid(PG_FUNCTION_ARGS)
{
Oid cfgId = PG_GETARG_OID(0);
- text *json = PG_GETARG_TEXT_P(1);
+ Jsonb *jb = PG_GETARG_JSONB_P(1);
TSVector result;
+
+ result = jsonb_to_tsvector_worker(cfgId, jb, true);
+ PG_FREE_IF_COPY(jb, 1);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+Datum
+jsonb_all_to_tsvector(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ Oid cfgId;
+ TSVector result;
+
+ cfgId = getTSCurrentConfig(true);
+ result = jsonb_to_tsvector_worker(cfgId, jb, true);
+ PG_FREE_IF_COPY(jb, 1);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+/*
+ * Worker function for json(_all)_to_tsvector(_byid)
+ */
+static TSVector
+json_to_tsvector_worker(Oid cfgId, text *json, bool allTypes)
+{
TSVectorBuildState state;
ParsedText prs;
@@ -316,11 +358,20 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS)
state.prs = &prs;
state.cfgId = cfgId;
- iterate_json_string_values(json, &state, add_to_tsvector);
+ iterate_json_values(json, allTypes, &state, add_to_tsvector);
- PG_FREE_IF_COPY(json, 1);
+ return make_tsvector(&prs);
+}
+
+Datum
+json_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ text *json = PG_GETARG_TEXT_P(1);
+ TSVector result;
- result = make_tsvector(&prs);
+ result = json_to_tsvector_worker(cfgId, json, false);
+ PG_FREE_IF_COPY(json, 1);
PG_RETURN_TSVECTOR(result);
}
@@ -330,11 +381,40 @@ json_to_tsvector(PG_FUNCTION_ARGS)
{
text *json = PG_GETARG_TEXT_P(0);
Oid cfgId;
+ TSVector result;
cfgId = getTSCurrentConfig(true);
- PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid,
- ObjectIdGetDatum(cfgId),
- PointerGetDatum(json)));
+ result = json_to_tsvector_worker(cfgId, json, false);
+ PG_FREE_IF_COPY(json, 1);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+Datum
+json_all_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ text *json = PG_GETARG_TEXT_P(1);
+ TSVector result;
+
+ result = json_to_tsvector_worker(cfgId, json, true);
+ PG_FREE_IF_COPY(json, 1);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+Datum
+json_all_to_tsvector(PG_FUNCTION_ARGS)
+{
+ text *json = PG_GETARG_TEXT_P(0);
+ Oid cfgId;
+ TSVector result;
+
+ cfgId = getTSCurrentConfig(true);
+ result = json_to_tsvector_worker(cfgId, json, true);
+ PG_FREE_IF_COPY(json, 1);
+
+ PG_RETURN_TSVECTOR(result);
}
/*
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index fa78451..4dd6901 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -474,8 +474,9 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems,
int level, Jsonb *newval, uint32 nelems, int op_type);
static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
-/* function supporting iterate_json_string_values */
+/* function supporting iterate_json_values */
static void iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
+static void iterate_all_values_scalar(void *state, char *token, JsonTokenType tokentype);
/* functions supporting transform_json_string_values */
static void transform_string_values_object_start(void *state);
@@ -4939,8 +4940,8 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
}
/*
- * Iterate over jsonb string values or elements, and pass them together with an
- * iteration state to a specified JsonIterateStringValuesAction.
+ * Iterate over jsonb string values or elements, and pass them together
+ * with an iteration state to a specified JsonIterateStringValuesAction.
*/
void
iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action)
@@ -4961,11 +4962,50 @@ iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesActio
}
/*
- * Iterate over json string values or elements, and pass them together with an
+ * Iterate over jsonb string/numeric/boolean values or elements, and pass them
+ * together with an iteration state to a specified JsonIterateStringValuesAction.
+ */
+void
+iterate_jsonb_all_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action)
+{
+ JsonbIterator *it;
+ JsonbValue v;
+ JsonbIteratorToken type;
+ char *val;
+
+ it = JsonbIteratorInit(&jb->root);
+
+ while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+ {
+ if (type == WJB_VALUE || type == WJB_ELEM)
+ {
+
+ if (v.type == jbvString)
+ action(state, v.val.string.val, v.val.string.len);
+
+ if (v.type == jbvNumeric)
+ {
+ val = DatumGetCString(DirectFunctionCall1(numeric_out,
+ NumericGetDatum(v.val.numeric)));
+ action(state, val, strlen(val));
+ }
+
+ if (v.type == jbvBool)
+ {
+ val = v.val.boolean ? "true" : "false";
+ action(state, val, strlen(val));
+ }
+ }
+ }
+}
+
+/*
+ * Iterate over json values and elements, and pass them together with an
* iteration state to a specified JsonIterateStringValuesAction.
*/
void
-iterate_json_string_values(text *json, void *action_state, JsonIterateStringValuesAction action)
+iterate_json_values(text *json, bool all_types, void *action_state,
+ JsonIterateStringValuesAction action)
{
JsonLexContext *lex = makeJsonLexContext(json, true);
JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
@@ -4976,14 +5016,17 @@ iterate_json_string_values(text *json, void *action_state, JsonIterateStringValu
state->action_state = action_state;
sem->semstate = (void *) state;
- sem->scalar = iterate_string_values_scalar;
+ if (all_types)
+ sem->scalar = iterate_all_values_scalar;
+ else
+ sem->scalar = iterate_string_values_scalar;
pg_parse_json(lex, sem);
}
/*
- * An auxiliary function for iterate_json_string_values to invoke a specified
- * JsonIterateStringValuesAction.
+ * An auxiliary function for iterate_json_values to invoke a specified
+ * JsonIterateStringValuesAction for string values.
*/
static void
iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
@@ -4995,6 +5038,20 @@ iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
}
/*
+ * An auxiliary function for iterate_json_values to invoke a specified
+ * JsonIterateStringValuesAction for string/numeric/boolean values.
+ */
+static void
+iterate_all_values_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+ IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state;
+
+ if (tokentype == JSON_TOKEN_STRING || tokentype == JSON_TOKEN_NUMBER ||
+ tokentype == JSON_TOKEN_TRUE || tokentype == JSON_TOKEN_FALSE)
+ _state->action(_state->action_state, token, strlen(token));
+}
+
+/*
* Iterate over a jsonb, and apply a specified JsonTransformStringValuesAction
* to every string value or element. Any necessary context for a
* JsonTransformStringValuesAction can be passed in the action_state variable.
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index ec50afc..8b1a80e 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4975,13 +4975,21 @@ DESCR("transform to tsquery");
DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
DESCR("transform to tsquery");
DATA(insert OID = 4209 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
-DESCR("transform jsonb to tsvector");
+DESCR("transform string values from jsonb to tsvector");
+DATA(insert OID = 4213 ( jsonb_all_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_all_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform all values from jsonb to tsvector");
DATA(insert OID = 4210 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
-DESCR("transform json to tsvector");
+DESCR("transform string values from json to tsvector");
+DATA(insert OID = 4215 ( json_all_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_all_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform all values from json to tsvector");
DATA(insert OID = 4211 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector_byid _null_ _null_ _null_ ));
DESCR("transform jsonb to tsvector");
+DATA(insert OID = 4214 ( jsonb_all_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 3802" _null_ _null_ _null_ _null_ _null_ jsonb_all_to_tsvector_byid _null_ _null_ _null_ ));
+DESCR("transform jsonb to tsvector");
DATA(insert OID = 4212 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector_byid _null_ _null_ _null_ ));
-DESCR("transform json to tsvector");
+DESCR("transform string values from json to tsvector");
+DATA(insert OID = 4216 ( json_all_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 114" _null_ _null_ _null_ _null_ _null_ json_all_to_tsvector_byid _null_ _null_ _null_ ));
+DESCR("transform all values from json to tsvector");
DATA(insert OID = 3752 ( tsvector_update_trigger PGNSP PGUID 12 1 0 0 0 f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ ));
DESCR("trigger for automatic update of tsvector column");
diff --git a/src/include/utils/jsonapi.h b/src/include/utils/jsonapi.h
index e39572e..6f5fb85 100644
--- a/src/include/utils/jsonapi.h
+++ b/src/include/utils/jsonapi.h
@@ -140,7 +140,9 @@ typedef text *(*JsonTransformStringValuesAction) (void *state, char *elem_value,
extern void iterate_jsonb_string_values(Jsonb *jb, void *state,
JsonIterateStringValuesAction action);
-extern void iterate_json_string_values(text *json, void *action_state,
+extern void iterate_jsonb_all_values(Jsonb *jb, void *state,
+ JsonIterateStringValuesAction action);
+extern void iterate_json_values(text *json, bool allTypes, void *action_state,
JsonIterateStringValuesAction action);
extern Jsonb *transform_jsonb_string_values(Jsonb *jsonb, void *action_state,
JsonTransformStringValuesAction transform_action);
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index 06c728e..8ce7874 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -2324,6 +2324,19 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg
'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
(1 row)
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
+ to_tsvector
+---------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4
+(1 row)
+
+select json_all_to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
+ json_all_to_tsvector
+-------------------------------------------------
+ '123':7 '456':9 'aaa':1 'bbb':3 'ccc':5 'ddd':4
+(1 row)
+
-- ts_vector corner cases
select to_tsvector('""'::json);
to_tsvector
@@ -2331,24 +2344,48 @@ select to_tsvector('""'::json);
(1 row)
+select json_all_to_tsvector('""'::json);
+ json_all_to_tsvector
+----------------------
+
+(1 row)
+
select to_tsvector('{}'::json);
to_tsvector
-------------
(1 row)
+select json_all_to_tsvector('{}'::json);
+ json_all_to_tsvector
+----------------------
+
+(1 row)
+
select to_tsvector('[]'::json);
to_tsvector
-------------
(1 row)
+select json_all_to_tsvector('[]'::json);
+ json_all_to_tsvector
+----------------------
+
+(1 row)
+
select to_tsvector('null'::json);
to_tsvector
-------------
(1 row)
+select json_all_to_tsvector('null'::json);
+ json_all_to_tsvector
+----------------------
+
+(1 row)
+
-- ts_headline for json
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
ts_headline
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index f8d6e6f..6ce24fa 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -4122,6 +4122,19 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg
'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
(1 row)
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
+ to_tsvector
+---------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4
+(1 row)
+
+select jsonb_all_to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
+ jsonb_all_to_tsvector
+-------------------------------------------------
+ '123':7 '456':9 'aaa':1 'bbb':3 'ccc':5 'ddd':4
+(1 row)
+
-- ts_vector corner cases
select to_tsvector('""'::jsonb);
to_tsvector
@@ -4129,24 +4142,48 @@ select to_tsvector('""'::jsonb);
(1 row)
+select jsonb_all_to_tsvector('""'::jsonb);
+ jsonb_all_to_tsvector
+-----------------------
+
+(1 row)
+
select to_tsvector('{}'::jsonb);
to_tsvector
-------------
(1 row)
+select jsonb_all_to_tsvector('{}'::jsonb);
+ jsonb_all_to_tsvector
+-----------------------
+
+(1 row)
+
select to_tsvector('[]'::jsonb);
to_tsvector
-------------
(1 row)
+select jsonb_all_to_tsvector('[]'::jsonb);
+ jsonb_all_to_tsvector
+-----------------------
+
+(1 row)
+
select to_tsvector('null'::jsonb);
to_tsvector
-------------
(1 row)
+select jsonb_all_to_tsvector('null'::jsonb);
+ jsonb_all_to_tsvector
+-----------------------
+
+(1 row)
+
-- ts_headline for jsonb
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
ts_headline
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 256652c..f22c0db 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -763,11 +763,19 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c"
-- json to tsvector with stop words
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
+select json_all_to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
+
-- ts_vector corner cases
select to_tsvector('""'::json);
+select json_all_to_tsvector('""'::json);
select to_tsvector('{}'::json);
+select json_all_to_tsvector('{}'::json);
select to_tsvector('[]'::json);
+select json_all_to_tsvector('[]'::json);
select to_tsvector('null'::json);
+select json_all_to_tsvector('null'::json);
-- ts_headline for json
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index 2439f94..e16e7b4 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -1089,11 +1089,19 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c"
-- jsonb to tsvector with stop words
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
+select jsonb_all_to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
+
-- ts_vector corner cases
select to_tsvector('""'::jsonb);
+select jsonb_all_to_tsvector('""'::jsonb);
select to_tsvector('{}'::jsonb);
+select jsonb_all_to_tsvector('{}'::jsonb);
select to_tsvector('[]'::jsonb);
+select jsonb_all_to_tsvector('[]'::jsonb);
select to_tsvector('null'::jsonb);
+select jsonb_all_to_tsvector('null'::jsonb);
-- ts_headline for jsonb
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
Dmitry Dolgov wrote:
On 6 April 2018 at 16:25, Teodor Sigaev <teodor@sigaev.ru> wrote:
1) I don't like jsonb_all_to_tsvector too.. What if we will accept new
variant to index? Let me suggest:tsvector jsonb_to_tsvector([regclass,] jsonb, text[])
where text[] arg is actually a flags, array contains any combination of
literals 'numeric', 'string', 'boolean' (and even 'key' to index keys_ to
point which types should be indexed. More than it, may be, it should a jsonb
type for possible improvements in future. For now, it shouldbe a jsonb array
type with string elements described above, example:select jsonb_to_tsvector('{"a": "aaa in bbb ddd ccc", "b":123}',
'["numeric", "boolean"]');Form jsonb_to_tsvector('...', '["string"]) is effectively the same as
current to_tsvector(jsonb)Thank you for the suggestion, this sounds appealing. But I have two questions:
* why it should be a jsonb array, not a regular array?
To simplify extension of this array in future, for example to add limitation of
recursion level in converted jsonb, etc.
* it would introduce the idea of jsonb element type expressed in text format,
so "string", "numeric", "boolean" etc - are there any consequences of that?
As far as I understand so far there was only jsonb_typeof.
Good catch, jsonb_typeof strings are okay: "string", "number", "boolean"
also "all", "key", "value"
See workable sketch for parsing jsonb flags and new worker variant.
2) Now it fails, and I see something strange in resuling tsvector
Oh, sorry, stupid copy-paste mistake in the condition. Just for the records,
I've attached fixed version of the previous patch (without any changes about an
array instead of a boolean flag).
by the way:
Datum
jsonb_to_tsvector(PG_FUNCTION_ARGS)
{
Jsonb *jb = PG_GETARG_JSONB_P(0);
...
PG_FREE_IF_COPY(jb, 1); //wrong arg number
}
jsonb_all_to_tsvector and json variants too
--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/
Attachments:
On 6 April 2018 at 18:55, Teodor Sigaev <teodor@sigaev.ru> wrote:
Dmitry Dolgov wrote:
On 6 April 2018 at 16:25, Teodor Sigaev <teodor@sigaev.ru> wrote:
1) I don't like jsonb_all_to_tsvector too.. What if we will accept new
variant to index? Let me suggest:tsvector jsonb_to_tsvector([regclass,] jsonb, text[])
where text[] arg is actually a flags, array contains any combination of
literals 'numeric', 'string', 'boolean' (and even 'key' to index keys_ to
point which types should be indexed. More than it, may be, it should a
jsonb
type for possible improvements in future. For now, it shouldbe a jsonb
array
type with string elements described above, example:select jsonb_to_tsvector('{"a": "aaa in bbb ddd ccc", "b":123}',
'["numeric", "boolean"]');Form jsonb_to_tsvector('...', '["string"]) is effectively the same as
current to_tsvector(jsonb)Thank you for the suggestion, this sounds appealing. But I have two
questions:* why it should be a jsonb array, not a regular array?
To simplify extension of this array in future, for example to add limitation
of recursion level in converted jsonb, etc.* it would introduce the idea of jsonb element type expressed in text
format,
so "string", "numeric", "boolean" etc - are there any consequences of
that?
As far as I understand so far there was only jsonb_typeof.Good catch, jsonb_typeof strings are okay: "string", "number", "boolean"
also "all", "key", "value"See workable sketch for parsing jsonb flags and new worker variant.
Yep, thanks for the sketch. Here is the new version of patch, does it look
close to what you have in mind?
Attachments:
jsonb_to_tsvector_numeric_v4.patchapplication/octet-stream; name=jsonb_to_tsvector_numeric_v4.patchDownload
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 5abb1c46fb..aea86e6b25 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9694,6 +9694,26 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
<entry><literal>to_tsvector('english', '{"a": "The Fat Rats"}'::json)</literal></entry>
<entry><literal>'fat':2 'rat':3</literal></entry>
</row>
+ <row>
+ <entry>
+ <literal><function>json(b)_to_tsvector(<optional> <replaceable class="parameter">config</replaceable> <type>regconfig</type>,
+ </optional> <replaceable class="parameter">document</replaceable> <type>json(b)</type>,
+ <replaceable class="parameter">filter</replaceable> <type>json(b)</type>)</function></literal>
+ </entry>
+ <entry><type>tsvector</type></entry>
+ <entry>
+ reduce each value in the document, specified by <replaceable class="parameter">filter</replaceable> to a <type>tsvector</type>,
+ and then concatenate those in document order to produce a single <type>tsvector</type>.
+ <replaceable class="parameter">filter</replaceable> is a jsonb array, that enumerates what kind of elements need to be included
+ into the resulting <type>tsvector</type>. Possible values for <replaceable class="parameter">filter</replaceable> are
+ <literal>"string"</literal> (to include all string values), <literal>"numeric"</literal> (to include all numeric values in the string format),
+ <literal>"boolean"</literal> (to include all boolean values in the string format "true"/"false"),
+ <literal>"key"</literal> (to include all keys) or <literal>"all"</literal> (to include all above). These values
+ can be combined together to include, e.g. all string and numeric values.
+ </entry>
+ <entry><literal>json_to_tsvector('english', '{"a": "The Fat Rats", "b": 123}'::json, '["string", "numeric"]')</literal></entry>
+ <entry><literal>'123':5 'fat':2 'rat':3</literal></entry>
+ </row>
<row>
<entry>
<indexterm>
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index ea5947a3a8..ffbc21ebe4 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -267,12 +267,12 @@ to_tsvector(PG_FUNCTION_ARGS)
PointerGetDatum(in)));
}
-Datum
-jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
+/*
+ * Worker function for jsonb(_string)_to_tsvector(_byid)
+ */
+static TSVector
+jsonb_to_tsvector_worker(Oid cfgId, Jsonb *jb, uint32 flags)
{
- Oid cfgId = PG_GETARG_OID(0);
- Jsonb *jb = PG_GETARG_JSONB_P(1);
- TSVector result;
TSVectorBuildState state;
ParsedText prs;
@@ -281,33 +281,77 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
state.prs = &prs;
state.cfgId = cfgId;
- iterate_jsonb_string_values(jb, &state, add_to_tsvector);
+ iterate_jsonb_values(jb, flags, &state, add_to_tsvector);
- PG_FREE_IF_COPY(jb, 1);
+ return make_tsvector(&prs);
+}
+
+Datum
+jsonb_string_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ Jsonb *jb = PG_GETARG_JSONB_P(1);
+ TSVector result;
- result = make_tsvector(&prs);
+ result = jsonb_to_tsvector_worker(cfgId, jb, jtiString);
+ PG_FREE_IF_COPY(jb, 1);
PG_RETURN_TSVECTOR(result);
}
Datum
-jsonb_to_tsvector(PG_FUNCTION_ARGS)
+jsonb_string_to_tsvector(PG_FUNCTION_ARGS)
{
Jsonb *jb = PG_GETARG_JSONB_P(0);
Oid cfgId;
+ TSVector result;
cfgId = getTSCurrentConfig(true);
- PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid,
- ObjectIdGetDatum(cfgId),
- JsonbPGetDatum(jb)));
+ result = jsonb_to_tsvector_worker(cfgId, jb, jtiString);
+ PG_FREE_IF_COPY(jb, 0);
+
+ PG_RETURN_TSVECTOR(result);
}
Datum
-json_to_tsvector_byid(PG_FUNCTION_ARGS)
+jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
{
Oid cfgId = PG_GETARG_OID(0);
- text *json = PG_GETARG_TEXT_P(1);
+ Jsonb *jb = PG_GETARG_JSONB_P(1);
+ Jsonb *jbFlags = PG_GETARG_JSONB_P(2);
+ TSVector result;
+ uint32 flags = parse_jsonb_index_flags(jbFlags);
+
+ result = jsonb_to_tsvector_worker(cfgId, jb, flags);
+ PG_FREE_IF_COPY(jb, 1);
+ PG_FREE_IF_COPY(jbFlags, 2);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+Datum
+jsonb_to_tsvector(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ Jsonb *jbFlags = PG_GETARG_JSONB_P(1);
+ Oid cfgId;
TSVector result;
+ uint32 flags = parse_jsonb_index_flags(jbFlags);
+
+ cfgId = getTSCurrentConfig(true);
+ result = jsonb_to_tsvector_worker(cfgId, jb, flags);
+ PG_FREE_IF_COPY(jb, 0);
+ PG_FREE_IF_COPY(jbFlags, 1);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+/*
+ * Worker function for json(_string)_to_tsvector(_byid)
+ */
+static TSVector
+json_to_tsvector_worker(Oid cfgId, text *json, uint32 flags)
+{
TSVectorBuildState state;
ParsedText prs;
@@ -316,11 +360,50 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS)
state.prs = &prs;
state.cfgId = cfgId;
- iterate_json_string_values(json, &state, add_to_tsvector);
+ iterate_json_values(json, flags, &state, add_to_tsvector);
+
+ return make_tsvector(&prs);
+}
+
+Datum
+json_string_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ text *json = PG_GETARG_TEXT_P(1);
+ TSVector result;
+ result = json_to_tsvector_worker(cfgId, json, jtiString);
PG_FREE_IF_COPY(json, 1);
- result = make_tsvector(&prs);
+ PG_RETURN_TSVECTOR(result);
+}
+
+Datum
+json_string_to_tsvector(PG_FUNCTION_ARGS)
+{
+ text *json = PG_GETARG_TEXT_P(0);
+ Oid cfgId;
+ TSVector result;
+
+ cfgId = getTSCurrentConfig(true);
+ result = json_to_tsvector_worker(cfgId, json, jtiString);
+ PG_FREE_IF_COPY(json, 0);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+Datum
+json_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ text *json = PG_GETARG_TEXT_P(1);
+ Jsonb *jbFlags = PG_GETARG_JSONB_P(2);
+ TSVector result;
+ uint32 flags = parse_jsonb_index_flags(jbFlags);
+
+ result = json_to_tsvector_worker(cfgId, json, flags);
+ PG_FREE_IF_COPY(json, 1);
+ PG_FREE_IF_COPY(jbFlags, 2);
PG_RETURN_TSVECTOR(result);
}
@@ -329,12 +412,17 @@ Datum
json_to_tsvector(PG_FUNCTION_ARGS)
{
text *json = PG_GETARG_TEXT_P(0);
+ Jsonb *jbFlags = PG_GETARG_JSONB_P(1);
Oid cfgId;
+ TSVector result;
+ uint32 flags = parse_jsonb_index_flags(jbFlags);
cfgId = getTSCurrentConfig(true);
- PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid,
- ObjectIdGetDatum(cfgId),
- PointerGetDatum(json)));
+ result = json_to_tsvector_worker(cfgId, json, flags);
+ PG_FREE_IF_COPY(json, 0);
+ PG_FREE_IF_COPY(jbFlags, 1);
+
+ PG_RETURN_TSVECTOR(result);
}
/*
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index fa78451613..216cf0f338 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -60,6 +60,7 @@ typedef struct IterateJsonStringValuesState
JsonIterateStringValuesAction action; /* an action that will be applied
* to each json value */
void *action_state; /* any necessary context for iteration */
+ uint32 flags; /* what kind of elements from a json we want to iterate */
} IterateJsonStringValuesState;
/* state for transform_json_string_values function */
@@ -474,8 +475,9 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems,
int level, Jsonb *newval, uint32 nelems, int op_type);
static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
-/* function supporting iterate_json_string_values */
-static void iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
+/* function supporting iterate_json_values */
+static void iterate_values_scalar(void *state, char *token, JsonTokenType tokentype);
+static void iterate_values_object_field_start(void *state, char *fname, bool isnull);
/* functions supporting transform_json_string_values */
static void transform_string_values_object_start(void *state);
@@ -4939,11 +4941,64 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
}
/*
- * Iterate over jsonb string values or elements, and pass them together with an
- * iteration state to a specified JsonIterateStringValuesAction.
+ * Parse information about what elements of a jsonb document we want to iterate
+ * in functions iterate_json(b)_values. This information is presented in jsonb
+ * format, so that it can be easily extended in the future.
+ */
+uint32
+parse_jsonb_index_flags(Jsonb *jb)
+{
+ JsonbIterator *it;
+ JsonbValue v;
+ JsonbIteratorToken type;
+ uint32 flags = 0;
+
+ it = JsonbIteratorInit(&jb->root);
+
+ type = JsonbIteratorNext(&it, &v, false);
+
+ if (type != WJB_BEGIN_ARRAY)
+ elog(ERROR, "wrong flag type");
+
+ while ((type = JsonbIteratorNext(&it, &v, false)) == WJB_ELEM)
+ {
+ if (v.type != jbvString)
+ elog(ERROR, "text is only accepted");
+
+ if (v.val.string.len == 3 &&
+ pg_strncasecmp(v.val.string.val, "all", 3) == 0)
+ flags |= jtiAll;
+ else if (v.val.string.len == 3 &&
+ pg_strncasecmp(v.val.string.val, "key", 3) == 0)
+ flags |= jtiKey;
+ else if (v.val.string.len == 6 &&
+ pg_strncasecmp(v.val.string.val, "string", 5) == 0)
+ flags |= jtiString;
+ else if (v.val.string.len == 7 &&
+ pg_strncasecmp(v.val.string.val, "numeric", 7) == 0)
+ flags |= jtiNumeric;
+ else if (v.val.string.len == 7 &&
+ pg_strncasecmp(v.val.string.val, "boolean", 7) == 0)
+ flags |= jtiBool;
+ else
+ elog(ERROR, "string, numeric, boolean, key and all are only accepted");
+ }
+
+ if (type != WJB_END_ARRAY)
+ elog(ERROR, "wrong flag type");
+
+ JsonbIteratorNext(&it, &v, false);
+
+ return flags;
+}
+
+/*
+ * Iterate over jsonb values or elements, specified by flags, and pass them
+ * together with an iteration state to a specified JsonIterateStringValuesAction.
*/
void
-iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action)
+iterate_jsonb_values(Jsonb *jb, uint32 flags, void *state,
+ JsonIterateStringValuesAction action)
{
JsonbIterator *it;
JsonbValue v;
@@ -4953,19 +5008,51 @@ iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesActio
while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
{
- if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
- {
+ if (type == WJB_KEY && (flags & jtiKey))
action(state, v.val.string.val, v.val.string.len);
+
+ else if (type == WJB_VALUE || type == WJB_ELEM)
+ {
+ switch(v.type)
+ {
+ case jbvString:
+ if (flags & jtiString)
+ action(state, v.val.string.val, v.val.string.len);
+ break;
+ case jbvNumeric:
+ if (flags & jtiNumeric)
+ {
+ char *val;
+
+ val = DatumGetCString(DirectFunctionCall1(numeric_out,
+ NumericGetDatum(v.val.numeric)));
+
+ action(state, val, strlen(val));
+ }
+ break;
+ case jbvBool:
+ if (flags & jtiBool)
+ {
+ if (v.val.boolean)
+ action(state, pstrdup("true"), 4);
+ else
+ action(state, pstrdup("false"), 5);
+ }
+ break;
+ default:
+ break;
+ }
}
}
}
/*
- * Iterate over json string values or elements, and pass them together with an
- * iteration state to a specified JsonIterateStringValuesAction.
+ * Iterate over json values and elements, specified by flags, and pass them
+ * together with an iteration state to a specified JsonIterateStringValuesAction.
*/
void
-iterate_json_string_values(text *json, void *action_state, JsonIterateStringValuesAction action)
+iterate_json_values(text *json, uint32 flags, void *action_state,
+ JsonIterateStringValuesAction action)
{
JsonLexContext *lex = makeJsonLexContext(json, true);
JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
@@ -4974,24 +5061,54 @@ iterate_json_string_values(text *json, void *action_state, JsonIterateStringValu
state->lex = lex;
state->action = action;
state->action_state = action_state;
+ state->flags = flags;
sem->semstate = (void *) state;
- sem->scalar = iterate_string_values_scalar;
+ sem->scalar = iterate_values_scalar;
+ sem->object_field_start = iterate_values_object_field_start;
pg_parse_json(lex, sem);
}
/*
- * An auxiliary function for iterate_json_string_values to invoke a specified
- * JsonIterateStringValuesAction.
+ * An auxiliary function for iterate_json_values to invoke a specified
+ * JsonIterateStringValuesAction for specified values.
*/
static void
-iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
+iterate_values_scalar(void *state, char *token, JsonTokenType tokentype)
{
IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state;
- if (tokentype == JSON_TOKEN_STRING)
- _state->action(_state->action_state, token, strlen(token));
+ switch(tokentype)
+ {
+ case JSON_TOKEN_STRING:
+ if (_state->flags & jtiString)
+ _state->action(_state->action_state, token, strlen(token));
+ break;
+ case JSON_TOKEN_NUMBER:
+ if (_state->flags & jtiNumeric)
+ _state->action(_state->action_state, token, strlen(token));
+ break;
+ case JSON_TOKEN_TRUE:
+ case JSON_TOKEN_FALSE:
+ if (_state->flags & jtiBool)
+ _state->action(_state->action_state, token, strlen(token));
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+iterate_values_object_field_start(void *state, char *fname, bool isnull)
+{
+ IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state;
+
+ if (_state->flags & jtiKey)
+ {
+ char *val = pstrdup(fname);
+ _state->action(_state->action_state, val, strlen(val));
+ }
}
/*
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index ec50afcdf0..51c2db3bf6 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4974,14 +4974,22 @@ DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s
DESCR("transform to tsquery");
DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
DESCR("transform to tsquery");
-DATA(insert OID = 4209 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
-DESCR("transform jsonb to tsvector");
-DATA(insert OID = 4210 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
-DESCR("transform json to tsvector");
-DATA(insert OID = 4211 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector_byid _null_ _null_ _null_ ));
-DESCR("transform jsonb to tsvector");
-DATA(insert OID = 4212 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector_byid _null_ _null_ _null_ ));
-DESCR("transform json to tsvector");
+DATA(insert OID = 4209 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_string_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform string values from jsonb to tsvector");
+DATA(insert OID = 4213 ( jsonb_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 2 0 3614 "3802 3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform specified values from jsonb to tsvector");
+DATA(insert OID = 4210 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_string_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform string values from json to tsvector");
+DATA(insert OID = 4215 ( json_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 2 0 3614 "114 3802" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform specified values from json to tsvector");
+DATA(insert OID = 4211 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 3802" _null_ _null_ _null_ _null_ _null_ jsonb_string_to_tsvector_byid _null_ _null_ _null_ ));
+DESCR("transform string values from jsonb to tsvector");
+DATA(insert OID = 4214 ( jsonb_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 3 0 3614 "3734 3802 3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector_byid _null_ _null_ _null_ ));
+DESCR("transform specified values from jsonb to tsvector");
+DATA(insert OID = 4212 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 114" _null_ _null_ _null_ _null_ _null_ json_string_to_tsvector_byid _null_ _null_ _null_ ));
+DESCR("transform string values from json to tsvector");
+DATA(insert OID = 4216 ( json_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 3 0 3614 "3734 114 3802" _null_ _null_ _null_ _null_ _null_ json_to_tsvector_byid _null_ _null_ _null_ ));
+DESCR("transform specified values from json to tsvector");
DATA(insert OID = 3752 ( tsvector_update_trigger PGNSP PGUID 12 1 0 0 0 f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ ));
DESCR("trigger for automatic update of tsvector column");
diff --git a/src/include/utils/jsonapi.h b/src/include/utils/jsonapi.h
index e39572e00f..b28201c2bc 100644
--- a/src/include/utils/jsonapi.h
+++ b/src/include/utils/jsonapi.h
@@ -132,15 +132,28 @@ extern JsonLexContext *makeJsonLexContextCstringLen(char *json,
*/
extern bool IsValidJsonNumber(const char *str, int len);
-/* an action that will be applied to each value in iterate_json(b)_string_vaues functions */
+/*
+ * Flag types for iterate_json(b)_values to specify what elements from a
+ * json(b) document we want to iterate.
+ */
+typedef enum JsonToIndex {
+ jtiKey = 0x01,
+ jtiString = 0x02,
+ jtiNumeric = 0x04,
+ jtiBool = 0x08,
+ jtiAll = jtiKey | jtiString | jtiNumeric | jtiBool
+} JsonToIndex;
+
+/* an action that will be applied to each value in iterate_json(b)_vaues functions */
typedef void (*JsonIterateStringValuesAction) (void *state, char *elem_value, int elem_len);
-/* an action that will be applied to each value in transform_json(b)_string_values functions */
+/* an action that will be applied to each value in transform_json(b)_values functions */
typedef text *(*JsonTransformStringValuesAction) (void *state, char *elem_value, int elem_len);
-extern void iterate_jsonb_string_values(Jsonb *jb, void *state,
+extern uint32 parse_jsonb_index_flags(Jsonb *jb);
+extern void iterate_jsonb_values(Jsonb *jb, uint32 flags, void *state,
JsonIterateStringValuesAction action);
-extern void iterate_json_string_values(text *json, void *action_state,
+extern void iterate_json_values(text *json, uint32 flags, void *action_state,
JsonIterateStringValuesAction action);
extern Jsonb *transform_jsonb_string_values(Jsonb *jsonb, void *action_state,
JsonTransformStringValuesAction transform_action);
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index 06c728e363..c4792ac0a8 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -2324,6 +2324,86 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg
'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
(1 row)
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
+ to_tsvector
+---------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4
+(1 row)
+
+-- json_to_tsvector
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"all"');
+ json_to_tsvector
+----------------------------------------------------------------------------------------
+ '123':8 '456':12 'aaa':2 'b':6 'bbb':4 'c':10 'd':14 'f':18 'fals':20 'g':22 'true':16
+(1 row)
+
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"key"');
+ json_to_tsvector
+--------------------------------
+ 'b':2 'c':4 'd':6 'f':8 'g':10
+(1 row)
+
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"string"');
+ json_to_tsvector
+------------------
+ 'aaa':1 'bbb':3
+(1 row)
+
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"numeric"');
+ json_to_tsvector
+------------------
+ '123':1 '456':3
+(1 row)
+
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"boolean"');
+ json_to_tsvector
+-------------------
+ 'fals':3 'true':1
+(1 row)
+
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["string", "numeric"]');
+ json_to_tsvector
+---------------------------------
+ '123':5 '456':7 'aaa':1 'bbb':3
+(1 row)
+
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"all"');
+ json_to_tsvector
+----------------------------------------------------------------------------------------
+ '123':8 '456':12 'aaa':2 'b':6 'bbb':4 'c':10 'd':14 'f':18 'fals':20 'g':22 'true':16
+(1 row)
+
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"key"');
+ json_to_tsvector
+--------------------------------
+ 'b':2 'c':4 'd':6 'f':8 'g':10
+(1 row)
+
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"string"');
+ json_to_tsvector
+------------------
+ 'aaa':1 'bbb':3
+(1 row)
+
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"numeric"');
+ json_to_tsvector
+------------------
+ '123':1 '456':3
+(1 row)
+
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"boolean"');
+ json_to_tsvector
+-------------------
+ 'fals':3 'true':1
+(1 row)
+
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["string", "numeric"]');
+ json_to_tsvector
+---------------------------------
+ '123':5 '456':7 'aaa':1 'bbb':3
+(1 row)
+
-- ts_vector corner cases
select to_tsvector('""'::json);
to_tsvector
@@ -2349,6 +2429,45 @@ select to_tsvector('null'::json);
(1 row)
+-- json_to_tsvector corner cases
+select json_to_tsvector('""'::json, '"all"');
+ json_to_tsvector
+------------------
+
+(1 row)
+
+select json_to_tsvector('{}'::json, '"all"');
+ json_to_tsvector
+------------------
+
+(1 row)
+
+select json_to_tsvector('[]'::json, '"all"');
+ json_to_tsvector
+------------------
+
+(1 row)
+
+select json_to_tsvector('null'::json, '"all"');
+ json_to_tsvector
+------------------
+
+(1 row)
+
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '""');
+ERROR: string, numeric, boolean, key and all are only accepted
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '{}');
+ERROR: wrong flag type
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '[]');
+ json_to_tsvector
+------------------
+
+(1 row)
+
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, 'null');
+ERROR: text is only accepted
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["all", null]');
+ERROR: text is only accepted
-- ts_headline for json
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
ts_headline
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index f8d6e6f7cc..068dfe3599 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -4122,6 +4122,86 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg
'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
(1 row)
+-- jsonb to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
+ to_tsvector
+---------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4
+(1 row)
+
+-- jsonb_to_tsvector
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"all"');
+ jsonb_to_tsvector
+----------------------------------------------------------------------------------------
+ '123':8 '456':12 'aaa':2 'b':6 'bbb':4 'c':10 'd':14 'f':18 'fals':20 'g':22 'true':16
+(1 row)
+
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"key"');
+ jsonb_to_tsvector
+--------------------------------
+ 'b':2 'c':4 'd':6 'f':8 'g':10
+(1 row)
+
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"string"');
+ jsonb_to_tsvector
+-------------------
+ 'aaa':1 'bbb':3
+(1 row)
+
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"numeric"');
+ jsonb_to_tsvector
+-------------------
+ '123':1 '456':3
+(1 row)
+
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"boolean"');
+ jsonb_to_tsvector
+-------------------
+ 'fals':3 'true':1
+(1 row)
+
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["string", "numeric"]');
+ jsonb_to_tsvector
+---------------------------------
+ '123':5 '456':7 'aaa':1 'bbb':3
+(1 row)
+
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"all"');
+ jsonb_to_tsvector
+----------------------------------------------------------------------------------------
+ '123':8 '456':12 'aaa':2 'b':6 'bbb':4 'c':10 'd':14 'f':18 'fals':20 'g':22 'true':16
+(1 row)
+
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"key"');
+ jsonb_to_tsvector
+--------------------------------
+ 'b':2 'c':4 'd':6 'f':8 'g':10
+(1 row)
+
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"string"');
+ jsonb_to_tsvector
+-------------------
+ 'aaa':1 'bbb':3
+(1 row)
+
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"numeric"');
+ jsonb_to_tsvector
+-------------------
+ '123':1 '456':3
+(1 row)
+
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"boolean"');
+ jsonb_to_tsvector
+-------------------
+ 'fals':3 'true':1
+(1 row)
+
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["string", "numeric"]');
+ jsonb_to_tsvector
+---------------------------------
+ '123':5 '456':7 'aaa':1 'bbb':3
+(1 row)
+
-- ts_vector corner cases
select to_tsvector('""'::jsonb);
to_tsvector
@@ -4147,6 +4227,45 @@ select to_tsvector('null'::jsonb);
(1 row)
+-- jsonb_to_tsvector corner cases
+select jsonb_to_tsvector('""'::jsonb, '"all"');
+ jsonb_to_tsvector
+-------------------
+
+(1 row)
+
+select jsonb_to_tsvector('{}'::jsonb, '"all"');
+ jsonb_to_tsvector
+-------------------
+
+(1 row)
+
+select jsonb_to_tsvector('[]'::jsonb, '"all"');
+ jsonb_to_tsvector
+-------------------
+
+(1 row)
+
+select jsonb_to_tsvector('null'::jsonb, '"all"');
+ jsonb_to_tsvector
+-------------------
+
+(1 row)
+
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '""');
+ERROR: string, numeric, boolean, key and all are only accepted
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '{}');
+ERROR: wrong flag type
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '[]');
+ jsonb_to_tsvector
+-------------------
+
+(1 row)
+
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, 'null');
+ERROR: text is only accepted
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["all", null]');
+ERROR: text is only accepted
-- ts_headline for jsonb
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
ts_headline
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 256652c41f..add1e01cba 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -763,12 +763,42 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c"
-- json to tsvector with stop words
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
+
+-- json_to_tsvector
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"all"');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"key"');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"string"');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"numeric"');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"boolean"');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["string", "numeric"]');
+
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"all"');
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"key"');
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"string"');
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"numeric"');
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"boolean"');
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["string", "numeric"]');
+
-- ts_vector corner cases
select to_tsvector('""'::json);
select to_tsvector('{}'::json);
select to_tsvector('[]'::json);
select to_tsvector('null'::json);
+-- json_to_tsvector corner cases
+select json_to_tsvector('""'::json, '"all"');
+select json_to_tsvector('{}'::json, '"all"');
+select json_to_tsvector('[]'::json, '"all"');
+select json_to_tsvector('null'::json, '"all"');
+
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '""');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '{}');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '[]');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, 'null');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["all", null]');
+
-- ts_headline for json
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index 2439f949bd..f31dd2ac75 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -1089,12 +1089,42 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c"
-- jsonb to tsvector with stop words
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+-- jsonb to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
+
+-- jsonb_to_tsvector
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"all"');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"key"');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"string"');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"numeric"');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"boolean"');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["string", "numeric"]');
+
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"all"');
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"key"');
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"string"');
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"numeric"');
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"boolean"');
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["string", "numeric"]');
+
-- ts_vector corner cases
select to_tsvector('""'::jsonb);
select to_tsvector('{}'::jsonb);
select to_tsvector('[]'::jsonb);
select to_tsvector('null'::jsonb);
+-- jsonb_to_tsvector corner cases
+select jsonb_to_tsvector('""'::jsonb, '"all"');
+select jsonb_to_tsvector('{}'::jsonb, '"all"');
+select jsonb_to_tsvector('[]'::jsonb, '"all"');
+select jsonb_to_tsvector('null'::jsonb, '"all"');
+
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '""');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '{}');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '[]');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, 'null');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["all", null]');
+
-- ts_headline for jsonb
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
See workable sketch for parsing jsonb flags and new worker variant.
Yep, thanks for the sketch. Here is the new version of patch, does it look
close to what you have in mind?
Patch looks good except error messaging, you took it directly from
sketch where I didn't spend time for it. Please, improve. elog() should
be used only for impossible error, whereas user input could contins
mistakes.
--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/
On 7 April 2018 at 17:09, Teodor Sigaev <teodor@sigaev.ru> wrote:
See workable sketch for parsing jsonb flags and new worker variant.
Yep, thanks for the sketch. Here is the new version of patch, does it look
close to what you have in mind?Patch looks good except error messaging, you took it directly from sketch
where I didn't spend time for it. Please, improve. elog() should be used
only for impossible error, whereas user input could contins mistakes.
I assume what you mean is that for user input errors we need to use ereport.
Indeed, thanks for noticing. I've replaced all elog except the last one, since
it actually describes an impossible situation, when we started to read an
array, but ended up having something else instead WJB_END_ARRAY.
Attachments:
jsonb_to_tsvector_numeric_v5.patchapplication/octet-stream; name=jsonb_to_tsvector_numeric_v5.patchDownload
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 5abb1c46fb..aea86e6b25 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9694,6 +9694,26 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
<entry><literal>to_tsvector('english', '{"a": "The Fat Rats"}'::json)</literal></entry>
<entry><literal>'fat':2 'rat':3</literal></entry>
</row>
+ <row>
+ <entry>
+ <literal><function>json(b)_to_tsvector(<optional> <replaceable class="parameter">config</replaceable> <type>regconfig</type>,
+ </optional> <replaceable class="parameter">document</replaceable> <type>json(b)</type>,
+ <replaceable class="parameter">filter</replaceable> <type>json(b)</type>)</function></literal>
+ </entry>
+ <entry><type>tsvector</type></entry>
+ <entry>
+ reduce each value in the document, specified by <replaceable class="parameter">filter</replaceable> to a <type>tsvector</type>,
+ and then concatenate those in document order to produce a single <type>tsvector</type>.
+ <replaceable class="parameter">filter</replaceable> is a jsonb array, that enumerates what kind of elements need to be included
+ into the resulting <type>tsvector</type>. Possible values for <replaceable class="parameter">filter</replaceable> are
+ <literal>"string"</literal> (to include all string values), <literal>"numeric"</literal> (to include all numeric values in the string format),
+ <literal>"boolean"</literal> (to include all boolean values in the string format "true"/"false"),
+ <literal>"key"</literal> (to include all keys) or <literal>"all"</literal> (to include all above). These values
+ can be combined together to include, e.g. all string and numeric values.
+ </entry>
+ <entry><literal>json_to_tsvector('english', '{"a": "The Fat Rats", "b": 123}'::json, '["string", "numeric"]')</literal></entry>
+ <entry><literal>'123':5 'fat':2 'rat':3</literal></entry>
+ </row>
<row>
<entry>
<indexterm>
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index ea5947a3a8..ffbc21ebe4 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -267,12 +267,12 @@ to_tsvector(PG_FUNCTION_ARGS)
PointerGetDatum(in)));
}
-Datum
-jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
+/*
+ * Worker function for jsonb(_string)_to_tsvector(_byid)
+ */
+static TSVector
+jsonb_to_tsvector_worker(Oid cfgId, Jsonb *jb, uint32 flags)
{
- Oid cfgId = PG_GETARG_OID(0);
- Jsonb *jb = PG_GETARG_JSONB_P(1);
- TSVector result;
TSVectorBuildState state;
ParsedText prs;
@@ -281,33 +281,77 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
state.prs = &prs;
state.cfgId = cfgId;
- iterate_jsonb_string_values(jb, &state, add_to_tsvector);
+ iterate_jsonb_values(jb, flags, &state, add_to_tsvector);
- PG_FREE_IF_COPY(jb, 1);
+ return make_tsvector(&prs);
+}
+
+Datum
+jsonb_string_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ Jsonb *jb = PG_GETARG_JSONB_P(1);
+ TSVector result;
- result = make_tsvector(&prs);
+ result = jsonb_to_tsvector_worker(cfgId, jb, jtiString);
+ PG_FREE_IF_COPY(jb, 1);
PG_RETURN_TSVECTOR(result);
}
Datum
-jsonb_to_tsvector(PG_FUNCTION_ARGS)
+jsonb_string_to_tsvector(PG_FUNCTION_ARGS)
{
Jsonb *jb = PG_GETARG_JSONB_P(0);
Oid cfgId;
+ TSVector result;
cfgId = getTSCurrentConfig(true);
- PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid,
- ObjectIdGetDatum(cfgId),
- JsonbPGetDatum(jb)));
+ result = jsonb_to_tsvector_worker(cfgId, jb, jtiString);
+ PG_FREE_IF_COPY(jb, 0);
+
+ PG_RETURN_TSVECTOR(result);
}
Datum
-json_to_tsvector_byid(PG_FUNCTION_ARGS)
+jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
{
Oid cfgId = PG_GETARG_OID(0);
- text *json = PG_GETARG_TEXT_P(1);
+ Jsonb *jb = PG_GETARG_JSONB_P(1);
+ Jsonb *jbFlags = PG_GETARG_JSONB_P(2);
+ TSVector result;
+ uint32 flags = parse_jsonb_index_flags(jbFlags);
+
+ result = jsonb_to_tsvector_worker(cfgId, jb, flags);
+ PG_FREE_IF_COPY(jb, 1);
+ PG_FREE_IF_COPY(jbFlags, 2);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+Datum
+jsonb_to_tsvector(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ Jsonb *jbFlags = PG_GETARG_JSONB_P(1);
+ Oid cfgId;
TSVector result;
+ uint32 flags = parse_jsonb_index_flags(jbFlags);
+
+ cfgId = getTSCurrentConfig(true);
+ result = jsonb_to_tsvector_worker(cfgId, jb, flags);
+ PG_FREE_IF_COPY(jb, 0);
+ PG_FREE_IF_COPY(jbFlags, 1);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+/*
+ * Worker function for json(_string)_to_tsvector(_byid)
+ */
+static TSVector
+json_to_tsvector_worker(Oid cfgId, text *json, uint32 flags)
+{
TSVectorBuildState state;
ParsedText prs;
@@ -316,11 +360,50 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS)
state.prs = &prs;
state.cfgId = cfgId;
- iterate_json_string_values(json, &state, add_to_tsvector);
+ iterate_json_values(json, flags, &state, add_to_tsvector);
+
+ return make_tsvector(&prs);
+}
+
+Datum
+json_string_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ text *json = PG_GETARG_TEXT_P(1);
+ TSVector result;
+ result = json_to_tsvector_worker(cfgId, json, jtiString);
PG_FREE_IF_COPY(json, 1);
- result = make_tsvector(&prs);
+ PG_RETURN_TSVECTOR(result);
+}
+
+Datum
+json_string_to_tsvector(PG_FUNCTION_ARGS)
+{
+ text *json = PG_GETARG_TEXT_P(0);
+ Oid cfgId;
+ TSVector result;
+
+ cfgId = getTSCurrentConfig(true);
+ result = json_to_tsvector_worker(cfgId, json, jtiString);
+ PG_FREE_IF_COPY(json, 0);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+Datum
+json_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ text *json = PG_GETARG_TEXT_P(1);
+ Jsonb *jbFlags = PG_GETARG_JSONB_P(2);
+ TSVector result;
+ uint32 flags = parse_jsonb_index_flags(jbFlags);
+
+ result = json_to_tsvector_worker(cfgId, json, flags);
+ PG_FREE_IF_COPY(json, 1);
+ PG_FREE_IF_COPY(jbFlags, 2);
PG_RETURN_TSVECTOR(result);
}
@@ -329,12 +412,17 @@ Datum
json_to_tsvector(PG_FUNCTION_ARGS)
{
text *json = PG_GETARG_TEXT_P(0);
+ Jsonb *jbFlags = PG_GETARG_JSONB_P(1);
Oid cfgId;
+ TSVector result;
+ uint32 flags = parse_jsonb_index_flags(jbFlags);
cfgId = getTSCurrentConfig(true);
- PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid,
- ObjectIdGetDatum(cfgId),
- PointerGetDatum(json)));
+ result = json_to_tsvector_worker(cfgId, json, flags);
+ PG_FREE_IF_COPY(json, 0);
+ PG_FREE_IF_COPY(jbFlags, 1);
+
+ PG_RETURN_TSVECTOR(result);
}
/*
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index fa78451613..ac95535f63 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -60,6 +60,7 @@ typedef struct IterateJsonStringValuesState
JsonIterateStringValuesAction action; /* an action that will be applied
* to each json value */
void *action_state; /* any necessary context for iteration */
+ uint32 flags; /* what kind of elements from a json we want to iterate */
} IterateJsonStringValuesState;
/* state for transform_json_string_values function */
@@ -474,8 +475,9 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems,
int level, Jsonb *newval, uint32 nelems, int op_type);
static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
-/* function supporting iterate_json_string_values */
-static void iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
+/* function supporting iterate_json_values */
+static void iterate_values_scalar(void *state, char *token, JsonTokenType tokentype);
+static void iterate_values_object_field_start(void *state, char *fname, bool isnull);
/* functions supporting transform_json_string_values */
static void transform_string_values_object_start(void *state);
@@ -4939,11 +4941,69 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
}
/*
- * Iterate over jsonb string values or elements, and pass them together with an
- * iteration state to a specified JsonIterateStringValuesAction.
+ * Parse information about what elements of a jsonb document we want to iterate
+ * in functions iterate_json(b)_values. This information is presented in jsonb
+ * format, so that it can be easily extended in the future.
+ */
+uint32
+parse_jsonb_index_flags(Jsonb *jb)
+{
+ JsonbIterator *it;
+ JsonbValue v;
+ JsonbIteratorToken type;
+ uint32 flags = 0;
+
+ it = JsonbIteratorInit(&jb->root);
+
+ type = JsonbIteratorNext(&it, &v, false);
+
+ if (type != WJB_BEGIN_ARRAY)
+ ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("wrong flag type, only arrays and scalars are allowed")));
+
+ while ((type = JsonbIteratorNext(&it, &v, false)) == WJB_ELEM)
+ {
+ if (v.type != jbvString)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("text is only accepted")));
+
+ if (v.val.string.len == 3 &&
+ pg_strncasecmp(v.val.string.val, "all", 3) == 0)
+ flags |= jtiAll;
+ else if (v.val.string.len == 3 &&
+ pg_strncasecmp(v.val.string.val, "key", 3) == 0)
+ flags |= jtiKey;
+ else if (v.val.string.len == 6 &&
+ pg_strncasecmp(v.val.string.val, "string", 5) == 0)
+ flags |= jtiString;
+ else if (v.val.string.len == 7 &&
+ pg_strncasecmp(v.val.string.val, "numeric", 7) == 0)
+ flags |= jtiNumeric;
+ else if (v.val.string.len == 7 &&
+ pg_strncasecmp(v.val.string.val, "boolean", 7) == 0)
+ flags |= jtiBool;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string, numeric, boolean, key and all are only accepted")));
+ }
+
+ if (type != WJB_END_ARRAY)
+ elog(ERROR, "wrong flag type, only arrays and scalars are allowed");
+
+ JsonbIteratorNext(&it, &v, false);
+
+ return flags;
+}
+
+/*
+ * Iterate over jsonb values or elements, specified by flags, and pass them
+ * together with an iteration state to a specified JsonIterateStringValuesAction.
*/
void
-iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action)
+iterate_jsonb_values(Jsonb *jb, uint32 flags, void *state,
+ JsonIterateStringValuesAction action)
{
JsonbIterator *it;
JsonbValue v;
@@ -4953,19 +5013,51 @@ iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesActio
while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
{
- if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
- {
+ if (type == WJB_KEY && (flags & jtiKey))
action(state, v.val.string.val, v.val.string.len);
+
+ else if (type == WJB_VALUE || type == WJB_ELEM)
+ {
+ switch(v.type)
+ {
+ case jbvString:
+ if (flags & jtiString)
+ action(state, v.val.string.val, v.val.string.len);
+ break;
+ case jbvNumeric:
+ if (flags & jtiNumeric)
+ {
+ char *val;
+
+ val = DatumGetCString(DirectFunctionCall1(numeric_out,
+ NumericGetDatum(v.val.numeric)));
+
+ action(state, val, strlen(val));
+ }
+ break;
+ case jbvBool:
+ if (flags & jtiBool)
+ {
+ if (v.val.boolean)
+ action(state, pstrdup("true"), 4);
+ else
+ action(state, pstrdup("false"), 5);
+ }
+ break;
+ default:
+ break;
+ }
}
}
}
/*
- * Iterate over json string values or elements, and pass them together with an
- * iteration state to a specified JsonIterateStringValuesAction.
+ * Iterate over json values and elements, specified by flags, and pass them
+ * together with an iteration state to a specified JsonIterateStringValuesAction.
*/
void
-iterate_json_string_values(text *json, void *action_state, JsonIterateStringValuesAction action)
+iterate_json_values(text *json, uint32 flags, void *action_state,
+ JsonIterateStringValuesAction action)
{
JsonLexContext *lex = makeJsonLexContext(json, true);
JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
@@ -4974,24 +5066,54 @@ iterate_json_string_values(text *json, void *action_state, JsonIterateStringValu
state->lex = lex;
state->action = action;
state->action_state = action_state;
+ state->flags = flags;
sem->semstate = (void *) state;
- sem->scalar = iterate_string_values_scalar;
+ sem->scalar = iterate_values_scalar;
+ sem->object_field_start = iterate_values_object_field_start;
pg_parse_json(lex, sem);
}
/*
- * An auxiliary function for iterate_json_string_values to invoke a specified
- * JsonIterateStringValuesAction.
+ * An auxiliary function for iterate_json_values to invoke a specified
+ * JsonIterateStringValuesAction for specified values.
*/
static void
-iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
+iterate_values_scalar(void *state, char *token, JsonTokenType tokentype)
{
IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state;
- if (tokentype == JSON_TOKEN_STRING)
- _state->action(_state->action_state, token, strlen(token));
+ switch(tokentype)
+ {
+ case JSON_TOKEN_STRING:
+ if (_state->flags & jtiString)
+ _state->action(_state->action_state, token, strlen(token));
+ break;
+ case JSON_TOKEN_NUMBER:
+ if (_state->flags & jtiNumeric)
+ _state->action(_state->action_state, token, strlen(token));
+ break;
+ case JSON_TOKEN_TRUE:
+ case JSON_TOKEN_FALSE:
+ if (_state->flags & jtiBool)
+ _state->action(_state->action_state, token, strlen(token));
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+iterate_values_object_field_start(void *state, char *fname, bool isnull)
+{
+ IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state;
+
+ if (_state->flags & jtiKey)
+ {
+ char *val = pstrdup(fname);
+ _state->action(_state->action_state, val, strlen(val));
+ }
}
/*
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index ec50afcdf0..51c2db3bf6 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4974,14 +4974,22 @@ DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s
DESCR("transform to tsquery");
DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
DESCR("transform to tsquery");
-DATA(insert OID = 4209 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
-DESCR("transform jsonb to tsvector");
-DATA(insert OID = 4210 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
-DESCR("transform json to tsvector");
-DATA(insert OID = 4211 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector_byid _null_ _null_ _null_ ));
-DESCR("transform jsonb to tsvector");
-DATA(insert OID = 4212 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector_byid _null_ _null_ _null_ ));
-DESCR("transform json to tsvector");
+DATA(insert OID = 4209 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_string_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform string values from jsonb to tsvector");
+DATA(insert OID = 4213 ( jsonb_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 2 0 3614 "3802 3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform specified values from jsonb to tsvector");
+DATA(insert OID = 4210 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_string_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform string values from json to tsvector");
+DATA(insert OID = 4215 ( json_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 2 0 3614 "114 3802" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform specified values from json to tsvector");
+DATA(insert OID = 4211 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 3802" _null_ _null_ _null_ _null_ _null_ jsonb_string_to_tsvector_byid _null_ _null_ _null_ ));
+DESCR("transform string values from jsonb to tsvector");
+DATA(insert OID = 4214 ( jsonb_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 3 0 3614 "3734 3802 3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector_byid _null_ _null_ _null_ ));
+DESCR("transform specified values from jsonb to tsvector");
+DATA(insert OID = 4212 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3614 "3734 114" _null_ _null_ _null_ _null_ _null_ json_string_to_tsvector_byid _null_ _null_ _null_ ));
+DESCR("transform string values from json to tsvector");
+DATA(insert OID = 4216 ( json_to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f i s 3 0 3614 "3734 114 3802" _null_ _null_ _null_ _null_ _null_ json_to_tsvector_byid _null_ _null_ _null_ ));
+DESCR("transform specified values from json to tsvector");
DATA(insert OID = 3752 ( tsvector_update_trigger PGNSP PGUID 12 1 0 0 0 f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ ));
DESCR("trigger for automatic update of tsvector column");
diff --git a/src/include/utils/jsonapi.h b/src/include/utils/jsonapi.h
index e39572e00f..b28201c2bc 100644
--- a/src/include/utils/jsonapi.h
+++ b/src/include/utils/jsonapi.h
@@ -132,15 +132,28 @@ extern JsonLexContext *makeJsonLexContextCstringLen(char *json,
*/
extern bool IsValidJsonNumber(const char *str, int len);
-/* an action that will be applied to each value in iterate_json(b)_string_vaues functions */
+/*
+ * Flag types for iterate_json(b)_values to specify what elements from a
+ * json(b) document we want to iterate.
+ */
+typedef enum JsonToIndex {
+ jtiKey = 0x01,
+ jtiString = 0x02,
+ jtiNumeric = 0x04,
+ jtiBool = 0x08,
+ jtiAll = jtiKey | jtiString | jtiNumeric | jtiBool
+} JsonToIndex;
+
+/* an action that will be applied to each value in iterate_json(b)_vaues functions */
typedef void (*JsonIterateStringValuesAction) (void *state, char *elem_value, int elem_len);
-/* an action that will be applied to each value in transform_json(b)_string_values functions */
+/* an action that will be applied to each value in transform_json(b)_values functions */
typedef text *(*JsonTransformStringValuesAction) (void *state, char *elem_value, int elem_len);
-extern void iterate_jsonb_string_values(Jsonb *jb, void *state,
+extern uint32 parse_jsonb_index_flags(Jsonb *jb);
+extern void iterate_jsonb_values(Jsonb *jb, uint32 flags, void *state,
JsonIterateStringValuesAction action);
-extern void iterate_json_string_values(text *json, void *action_state,
+extern void iterate_json_values(text *json, uint32 flags, void *action_state,
JsonIterateStringValuesAction action);
extern Jsonb *transform_jsonb_string_values(Jsonb *jsonb, void *action_state,
JsonTransformStringValuesAction transform_action);
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index 06c728e363..ec76932b15 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -2324,6 +2324,86 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg
'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
(1 row)
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
+ to_tsvector
+---------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4
+(1 row)
+
+-- json_to_tsvector
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"all"');
+ json_to_tsvector
+----------------------------------------------------------------------------------------
+ '123':8 '456':12 'aaa':2 'b':6 'bbb':4 'c':10 'd':14 'f':18 'fals':20 'g':22 'true':16
+(1 row)
+
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"key"');
+ json_to_tsvector
+--------------------------------
+ 'b':2 'c':4 'd':6 'f':8 'g':10
+(1 row)
+
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"string"');
+ json_to_tsvector
+------------------
+ 'aaa':1 'bbb':3
+(1 row)
+
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"numeric"');
+ json_to_tsvector
+------------------
+ '123':1 '456':3
+(1 row)
+
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"boolean"');
+ json_to_tsvector
+-------------------
+ 'fals':3 'true':1
+(1 row)
+
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["string", "numeric"]');
+ json_to_tsvector
+---------------------------------
+ '123':5 '456':7 'aaa':1 'bbb':3
+(1 row)
+
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"all"');
+ json_to_tsvector
+----------------------------------------------------------------------------------------
+ '123':8 '456':12 'aaa':2 'b':6 'bbb':4 'c':10 'd':14 'f':18 'fals':20 'g':22 'true':16
+(1 row)
+
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"key"');
+ json_to_tsvector
+--------------------------------
+ 'b':2 'c':4 'd':6 'f':8 'g':10
+(1 row)
+
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"string"');
+ json_to_tsvector
+------------------
+ 'aaa':1 'bbb':3
+(1 row)
+
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"numeric"');
+ json_to_tsvector
+------------------
+ '123':1 '456':3
+(1 row)
+
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"boolean"');
+ json_to_tsvector
+-------------------
+ 'fals':3 'true':1
+(1 row)
+
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["string", "numeric"]');
+ json_to_tsvector
+---------------------------------
+ '123':5 '456':7 'aaa':1 'bbb':3
+(1 row)
+
-- ts_vector corner cases
select to_tsvector('""'::json);
to_tsvector
@@ -2349,6 +2429,45 @@ select to_tsvector('null'::json);
(1 row)
+-- json_to_tsvector corner cases
+select json_to_tsvector('""'::json, '"all"');
+ json_to_tsvector
+------------------
+
+(1 row)
+
+select json_to_tsvector('{}'::json, '"all"');
+ json_to_tsvector
+------------------
+
+(1 row)
+
+select json_to_tsvector('[]'::json, '"all"');
+ json_to_tsvector
+------------------
+
+(1 row)
+
+select json_to_tsvector('null'::json, '"all"');
+ json_to_tsvector
+------------------
+
+(1 row)
+
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '""');
+ERROR: string, numeric, boolean, key and all are only accepted
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '{}');
+ERROR: wrong flag type, only arrays and scalars are allowed
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '[]');
+ json_to_tsvector
+------------------
+
+(1 row)
+
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, 'null');
+ERROR: text is only accepted
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["all", null]');
+ERROR: text is only accepted
-- ts_headline for json
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
ts_headline
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index f8d6e6f7cc..edadfc5afd 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -4122,6 +4122,86 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg
'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
(1 row)
+-- jsonb to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
+ to_tsvector
+---------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4
+(1 row)
+
+-- jsonb_to_tsvector
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"all"');
+ jsonb_to_tsvector
+----------------------------------------------------------------------------------------
+ '123':8 '456':12 'aaa':2 'b':6 'bbb':4 'c':10 'd':14 'f':18 'fals':20 'g':22 'true':16
+(1 row)
+
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"key"');
+ jsonb_to_tsvector
+--------------------------------
+ 'b':2 'c':4 'd':6 'f':8 'g':10
+(1 row)
+
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"string"');
+ jsonb_to_tsvector
+-------------------
+ 'aaa':1 'bbb':3
+(1 row)
+
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"numeric"');
+ jsonb_to_tsvector
+-------------------
+ '123':1 '456':3
+(1 row)
+
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"boolean"');
+ jsonb_to_tsvector
+-------------------
+ 'fals':3 'true':1
+(1 row)
+
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["string", "numeric"]');
+ jsonb_to_tsvector
+---------------------------------
+ '123':5 '456':7 'aaa':1 'bbb':3
+(1 row)
+
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"all"');
+ jsonb_to_tsvector
+----------------------------------------------------------------------------------------
+ '123':8 '456':12 'aaa':2 'b':6 'bbb':4 'c':10 'd':14 'f':18 'fals':20 'g':22 'true':16
+(1 row)
+
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"key"');
+ jsonb_to_tsvector
+--------------------------------
+ 'b':2 'c':4 'd':6 'f':8 'g':10
+(1 row)
+
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"string"');
+ jsonb_to_tsvector
+-------------------
+ 'aaa':1 'bbb':3
+(1 row)
+
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"numeric"');
+ jsonb_to_tsvector
+-------------------
+ '123':1 '456':3
+(1 row)
+
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"boolean"');
+ jsonb_to_tsvector
+-------------------
+ 'fals':3 'true':1
+(1 row)
+
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["string", "numeric"]');
+ jsonb_to_tsvector
+---------------------------------
+ '123':5 '456':7 'aaa':1 'bbb':3
+(1 row)
+
-- ts_vector corner cases
select to_tsvector('""'::jsonb);
to_tsvector
@@ -4147,6 +4227,45 @@ select to_tsvector('null'::jsonb);
(1 row)
+-- jsonb_to_tsvector corner cases
+select jsonb_to_tsvector('""'::jsonb, '"all"');
+ jsonb_to_tsvector
+-------------------
+
+(1 row)
+
+select jsonb_to_tsvector('{}'::jsonb, '"all"');
+ jsonb_to_tsvector
+-------------------
+
+(1 row)
+
+select jsonb_to_tsvector('[]'::jsonb, '"all"');
+ jsonb_to_tsvector
+-------------------
+
+(1 row)
+
+select jsonb_to_tsvector('null'::jsonb, '"all"');
+ jsonb_to_tsvector
+-------------------
+
+(1 row)
+
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '""');
+ERROR: string, numeric, boolean, key and all are only accepted
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '{}');
+ERROR: wrong flag type, only arrays and scalars are allowed
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '[]');
+ jsonb_to_tsvector
+-------------------
+
+(1 row)
+
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, 'null');
+ERROR: text is only accepted
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["all", null]');
+ERROR: text is only accepted
-- ts_headline for jsonb
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
ts_headline
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 256652c41f..add1e01cba 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -763,12 +763,42 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c"
-- json to tsvector with stop words
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+-- json to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json);
+
+-- json_to_tsvector
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"all"');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"key"');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"string"');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"numeric"');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"boolean"');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["string", "numeric"]');
+
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"all"');
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"key"');
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"string"');
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"numeric"');
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"boolean"');
+select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["string", "numeric"]');
+
-- ts_vector corner cases
select to_tsvector('""'::json);
select to_tsvector('{}'::json);
select to_tsvector('[]'::json);
select to_tsvector('null'::json);
+-- json_to_tsvector corner cases
+select json_to_tsvector('""'::json, '"all"');
+select json_to_tsvector('{}'::json, '"all"');
+select json_to_tsvector('[]'::json, '"all"');
+select json_to_tsvector('null'::json, '"all"');
+
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '""');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '{}');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '[]');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, 'null');
+select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["all", null]');
+
-- ts_headline for json
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index 2439f949bd..f31dd2ac75 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -1089,12 +1089,42 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c"
-- jsonb to tsvector with stop words
select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+-- jsonb to tsvector with numeric values
+select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb);
+
+-- jsonb_to_tsvector
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"all"');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"key"');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"string"');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"numeric"');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"boolean"');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["string", "numeric"]');
+
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"all"');
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"key"');
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"string"');
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"numeric"');
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"boolean"');
+select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["string", "numeric"]');
+
-- ts_vector corner cases
select to_tsvector('""'::jsonb);
select to_tsvector('{}'::jsonb);
select to_tsvector('[]'::jsonb);
select to_tsvector('null'::jsonb);
+-- jsonb_to_tsvector corner cases
+select jsonb_to_tsvector('""'::jsonb, '"all"');
+select jsonb_to_tsvector('{}'::jsonb, '"all"');
+select jsonb_to_tsvector('[]'::jsonb, '"all"');
+select jsonb_to_tsvector('null'::jsonb, '"all"');
+
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '""');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '{}');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '[]');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, 'null');
+select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["all", null]');
+
-- ts_headline for jsonb
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
Thank you, pushed with some editorization
Dmitry Dolgov wrote:
On 7 April 2018 at 17:09, Teodor Sigaev <teodor@sigaev.ru> wrote:
See workable sketch for parsing jsonb flags and new worker variant.
Yep, thanks for the sketch. Here is the new version of patch, does it look
close to what you have in mind?Patch looks good except error messaging, you took it directly from sketch
where I didn't spend time for it. Please, improve. elog() should be used
only for impossible error, whereas user input could contins mistakes.I assume what you mean is that for user input errors we need to use ereport.
Indeed, thanks for noticing. I've replaced all elog except the last one, since
it actually describes an impossible situation, when we started to read an
array, but ended up having something else instead WJB_END_ARRAY.
--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/