[PATCH] few fts functions for jsonb

Started by Dmitry Dolgovalmost 9 years ago19 messages

9erthalion6@gmail.com

almost 9 years ago

1 attachment(s)

Hi all

I would like to propose patch with a set of new small functions for fts in
case of
jsonb data type:

* to_tsvector(config, jsonb) - make a tsvector from all string values and
elements of jsonb object. To prevent the situation, when tsquery can find
a
phrase consisting of lexemes from two different values/elements, this
function will add an increment to position of each lexeme from every new
value/element.

* ts_headline(config, jsonb, tsquery, options) - generate a headline
directly
from jsonb object

Here are the examples how they work:

```
=# select to_tsvector('{"a": "aaa bbb", "b": ["ccc ddd"], "c": {"d": "eee
fff"}}'::jsonb);
to_tsvector
-------------------------------------------------
'aaa':1 'bbb':2 'ccc':4 'ddd':5 'eee':7 'fff':8
(1 row)

=# select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc
ddd"}}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
ts_headline
----------------------
aaa <bbb> ccc <ddd>
(1 row)
```

Any comments or suggestions?

Attachments:

jsonb_fts_v1.patchtext/x-patch; charset=US-ASCII; name=jsonb_fts_v1.patchDownload

diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 6e5de8f..08e08e5 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -16,6 +16,8 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonb.h"
+#include "utils/fmgrprotos.h"
 
 
 typedef struct MorphOpaque
@@ -256,6 +258,58 @@ to_tsvector(PG_FUNCTION_ARGS)
 										PointerGetDatum(in)));
 }
 
+Datum
+jsonb_to_tsvector(PG_FUNCTION_ARGS)
+{
+	Jsonb				*jb = PG_GETARG_JSONB(0);
+	JsonbIterator		*it;
+	JsonbValue			v;
+	Oid					cfgId;
+	ParsedText			prs;
+	TSVector			result, item_vector;
+	JsonbIteratorToken	type;
+	int					i;
+
+	cfgId = getTSCurrentConfig(true);
+	it = JsonbIteratorInit(&jb->root);
+
+	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+		{
+			prs.lenwords = v.val.string.len / 6;
+
+			if (prs.lenwords == 0)
+				prs.lenwords = 2;
+
+			prs.curwords = 0;
+			prs.pos = 0;
+			prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
+
+			parsetext(cfgId, &prs, v.val.string.val, v.val.string.len);
+
+			if (prs.curwords)
+			{
+				if (result->size != 0)
+				{
+					for (i = 0; i < prs.curwords; i++)
+						prs.words[i].pos.pos = prs.words[i].pos.pos + TS_JUMP;
+
+					item_vector = make_tsvector(&prs);
+
+					result = DirectFunctionCall2(tsvector_concat,
+											TSVectorGetDatum(result),
+											PointerGetDatum(item_vector));
+				}
+				else
+					result = make_tsvector(&prs);
+			}
+		}
+	}
+
+	PG_RETURN_DATUM(result);
+}
+
 /*
  * to_tsquery
  */
diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c
index 8ca1c62..035632e 100644
--- a/src/backend/tsearch/wparser.c
+++ b/src/backend/tsearch/wparser.c
@@ -21,6 +21,7 @@
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
 #include "utils/varlena.h"
+#include "utils/jsonb.h"
 
 
 /******sql-level interface******/
@@ -362,3 +363,41 @@ ts_headline_opt(PG_FUNCTION_ARGS)
 										PG_GETARG_DATUM(1),
 										PG_GETARG_DATUM(2)));
 }
+
+Datum
+ts_headline_jsonb(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										CStringGetTextDatum(jsonb_values_as_string(PG_GETARG_DATUM(0))),
+										PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
+										PG_GETARG_DATUM(0),
+										CStringGetTextDatum(jsonb_values_as_string(PG_GETARG_DATUM(1))),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										CStringGetTextDatum(jsonb_values_as_string(PG_GETARG_DATUM(0))),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt,
+										PG_GETARG_DATUM(0),
+										CStringGetTextDatum(jsonb_values_as_string(PG_GETARG_DATUM(1))),
+										PG_GETARG_DATUM(2),
+										PG_GETARG_DATUM(3)));
+}
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index 6a7aab2..d504b87 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -4130,3 +4130,29 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
 		}
 	}
 }
+
+/*
+ * Gather all string values and elements from jsonb into one string buffer.
+ * It's convenient for using inside ts_headline_* functions.
+ */
+char*
+jsonb_values_as_string(Jsonb *jb)
+{
+	JsonbIterator		*it;
+	JsonbValue			v;
+	JsonbIteratorToken	type;
+	StringInfo			buffer = makeStringInfo();
+
+	it = JsonbIteratorInit(&jb->root);
+
+	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+		{
+			appendStringInfoString(buffer, v.val.string.val);
+			appendBinaryStringInfo(buffer, " ", 1);
+		}
+	}
+
+	return buffer->data;
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index a4cc86d..ccedece 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4759,6 +4759,15 @@ DESCR("generate headline");
 DATA(insert OID = 3755 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 25 "25 3615" _null_ _null_ _null_ _null_ _null_ ts_headline _null_ _null_ _null_ ));
 DESCR("generate headline");
 
+DATA(insert OID = 4201 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 25 "3734 3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid_opt _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4202 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 25 "3734 3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4203 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 25 "3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_opt _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4204 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 25 "3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+
 DATA(insert OID = 3745 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3614 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsvector_byid _null_ _null_ _null_ ));
 DESCR("transform to tsvector");
 DATA(insert OID = 3746 (  to_tsquery		PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsquery_byid _null_ _null_ _null_ ));
@@ -4775,6 +4784,8 @@ DATA(insert OID = 3751 (  plainto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f f t f s
 DESCR("transform to tsquery");
 DATA(insert OID = 5001 (  phraseto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
 DESCR("transform to tsquery");
+DATA(insert OID = 3800 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform jsonb to tsvector");
 
 DATA(insert OID = 3752 (  tsvector_update_trigger			PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ ));
 DESCR("trigger for automatic update of tsvector column");
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index 155650c..873e2e1 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -86,6 +86,15 @@ typedef struct
 #define MAXNUMPOS	(256)
 #define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
 
+/*
+ * In case if a TSVector contains several parts and we want to treat them as
+ * separate, it's necessary to add an artificial increment to position of each
+ * lexeme from every next part. It's required to avoid the situation when
+ * tsquery can find a phrase consisting of lexemes from two of such parts.
+ * TS_JUMP defined a value of this increment.
+ */
+#define TS_JUMP 1
+
 /* This struct represents a complete tsvector datum */
 typedef struct
 {
diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h
index 411e158..233f7b7 100644
--- a/src/include/utils/jsonb.h
+++ b/src/include/utils/jsonb.h
@@ -377,5 +377,7 @@ extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
 extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in,
 					 int estimated_len);
 
+extern char *jsonb_values_as_string(Jsonb *jsonb);
+
 
 #endif   /* __JSONB_H__ */
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index 8ec4150..3333730 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -3474,3 +3474,86 @@ HINT:  Try using the function jsonb_set to replace key value.
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true);
 ERROR:  cannot replace existing key
 HINT:  Try using the function jsonb_set to replace key value.
+-- jsonb to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- jsonb to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+                                to_tsvector                                 
+----------------------------------------------------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
+(1 row)
+
+-- ts_vector corner cases
+select to_tsvector('""'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('{}'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('[]'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('null'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+-- ts_headline for jsonb
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+                                 ts_headline                                  
+------------------------------------------------------------------------------
+ aaa <b>bbb</b> ccc <b>ddd</b> fffccc1 ddd1 ccc1 ddd1 ggg hhhiii jjj iii jjj 
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+                          ts_headline                          
+---------------------------------------------------------------
+ aaa <b>bbb</b> ccc <b>ddd</b> fff\x02 ggg hhhiii jjj iii jjj 
+(1 row)
+
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                            ts_headline                             
+--------------------------------------------------------------------
+ aaa <bbb> ccc <ddd> fffccc1 ddd1 ccc1 ddd1 ggg hhhiii jjj iii jjj 
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                            ts_headline                             
+--------------------------------------------------------------------
+ aaa <bbb> ccc <ddd> fffccc1 ddd1 ccc1 ddd1 ggg hhhiii jjj iii jjj 
+(1 row)
+
+-- corner cases for ts_headline with jsonb
+select ts_headline('null'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ 
+(1 row)
+
+select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ 
+(1 row)
+
+select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ 
+(1 row)
+
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index e2eaca0..2e31ee6 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -878,3 +878,26 @@ select jsonb_insert('{"a": {"b": "value"}}', '{a, c}', '"new_value"', true);
 
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"');
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true);
+
+-- jsonb to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+
+-- jsonb to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+
+-- ts_vector corner cases
+select to_tsvector('""'::jsonb);
+select to_tsvector('{}'::jsonb);
+select to_tsvector('[]'::jsonb);
+select to_tsvector('null'::jsonb);
+
+-- ts_headline for jsonb
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+
+-- corner cases for ts_headline with jsonb
+select ts_headline('null'::jsonb, tsquery('aaa & bbb'));
+select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));
+select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));

Oleg Bartunov

obartunov@gmail.com

almost 9 years ago

In reply to: Dmitry Dolgov (#1)

Re: [PATCH] few fts functions for jsonb

The proposed patch looks not very important, but I consider it as an
important feature, which Oracle and Microsoft already have, that's why I
asked Dmitry to work on this and made it before feature freeze. My comments
follows below the post.

On Tue, Feb 28, 2017 at 1:59 PM, Dmitry Dolgov <9erthalion6@gmail.com>
wrote:

Hi all

I would like to propose patch with a set of new small functions for fts in
case of
jsonb data type:

* to_tsvector(config, jsonb) - make a tsvector from all string values and
elements of jsonb object. To prevent the situation, when tsquery can
find a
phrase consisting of lexemes from two different values/elements, this
function will add an increment to position of each lexeme from every new
value/element.

* ts_headline(config, jsonb, tsquery, options) - generate a headline
directly
from jsonb object

Here are the examples how they work:

```
=# select to_tsvector('{"a": "aaa bbb", "b": ["ccc ddd"], "c": {"d": "eee
fff"}}'::jsonb);
to_tsvector
-------------------------------------------------
'aaa':1 'bbb':2 'ccc':4 'ddd':5 'eee':7 'fff':8
(1 row)

=# select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc
ddd"}}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
ts_headline
----------------------
aaa <bbb> ccc <ddd>
(1 row)
```

Any comments or suggestions?

1. add json support
2. Its_headline should returns the original json with highlighting. As a
first try the proposed ts_headline could be ok, probably need special
option.

Show quoted text

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Dmitry Dolgov

9erthalion6@gmail.com

almost 9 years ago

In reply to: Oleg Bartunov (#2)

1 attachment(s)

Re: [PATCH] few fts functions for jsonb

On 28 February 2017 at 19:21, Oleg Bartunov <obartunov@gmail.com> wrote:
1. add json support

I've added json support for all functions.

Its_headline should returns the original json with highlighting

Yes, I see now. I don't think it's worth it to add a special option for that
purpose, so I've just changed the implementation to return the original
json(b).

Attachments:

jsonb_fts_v2.patchtext/x-patch; charset=US-ASCII; name=jsonb_fts_v2.patchDownload

diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 6e5de8f..8f7bcfe 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -16,6 +16,7 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonb.h"
 
 
 typedef struct MorphOpaque
@@ -24,6 +25,14 @@ typedef struct MorphOpaque
 	int			qoperator;		/* query operator */
 } MorphOpaque;
 
+typedef struct TSVectorBuildState
+{
+	ParsedText	*prs;
+	TSVector	result;
+	Oid			cfgId;
+} TSVectorBuildState;
+
+static void add_to_tsvector(void *state, char *elem_value, int elem_len);
 
 Datum
 get_current_ts_config(PG_FUNCTION_ARGS)
@@ -256,6 +265,109 @@ to_tsvector(PG_FUNCTION_ARGS)
 										PointerGetDatum(in)));
 }
 
+Datum
+jsonb_to_tsvector(PG_FUNCTION_ARGS)
+{
+	Jsonb				*jb = PG_GETARG_JSONB(0);
+	TSVectorBuildState	state;
+	ParsedText			*prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+	prs->words = NULL;
+	state.result = NULL;
+	state.cfgId = getTSCurrentConfig(true);
+	state.prs = prs;
+
+	iterate_jsonb_values(jb, &state, (JsonIterateAction) add_to_tsvector);
+
+	PG_FREE_IF_COPY(jb, 1);
+
+	if (state.result == NULL)
+	{
+		/* There weren't any string elements in jsonb,
+		 * so wee need to return an empty vector */
+
+		if (prs->words != NULL)
+			pfree(prs->words);
+
+		state.result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+		state.result->size = 0;
+	}
+
+	PG_RETURN_TSVECTOR(state.result);
+}
+
+Datum
+json_to_tsvector(PG_FUNCTION_ARGS)
+{
+	text				*json = PG_GETARG_TEXT_P(0);
+	TSVectorBuildState	state;
+	ParsedText			*prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+	prs->words = NULL;
+	state.result = NULL;
+	state.cfgId = getTSCurrentConfig(true);
+	state.prs = prs;
+
+	iterate_json_values(json, &state, (JsonIterateAction) add_to_tsvector);
+
+	PG_FREE_IF_COPY(json, 1);
+	if (state.result == NULL)
+	{
+		/* There weren't any string elements in json,
+		 * so wee need to return an empty vector */
+
+		if (prs->words != NULL)
+			pfree(prs->words);
+
+		state.result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+		state.result->size = 0;
+	}
+
+	PG_RETURN_TSVECTOR(state.result);
+}
+
+/*
+ * Extend current TSVector from _state with a new one,
+ * build over a json(b) element.
+ */
+static void
+add_to_tsvector(void *_state, char *elem_value, int elem_len)
+{
+	TSVectorBuildState *state = (TSVectorBuildState *) _state;
+	ParsedText	*prs = state->prs;
+	TSVector	item_vector;
+	int			i;
+
+	prs->lenwords = elem_len / 6;
+	if (prs->lenwords == 0)
+		prs->lenwords = 2;
+
+	prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
+	prs->curwords = 0;
+	prs->pos = 0;
+
+	parsetext(state->cfgId, prs, elem_value, elem_len);
+
+	if (prs->curwords)
+	{
+		if (state->result != NULL)
+		{
+			for (i = 0; i < prs->curwords; i++)
+				prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP;
+
+			item_vector = make_tsvector(prs);
+
+			state->result = (TSVector) DirectFunctionCall2(tsvector_concat,
+									TSVectorGetDatum(state->result),
+									PointerGetDatum(item_vector));
+		}
+		else
+			state->result = make_tsvector(prs);
+	}
+}
+
 /*
  * to_tsquery
  */
diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c
index 8ca1c62..b648996 100644
--- a/src/backend/tsearch/wparser.c
+++ b/src/backend/tsearch/wparser.c
@@ -21,6 +21,7 @@
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
 #include "utils/varlena.h"
+#include "utils/jsonb.h"
 
 
 /******sql-level interface******/
@@ -31,6 +32,19 @@ typedef struct
 	LexDescr   *list;
 } TSTokenTypeStorage;
 
+/* state for ts_headline_json_* */
+typedef struct HeadlineJsonState
+{
+	HeadlineParsedText *prs;
+	TSConfigCacheEntry *cfg;
+	TSParserCacheEntry *prsobj;
+	TSQuery				query;
+	List				*prsoptions;
+	bool				transformed;
+} HeadlineJsonState;
+
+static text * headline_json_value(void *_state, char *elem_value, int elem_len);
+
 static void
 tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
 {
@@ -362,3 +376,177 @@ ts_headline_opt(PG_FUNCTION_ARGS)
 										PG_GETARG_DATUM(1),
 										PG_GETARG_DATUM(2)));
 }
+
+Datum
+ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
+{
+	Jsonb			*out, *jb = PG_GETARG_JSONB(1);
+	TSQuery			query = PG_GETARG_TSQUERY(2);
+	text			*opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+
+	HeadlineParsedText prs;
+	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+	memset(&prs, 0, sizeof(HeadlineParsedText));
+	prs.lenwords = 32;
+	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+	state->prs = &prs;
+	state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
+	state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+	state->query = query;
+	if (opt)
+		state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
+	else
+		state->prsoptions = NIL;
+
+	if (!OidIsValid(state->prsobj->headlineOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+		   errmsg("text search parser does not support headline creation")));
+
+	out = transform_jsonb(jb, state, (JsonTransformAction) headline_json_value);
+
+	PG_FREE_IF_COPY(jb, 1);
+	PG_FREE_IF_COPY(query, 2);
+	if (opt)
+		PG_FREE_IF_COPY(opt, 3);
+
+	pfree(prs.words);
+
+	if (state->transformed)
+	{
+		pfree(prs.startsel);
+		pfree(prs.stopsel);
+	}
+
+	PG_RETURN_JSONB(out);
+}
+
+Datum
+ts_headline_jsonb(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
+{
+	text				*json = PG_GETARG_TEXT_P(1);
+	TSQuery				query = PG_GETARG_TSQUERY(2);
+	text				*opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+	StringInfo			out;
+
+	HeadlineParsedText prs;
+	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+	memset(&prs, 0, sizeof(HeadlineParsedText));
+	prs.lenwords = 32;
+	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+	state->prs = &prs;
+	state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
+	state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+	state->query = query;
+	if (opt)
+		state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
+	else
+		state->prsoptions = NIL;
+
+	if (!OidIsValid(state->prsobj->headlineOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+		   errmsg("text search parser does not support headline creation")));
+
+	out = transform_json(json, state, (JsonTransformAction) headline_json_value);
+
+	PG_FREE_IF_COPY(json, 1);
+	PG_FREE_IF_COPY(query, 2);
+	if (opt)
+		PG_FREE_IF_COPY(opt, 3);
+	pfree(prs.words);
+
+	if (state->transformed)
+	{
+		pfree(prs.startsel);
+		pfree(prs.stopsel);
+	}
+
+	PG_RETURN_TEXT_P(cstring_to_text_with_len(out->data, out->len));
+}
+
+Datum
+ts_headline_json(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_json_byid(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_json_opt(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+
+/*
+ * Return headline in text from, generated from a json(b) element
+ */
+static text *
+headline_json_value(void *_state, char *elem_value, int elem_len)
+{
+	HeadlineJsonState *state = (HeadlineJsonState *) _state;
+
+	HeadlineParsedText *prs = state->prs;
+	TSConfigCacheEntry *cfg = state->cfg;
+	TSParserCacheEntry *prsobj = state->prsobj;
+	TSQuery	query = state->query;
+	List *prsoptions = state->prsoptions;
+
+	prs->curwords = 0;
+	hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
+	FunctionCall3(&(prsobj->prsheadline),
+				  PointerGetDatum(prs),
+				  PointerGetDatum(prsoptions),
+				  PointerGetDatum(query));
+
+	state->transformed = true;
+	return generateHeadline(prs);
+}
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index 6a7aab2..c7bc9e0 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -42,6 +42,8 @@
 #define JB_PATH_CREATE_OR_INSERT \
 	(JB_PATH_INSERT_BEFORE | JB_PATH_INSERT_AFTER | JB_PATH_CREATE)
 
+#define is_jsonb_data(type) (type == WJB_KEY || type == WJB_VALUE || type == WJB_ELEM)
+
 /* state for json_object_keys */
 typedef struct OkeysState
 {
@@ -52,6 +54,23 @@ typedef struct OkeysState
 	int			sent_count;
 } OkeysState;
 
+/* state for iterate_json(b) functions */
+typedef struct IterateJsonState
+{
+	JsonLexContext		*lex;
+	JsonIterateAction	action;
+	void				*action_state;
+} IterateJsonState;
+
+/* state for transform_json(b) functions */
+typedef struct TransformJsonState
+{
+	JsonLexContext		*lex;
+	StringInfo			strval;
+	JsonTransformAction	action;
+	void				*action_state;
+} TransformJsonState;
+
 /* state for json_get* functions */
 typedef struct GetState
 {
@@ -271,6 +290,18 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems,
 			 int level, Jsonb *newval, uint32 nelems, int op_type);
 static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
 
+/* function supporting iterate_json(b) */
+static void apply_action(void *state, char *token, JsonTokenType tokentype);
+
+/* function supporting transform_json(b) */
+static void transform_object_start(void *state);
+static void transform_object_end(void *state);
+static void transform_array_start(void *state);
+static void transform_array_end(void *state);
+static void transform_object_field_start(void *state, char *fname, bool isnull);
+static void transform_array_element_start(void *state, bool isnull);
+static void transform_scalar(void *state, char *token, JsonTokenType tokentype);
+
 
 /*
  * SQL function json_object_keys
@@ -4130,3 +4161,196 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
 		}
 	}
 }
+
+/*
+ * Iterate over jsonb string values or elements, and pass them to
+ * a specified JsonIterateAction.
+ */
+void *
+iterate_jsonb_values(Jsonb *jb, void *state, JsonIterateAction action)
+{
+	JsonbIterator		*it;
+	JsonbValue			v;
+	JsonbIteratorToken	type;
+
+	it = JsonbIteratorInit(&jb->root);
+
+	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+		{
+			action(state, v.val.string.val, v.val.string.len);
+		}
+	}
+
+	return state;
+}
+
+/*
+ * Iterate over json string values or elements, and pass them to
+ * a specified JsonIterateAction.
+ */
+void *
+iterate_json_values(text *json, void *action_state, JsonIterateAction action)
+{
+	JsonLexContext *lex = makeJsonLexContext(json, true);
+	JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
+	IterateJsonState   *state = palloc0(sizeof(IterateJsonState));
+
+	state->lex = lex;
+	state->action = action;
+	state->action_state = action_state;
+
+	sem->semstate = (void *) state;
+	sem->scalar = apply_action;
+
+	pg_parse_json(lex, sem);
+
+	return state;
+}
+
+static void
+apply_action(void *state, char *token, JsonTokenType tokentype)
+{
+	IterateJsonState   *_state = (IterateJsonState *) state;
+	if (tokentype == JSON_TOKEN_STRING)
+		(*_state->action) (_state->action_state, token, strlen(token));
+}
+
+/*
+ * Iterate over a jsonb, and apply a specified JsonTransformAction
+ * to every string value or element. Function returns a copy of
+ * an original jsonb object with transformed values.
+ */
+Jsonb *
+transform_jsonb(Jsonb *jsonb, void *action_state, JsonTransformAction transform_action)
+{
+	JsonbIterator		*it;
+	JsonbValue			v, *res = NULL;
+	JsonbIteratorToken	type;
+	JsonbParseState		*st = NULL;
+	text				*out;
+	bool				is_scalar = false;
+
+	it = JsonbIteratorInit(&jsonb->root);
+	is_scalar = it->isScalar;
+
+	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+		{
+			out = transform_action(action_state, v.val.string.val, v.val.string.len);
+			v.val.string.val = VARDATA_ANY(out);
+			v.val.string.len = VARSIZE_ANY_EXHDR(out);
+			res = pushJsonbValue(&st, type, type < WJB_BEGIN_ARRAY ? &v : NULL);
+		}
+		else
+		{
+			res = pushJsonbValue(&st, type, is_jsonb_data(type) ? &v : NULL);
+		}
+	}
+
+	if (res->type == jbvArray)
+		res->val.array.rawScalar = is_scalar;
+
+	return JsonbValueToJsonb(res);
+}
+
+/*
+ * Iterate over a json, and apply a specified JsonTransformAction
+ * to every string value or element. Function returns a StringInfo,
+ * which is a copy of an original json with transformed values.
+ */
+StringInfo
+transform_json(text *json, void *action_state, JsonTransformAction transform_action)
+{
+	JsonLexContext *lex = makeJsonLexContext(json, true);
+	JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
+	TransformJsonState *state = palloc0(sizeof(TransformJsonState));
+
+	state->lex = lex;
+	state->strval = makeStringInfo();
+	state->action = transform_action;
+	state->action_state = action_state;
+
+	sem->semstate = (void *) state;
+	sem->scalar = transform_scalar;
+	sem->object_start = transform_object_start;
+	sem->object_end = transform_object_end;
+	sem->array_start = transform_array_start;
+	sem->array_end = transform_array_end;
+	sem->scalar = transform_scalar;
+	sem->array_element_start = transform_array_element_start;
+	sem->object_field_start = transform_object_field_start;
+
+	pg_parse_json(lex, sem);
+
+	return state->strval;
+}
+
+static void
+transform_object_start(void *state)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+	appendStringInfoCharMacro(_state->strval, '{');
+}
+
+static void
+transform_object_end(void *state)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+	appendStringInfoCharMacro(_state->strval, '}');
+}
+
+static void
+transform_array_start(void *state)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+	appendStringInfoCharMacro(_state->strval, '[');
+}
+
+static void
+transform_array_end(void *state)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+	appendStringInfoCharMacro(_state->strval, ']');
+}
+
+static void
+transform_object_field_start(void *state, char *fname, bool isnull)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+
+	if (_state->strval->data[_state->strval->len - 1] != '{')
+		appendStringInfoCharMacro(_state->strval, ',');
+
+	/*
+	 * Unfortunately we don't have the quoted and escaped string any more, so
+	 * we have to re-escape it.
+	 */
+	escape_json(_state->strval, fname);
+	appendStringInfoCharMacro(_state->strval, ':');
+}
+
+static void
+transform_array_element_start(void *state, bool isnull)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+
+	if (_state->strval->data[_state->strval->len - 1] != '[')
+		appendStringInfoCharMacro(_state->strval, ',');
+}
+
+static void
+transform_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+
+	if (tokentype == JSON_TOKEN_STRING)
+	{
+		text *out = (*_state->action) (_state->action_state, token, strlen(token));
+		escape_json(_state->strval, text_to_cstring(out));
+	}
+	else
+		appendStringInfoString(_state->strval, token);
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index a4cc86d..505764c 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4759,6 +4759,24 @@ DESCR("generate headline");
 DATA(insert OID = 3755 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 25 "25 3615" _null_ _null_ _null_ _null_ _null_ ts_headline _null_ _null_ _null_ ));
 DESCR("generate headline");
 
+DATA(insert OID = 4201 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 3802 "3734 3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid_opt _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4202 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 3802 "3734 3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4203 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 3802 "3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_opt _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4204 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 3802 "3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+
+DATA(insert OID = 4205 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 114 "3734 114 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_json_byid_opt _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4206 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 114 "3734 114 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_json_byid _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4207 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 114 "114 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_json_opt _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4208 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 114 "114 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_json _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+
 DATA(insert OID = 3745 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3614 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsvector_byid _null_ _null_ _null_ ));
 DESCR("transform to tsvector");
 DATA(insert OID = 3746 (  to_tsquery		PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsquery_byid _null_ _null_ _null_ ));
@@ -4775,6 +4793,10 @@ DATA(insert OID = 3751 (  plainto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f f t f s
 DESCR("transform to tsquery");
 DATA(insert OID = 5001 (  phraseto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
 DESCR("transform to tsquery");
+DATA(insert OID = 3800 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform jsonb to tsvector");
+DATA(insert OID = 3801 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform json to tsvector");
 
 DATA(insert OID = 3752 (  tsvector_update_trigger			PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ ));
 DESCR("trigger for automatic update of tsvector column");
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index 155650c..873e2e1 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -86,6 +86,15 @@ typedef struct
 #define MAXNUMPOS	(256)
 #define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
 
+/*
+ * In case if a TSVector contains several parts and we want to treat them as
+ * separate, it's necessary to add an artificial increment to position of each
+ * lexeme from every next part. It's required to avoid the situation when
+ * tsquery can find a phrase consisting of lexemes from two of such parts.
+ * TS_JUMP defined a value of this increment.
+ */
+#define TS_JUMP 1
+
 /* This struct represents a complete tsvector datum */
 typedef struct
 {
diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h
index 411e158..94e2399 100644
--- a/src/include/utils/jsonb.h
+++ b/src/include/utils/jsonb.h
@@ -351,6 +351,9 @@ typedef struct JsonbIterator
 	struct JsonbIterator *parent;
 } JsonbIterator;
 
+typedef void (*JsonIterateAction) (void *state, char *elem_value, int elem_len);
+typedef text * (*JsonTransformAction) (void *state, char *elem_value, int elem_len);
+
 
 /* Support functions */
 extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
@@ -377,5 +380,12 @@ extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
 extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in,
 					 int estimated_len);
 
+extern void *iterate_jsonb_values(Jsonb *jb, void *state, JsonIterateAction action);
+extern void *iterate_json_values(text *json, void *action_state, JsonIterateAction action);
+extern Jsonb *transform_jsonb(Jsonb *jsonb, void *action_state,
+						JsonTransformAction transform_action);
+extern StringInfo transform_json(text *json, void *action_state,
+							JsonTransformAction transform_action);
+
 
 #endif   /* __JSONB_H__ */
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index 1bb8768..046ead3 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -1674,3 +1674,86 @@ select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }');
  {"a":{},"d":{}}
 (1 row)
 
+-- json to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- json to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+                                to_tsvector                                 
+----------------------------------------------------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
+(1 row)
+
+-- ts_vector corner cases
+select to_tsvector('""'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('{}'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('[]'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('null'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+-- ts_headline for json
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+                                               ts_headline                                               
+---------------------------------------------------------------------------------------------------------
+ {"a":"aaa <b>bbb</b>","b":{"c":"ccc <b>ddd</b> fff","c1":"ccc1 ddd1"},"d":["ggg <b>hhh</b>","iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+                                      ts_headline                                       
+----------------------------------------------------------------------------------------
+ {"a":"aaa <b>bbb</b>","b":{"c":"ccc <b>ddd</b> fff"},"d":["ggg <b>hhh</b>","iii jjj"]}
+(1 row)
+
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                       ts_headline                                        
+------------------------------------------------------------------------------------------
+ {"a":"aaa <bbb>","b":{"c":"ccc <ddd> fff","c1":"ccc1 ddd1"},"d":["ggg <hhh>","iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                       ts_headline                                        
+------------------------------------------------------------------------------------------
+ {"a":"aaa <bbb>","b":{"c":"ccc <ddd> fff","c1":"ccc1 ddd1"},"d":["ggg <hhh>","iii jjj"]}
+(1 row)
+
+-- corner cases for ts_headline with json
+select ts_headline('null'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ null
+(1 row)
+
+select ts_headline('{}'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ {}
+(1 row)
+
+select ts_headline('[]'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ []
+(1 row)
+
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index 8ec4150..7cbbcfc 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -3474,3 +3474,86 @@ HINT:  Try using the function jsonb_set to replace key value.
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true);
 ERROR:  cannot replace existing key
 HINT:  Try using the function jsonb_set to replace key value.
+-- jsonb to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- jsonb to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+                                to_tsvector                                 
+----------------------------------------------------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
+(1 row)
+
+-- ts_vector corner cases
+select to_tsvector('""'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('{}'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('[]'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('null'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+-- ts_headline for jsonb
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+                                                   ts_headline                                                    
+------------------------------------------------------------------------------------------------------------------
+ {"a": "aaa <b>bbb</b>", "b": {"c": "ccc <b>ddd</b> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <b>hhh</b>", "iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+                                          ts_headline                                          
+-----------------------------------------------------------------------------------------------
+ {"a": "aaa <b>bbb</b>", "b": {"c": "ccc <b>ddd</b> fff"}, "d": ["ggg <b>hhh</b>", "iii jjj"]}
+(1 row)
+
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                            ts_headline                                            
+---------------------------------------------------------------------------------------------------
+ {"a": "aaa <bbb>", "b": {"c": "ccc <ddd> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <hhh>", "iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                            ts_headline                                            
+---------------------------------------------------------------------------------------------------
+ {"a": "aaa <bbb>", "b": {"c": "ccc <ddd> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <hhh>", "iii jjj"]}
+(1 row)
+
+-- corner cases for ts_headline with jsonb
+select ts_headline('null'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ null
+(1 row)
+
+select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ {}
+(1 row)
+
+select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ []
+(1 row)
+
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 5e61922..e661f96 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -551,3 +551,26 @@ select json_strip_nulls('[1,{"a":1,"b":null,"c":2},3]');
 
 -- an empty object is not null and should not be stripped
 select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }');
+
+-- json to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
+
+-- json to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+
+-- ts_vector corner cases
+select to_tsvector('""'::json);
+select to_tsvector('{}'::json);
+select to_tsvector('[]'::json);
+select to_tsvector('null'::json);
+
+-- ts_headline for json
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+
+-- corner cases for ts_headline with json
+select ts_headline('null'::json, tsquery('aaa & bbb'));
+select ts_headline('{}'::json, tsquery('aaa & bbb'));
+select ts_headline('[]'::json, tsquery('aaa & bbb'));
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index e2eaca0..2e31ee6 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -878,3 +878,26 @@ select jsonb_insert('{"a": {"b": "value"}}', '{a, c}', '"new_value"', true);
 
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"');
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true);
+
+-- jsonb to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+
+-- jsonb to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+
+-- ts_vector corner cases
+select to_tsvector('""'::jsonb);
+select to_tsvector('{}'::jsonb);
+select to_tsvector('[]'::jsonb);
+select to_tsvector('null'::jsonb);
+
+-- ts_headline for jsonb
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+
+-- corner cases for ts_headline with jsonb
+select ts_headline('null'::jsonb, tsquery('aaa & bbb'));
+select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));
+select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));

Andrew Dunstan

andrew.dunstan@2ndquadrant.com

almost 9 years ago

In reply to: Dmitry Dolgov (#3)

Re: [PATCH] few fts functions for jsonb

On 03/10/2017 11:13 AM, Dmitry Dolgov wrote:

On 28 February 2017 at 19:21, Oleg Bartunov <obartunov@gmail.com

<mailto:obartunov@gmail.com>> wrote:

1. add json support

I've added json support for all functions.

Its_headline should returns the original json with highlighting

Yes, I see now. I don't think it's worth it to add a special option
for that
purpose, so I've just changed the implementation to return the
original json(b).

This is a pretty good idea.

However, I think it should probably be broken up into a couple of pieces
- one for the generic json/jsonb transforms infrastructure (which
probably needs some more comments) and one for the FTS functions that
will use it.

cheers

andrew

--
Andrew Dunstan https://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Dmitry Dolgov

9erthalion6@gmail.com

almost 9 years ago

In reply to: Andrew Dunstan (#4)

2 attachment(s)

Re: [PATCH] few fts functions for jsonb

On 21 March 2017 at 03:03, Andrew Dunstan <andrew.dunstan@2ndquadrant.com>

wrote:

However, I think it should probably be broken up into a couple of pieces -
one for the generic json/jsonb transforms infrastructure (which probably
needs some more comments) and one for the FTS functions that will use it.

Sure, here are two patches with separated functionality and a bit more
commentaries for the transform functions.

Attachments:

jsonb_fts_support_v1.patchtext/x-patch; charset=US-ASCII; name=jsonb_fts_support_v1.patchDownload

diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index 6a7aab2..bac08c0 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -42,6 +42,8 @@
 #define JB_PATH_CREATE_OR_INSERT \
 	(JB_PATH_INSERT_BEFORE | JB_PATH_INSERT_AFTER | JB_PATH_CREATE)
 
+#define is_jsonb_data(type) (type == WJB_KEY || type == WJB_VALUE || type == WJB_ELEM)
+
 /* state for json_object_keys */
 typedef struct OkeysState
 {
@@ -52,6 +54,23 @@ typedef struct OkeysState
 	int			sent_count;
 } OkeysState;
 
+/* state for iterate_json function */
+typedef struct IterateJsonState
+{
+	JsonLexContext		*lex;
+	JsonIterateAction	action;			/* an action that will be applied to each json value */
+	void				*action_state;	/* any necessary context for iteration */
+} IterateJsonState;
+
+/* state for transform_json function */
+typedef struct TransformJsonState
+{
+	JsonLexContext		*lex;
+	StringInfo			strval;			/* resulting json */
+	JsonTransformAction	action;			/* an action that will be applied to each json value */
+	void				*action_state;	/* any necessary context for transformation */
+} TransformJsonState;
+
 /* state for json_get* functions */
 typedef struct GetState
 {
@@ -271,6 +290,18 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems,
 			 int level, Jsonb *newval, uint32 nelems, int op_type);
 static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
 
+/* function supporting iterate_json(b) */
+static void apply_action(void *state, char *token, JsonTokenType tokentype);
+
+/* function supporting transform_json(b) */
+static void transform_object_start(void *state);
+static void transform_object_end(void *state);
+static void transform_array_start(void *state);
+static void transform_array_end(void *state);
+static void transform_object_field_start(void *state, char *fname, bool isnull);
+static void transform_array_element_start(void *state, bool isnull);
+static void transform_scalar(void *state, char *token, JsonTokenType tokentype);
+
 
 /*
  * SQL function json_object_keys
@@ -4130,3 +4161,206 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
 		}
 	}
 }
+
+/*
+ * Iterate over jsonb string values or elements, and pass them together with
+ * an iteration state to a specified JsonIterateAction.
+ */
+void *
+iterate_jsonb_values(Jsonb *jb, void *state, JsonIterateAction action)
+{
+	JsonbIterator		*it;
+	JsonbValue			v;
+	JsonbIteratorToken	type;
+
+	it = JsonbIteratorInit(&jb->root);
+
+	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+		{
+			action(state, v.val.string.val, v.val.string.len);
+		}
+	}
+
+	return state;
+}
+
+/*
+ * Iterate over json string values or elements, and pass them together with an
+ * iteration state to a specified JsonIterateAction.
+ */
+void *
+iterate_json_values(text *json, void *action_state, JsonIterateAction action)
+{
+	JsonLexContext *lex = makeJsonLexContext(json, true);
+	JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
+	IterateJsonState   *state = palloc0(sizeof(IterateJsonState));
+
+	state->lex = lex;
+	state->action = action;
+	state->action_state = action_state;
+
+	sem->semstate = (void *) state;
+	sem->scalar = apply_action;
+
+	pg_parse_json(lex, sem);
+
+	return state;
+}
+
+/*
+ * An auxiliary function for iterate_json_values to invoke a specified
+ * JsonIterateAction.
+ */
+static void
+apply_action(void *state, char *token, JsonTokenType tokentype)
+{
+	IterateJsonState   *_state = (IterateJsonState *) state;
+	if (tokentype == JSON_TOKEN_STRING)
+		(*_state->action) (_state->action_state, token, strlen(token));
+}
+
+/*
+ * Iterate over a jsonb, and apply a specified JsonTransformAction to every
+ * string value or element. Any necessary context for a JsonTransformAction can
+ * be passed in the action_state variable. Function returns a copy of an original jsonb
+ * object with transformed values.
+ */
+Jsonb *
+transform_jsonb(Jsonb *jsonb, void *action_state, JsonTransformAction transform_action)
+{
+	JsonbIterator		*it;
+	JsonbValue			v, *res = NULL;
+	JsonbIteratorToken	type;
+	JsonbParseState		*st = NULL;
+	text				*out;
+	bool				is_scalar = false;
+
+	it = JsonbIteratorInit(&jsonb->root);
+	is_scalar = it->isScalar;
+
+	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+		{
+			out = transform_action(action_state, v.val.string.val, v.val.string.len);
+			v.val.string.val = VARDATA_ANY(out);
+			v.val.string.len = VARSIZE_ANY_EXHDR(out);
+			res = pushJsonbValue(&st, type, type < WJB_BEGIN_ARRAY ? &v : NULL);
+		}
+		else
+		{
+			res = pushJsonbValue(&st, type, is_jsonb_data(type) ? &v : NULL);
+		}
+	}
+
+	if (res->type == jbvArray)
+		res->val.array.rawScalar = is_scalar;
+
+	return JsonbValueToJsonb(res);
+}
+
+/*
+ * Iterate over a json, and apply a specified JsonTransformAction to every
+ * string value or element. Any necessary context for a JsonTransformAction can
+ * be passed in the action_state variable. Function returns a StringInfo, which
+ * is a copy of an original json with transformed values.
+ */
+StringInfo
+transform_json(text *json, void *action_state, JsonTransformAction transform_action)
+{
+	JsonLexContext *lex = makeJsonLexContext(json, true);
+	JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
+	TransformJsonState *state = palloc0(sizeof(TransformJsonState));
+
+	state->lex = lex;
+	state->strval = makeStringInfo();
+	state->action = transform_action;
+	state->action_state = action_state;
+
+	sem->semstate = (void *) state;
+	sem->scalar = transform_scalar;
+	sem->object_start = transform_object_start;
+	sem->object_end = transform_object_end;
+	sem->array_start = transform_array_start;
+	sem->array_end = transform_array_end;
+	sem->scalar = transform_scalar;
+	sem->array_element_start = transform_array_element_start;
+	sem->object_field_start = transform_object_field_start;
+
+	pg_parse_json(lex, sem);
+
+	return state->strval;
+}
+
+/*
+ * Set of auxiliary functions for transform_json to invoke a specified
+ * JsonTransformAction for all values and left everything else untouched.
+ */
+static void
+transform_object_start(void *state)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+	appendStringInfoCharMacro(_state->strval, '{');
+}
+
+static void
+transform_object_end(void *state)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+	appendStringInfoCharMacro(_state->strval, '}');
+}
+
+static void
+transform_array_start(void *state)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+	appendStringInfoCharMacro(_state->strval, '[');
+}
+
+static void
+transform_array_end(void *state)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+	appendStringInfoCharMacro(_state->strval, ']');
+}
+
+static void
+transform_object_field_start(void *state, char *fname, bool isnull)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+
+	if (_state->strval->data[_state->strval->len - 1] != '{')
+		appendStringInfoCharMacro(_state->strval, ',');
+
+	/*
+	 * Unfortunately we don't have the quoted and escaped string any more, so
+	 * we have to re-escape it.
+	 */
+	escape_json(_state->strval, fname);
+	appendStringInfoCharMacro(_state->strval, ':');
+}
+
+static void
+transform_array_element_start(void *state, bool isnull)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+
+	if (_state->strval->data[_state->strval->len - 1] != '[')
+		appendStringInfoCharMacro(_state->strval, ',');
+}
+
+static void
+transform_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+
+	if (tokentype == JSON_TOKEN_STRING)
+	{
+		text *out = (*_state->action) (_state->action_state, token, strlen(token));
+		escape_json(_state->strval, text_to_cstring(out));
+	}
+	else
+		appendStringInfoString(_state->strval, token);
+}
diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h
index 411e158..bf3de44 100644
--- a/src/include/utils/jsonb.h
+++ b/src/include/utils/jsonb.h
@@ -351,6 +351,12 @@ typedef struct JsonbIterator
 	struct JsonbIterator *parent;
 } JsonbIterator;
 
+/* an action that will be applied to each value in iterate_json(b) functions */
+typedef void (*JsonIterateAction) (void *state, char *elem_value, int elem_len);
+
+/* an action that will be applied to each value in transform_json(b) functions */
+typedef text * (*JsonTransformAction) (void *state, char *elem_value, int elem_len);
+
 
 /* Support functions */
 extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
@@ -377,5 +383,12 @@ extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
 extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in,
 					 int estimated_len);
 
+extern void *iterate_jsonb_values(Jsonb *jb, void *state, JsonIterateAction action);
+extern void *iterate_json_values(text *json, void *action_state, JsonIterateAction action);
+extern Jsonb *transform_jsonb(Jsonb *jsonb, void *action_state,
+						JsonTransformAction transform_action);
+extern StringInfo transform_json(text *json, void *action_state,
+							JsonTransformAction transform_action);
+
 
 #endif   /* __JSONB_H__ */

jsonb_fts_functions_v1.patchtext/x-patch; charset=US-ASCII; name=jsonb_fts_functions_v1.patchDownload

diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 6e5de8f..8f7bcfe 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -16,6 +16,7 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonb.h"
 
 
 typedef struct MorphOpaque
@@ -24,6 +25,14 @@ typedef struct MorphOpaque
 	int			qoperator;		/* query operator */
 } MorphOpaque;
 
+typedef struct TSVectorBuildState
+{
+	ParsedText	*prs;
+	TSVector	result;
+	Oid			cfgId;
+} TSVectorBuildState;
+
+static void add_to_tsvector(void *state, char *elem_value, int elem_len);
 
 Datum
 get_current_ts_config(PG_FUNCTION_ARGS)
@@ -256,6 +265,109 @@ to_tsvector(PG_FUNCTION_ARGS)
 										PointerGetDatum(in)));
 }
 
+Datum
+jsonb_to_tsvector(PG_FUNCTION_ARGS)
+{
+	Jsonb				*jb = PG_GETARG_JSONB(0);
+	TSVectorBuildState	state;
+	ParsedText			*prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+	prs->words = NULL;
+	state.result = NULL;
+	state.cfgId = getTSCurrentConfig(true);
+	state.prs = prs;
+
+	iterate_jsonb_values(jb, &state, (JsonIterateAction) add_to_tsvector);
+
+	PG_FREE_IF_COPY(jb, 1);
+
+	if (state.result == NULL)
+	{
+		/* There weren't any string elements in jsonb,
+		 * so wee need to return an empty vector */
+
+		if (prs->words != NULL)
+			pfree(prs->words);
+
+		state.result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+		state.result->size = 0;
+	}
+
+	PG_RETURN_TSVECTOR(state.result);
+}
+
+Datum
+json_to_tsvector(PG_FUNCTION_ARGS)
+{
+	text				*json = PG_GETARG_TEXT_P(0);
+	TSVectorBuildState	state;
+	ParsedText			*prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+	prs->words = NULL;
+	state.result = NULL;
+	state.cfgId = getTSCurrentConfig(true);
+	state.prs = prs;
+
+	iterate_json_values(json, &state, (JsonIterateAction) add_to_tsvector);
+
+	PG_FREE_IF_COPY(json, 1);
+	if (state.result == NULL)
+	{
+		/* There weren't any string elements in json,
+		 * so wee need to return an empty vector */
+
+		if (prs->words != NULL)
+			pfree(prs->words);
+
+		state.result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+		state.result->size = 0;
+	}
+
+	PG_RETURN_TSVECTOR(state.result);
+}
+
+/*
+ * Extend current TSVector from _state with a new one,
+ * build over a json(b) element.
+ */
+static void
+add_to_tsvector(void *_state, char *elem_value, int elem_len)
+{
+	TSVectorBuildState *state = (TSVectorBuildState *) _state;
+	ParsedText	*prs = state->prs;
+	TSVector	item_vector;
+	int			i;
+
+	prs->lenwords = elem_len / 6;
+	if (prs->lenwords == 0)
+		prs->lenwords = 2;
+
+	prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
+	prs->curwords = 0;
+	prs->pos = 0;
+
+	parsetext(state->cfgId, prs, elem_value, elem_len);
+
+	if (prs->curwords)
+	{
+		if (state->result != NULL)
+		{
+			for (i = 0; i < prs->curwords; i++)
+				prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP;
+
+			item_vector = make_tsvector(prs);
+
+			state->result = (TSVector) DirectFunctionCall2(tsvector_concat,
+									TSVectorGetDatum(state->result),
+									PointerGetDatum(item_vector));
+		}
+		else
+			state->result = make_tsvector(prs);
+	}
+}
+
 /*
  * to_tsquery
  */
diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c
index 8ca1c62..b648996 100644
--- a/src/backend/tsearch/wparser.c
+++ b/src/backend/tsearch/wparser.c
@@ -21,6 +21,7 @@
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
 #include "utils/varlena.h"
+#include "utils/jsonb.h"
 
 
 /******sql-level interface******/
@@ -31,6 +32,19 @@ typedef struct
 	LexDescr   *list;
 } TSTokenTypeStorage;
 
+/* state for ts_headline_json_* */
+typedef struct HeadlineJsonState
+{
+	HeadlineParsedText *prs;
+	TSConfigCacheEntry *cfg;
+	TSParserCacheEntry *prsobj;
+	TSQuery				query;
+	List				*prsoptions;
+	bool				transformed;
+} HeadlineJsonState;
+
+static text * headline_json_value(void *_state, char *elem_value, int elem_len);
+
 static void
 tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
 {
@@ -362,3 +376,177 @@ ts_headline_opt(PG_FUNCTION_ARGS)
 										PG_GETARG_DATUM(1),
 										PG_GETARG_DATUM(2)));
 }
+
+Datum
+ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
+{
+	Jsonb			*out, *jb = PG_GETARG_JSONB(1);
+	TSQuery			query = PG_GETARG_TSQUERY(2);
+	text			*opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+
+	HeadlineParsedText prs;
+	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+	memset(&prs, 0, sizeof(HeadlineParsedText));
+	prs.lenwords = 32;
+	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+	state->prs = &prs;
+	state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
+	state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+	state->query = query;
+	if (opt)
+		state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
+	else
+		state->prsoptions = NIL;
+
+	if (!OidIsValid(state->prsobj->headlineOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+		   errmsg("text search parser does not support headline creation")));
+
+	out = transform_jsonb(jb, state, (JsonTransformAction) headline_json_value);
+
+	PG_FREE_IF_COPY(jb, 1);
+	PG_FREE_IF_COPY(query, 2);
+	if (opt)
+		PG_FREE_IF_COPY(opt, 3);
+
+	pfree(prs.words);
+
+	if (state->transformed)
+	{
+		pfree(prs.startsel);
+		pfree(prs.stopsel);
+	}
+
+	PG_RETURN_JSONB(out);
+}
+
+Datum
+ts_headline_jsonb(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
+{
+	text				*json = PG_GETARG_TEXT_P(1);
+	TSQuery				query = PG_GETARG_TSQUERY(2);
+	text				*opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+	StringInfo			out;
+
+	HeadlineParsedText prs;
+	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+	memset(&prs, 0, sizeof(HeadlineParsedText));
+	prs.lenwords = 32;
+	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+	state->prs = &prs;
+	state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
+	state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+	state->query = query;
+	if (opt)
+		state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
+	else
+		state->prsoptions = NIL;
+
+	if (!OidIsValid(state->prsobj->headlineOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+		   errmsg("text search parser does not support headline creation")));
+
+	out = transform_json(json, state, (JsonTransformAction) headline_json_value);
+
+	PG_FREE_IF_COPY(json, 1);
+	PG_FREE_IF_COPY(query, 2);
+	if (opt)
+		PG_FREE_IF_COPY(opt, 3);
+	pfree(prs.words);
+
+	if (state->transformed)
+	{
+		pfree(prs.startsel);
+		pfree(prs.stopsel);
+	}
+
+	PG_RETURN_TEXT_P(cstring_to_text_with_len(out->data, out->len));
+}
+
+Datum
+ts_headline_json(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_json_byid(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_json_opt(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+
+/*
+ * Return headline in text from, generated from a json(b) element
+ */
+static text *
+headline_json_value(void *_state, char *elem_value, int elem_len)
+{
+	HeadlineJsonState *state = (HeadlineJsonState *) _state;
+
+	HeadlineParsedText *prs = state->prs;
+	TSConfigCacheEntry *cfg = state->cfg;
+	TSParserCacheEntry *prsobj = state->prsobj;
+	TSQuery	query = state->query;
+	List *prsoptions = state->prsoptions;
+
+	prs->curwords = 0;
+	hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
+	FunctionCall3(&(prsobj->prsheadline),
+				  PointerGetDatum(prs),
+				  PointerGetDatum(prsoptions),
+				  PointerGetDatum(query));
+
+	state->transformed = true;
+	return generateHeadline(prs);
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index a4cc86d..dd74bac 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4759,6 +4759,24 @@ DESCR("generate headline");
 DATA(insert OID = 3755 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 25 "25 3615" _null_ _null_ _null_ _null_ _null_ ts_headline _null_ _null_ _null_ ));
 DESCR("generate headline");
 
+DATA(insert OID = 4201 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 3802 "3734 3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid_opt _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4202 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 3802 "3734 3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4203 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 3802 "3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_opt _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4204 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 3802 "3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+
+DATA(insert OID = 4205 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 114 "3734 114 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_json_byid_opt _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4206 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 114 "3734 114 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_json_byid _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4207 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 114 "114 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_json_opt _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4208 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 114 "114 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_json _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+
 DATA(insert OID = 3745 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3614 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsvector_byid _null_ _null_ _null_ ));
 DESCR("transform to tsvector");
 DATA(insert OID = 3746 (  to_tsquery		PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsquery_byid _null_ _null_ _null_ ));
@@ -4775,6 +4793,10 @@ DATA(insert OID = 3751 (  plainto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f f t f s
 DESCR("transform to tsquery");
 DATA(insert OID = 5001 (  phraseto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
 DESCR("transform to tsquery");
+DATA(insert OID = 4209 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform jsonb to tsvector");
+DATA(insert OID = 4210 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform json to tsvector");
 
 DATA(insert OID = 3752 (  tsvector_update_trigger			PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ ));
 DESCR("trigger for automatic update of tsvector column");
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index 155650c..873e2e1 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -86,6 +86,15 @@ typedef struct
 #define MAXNUMPOS	(256)
 #define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
 
+/*
+ * In case if a TSVector contains several parts and we want to treat them as
+ * separate, it's necessary to add an artificial increment to position of each
+ * lexeme from every next part. It's required to avoid the situation when
+ * tsquery can find a phrase consisting of lexemes from two of such parts.
+ * TS_JUMP defined a value of this increment.
+ */
+#define TS_JUMP 1
+
 /* This struct represents a complete tsvector datum */
 typedef struct
 {
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index 1bb8768..046ead3 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -1674,3 +1674,86 @@ select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }');
  {"a":{},"d":{}}
 (1 row)
 
+-- json to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- json to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+                                to_tsvector                                 
+----------------------------------------------------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
+(1 row)
+
+-- ts_vector corner cases
+select to_tsvector('""'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('{}'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('[]'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('null'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+-- ts_headline for json
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+                                               ts_headline                                               
+---------------------------------------------------------------------------------------------------------
+ {"a":"aaa <b>bbb</b>","b":{"c":"ccc <b>ddd</b> fff","c1":"ccc1 ddd1"},"d":["ggg <b>hhh</b>","iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+                                      ts_headline                                       
+----------------------------------------------------------------------------------------
+ {"a":"aaa <b>bbb</b>","b":{"c":"ccc <b>ddd</b> fff"},"d":["ggg <b>hhh</b>","iii jjj"]}
+(1 row)
+
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                       ts_headline                                        
+------------------------------------------------------------------------------------------
+ {"a":"aaa <bbb>","b":{"c":"ccc <ddd> fff","c1":"ccc1 ddd1"},"d":["ggg <hhh>","iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                       ts_headline                                        
+------------------------------------------------------------------------------------------
+ {"a":"aaa <bbb>","b":{"c":"ccc <ddd> fff","c1":"ccc1 ddd1"},"d":["ggg <hhh>","iii jjj"]}
+(1 row)
+
+-- corner cases for ts_headline with json
+select ts_headline('null'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ null
+(1 row)
+
+select ts_headline('{}'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ {}
+(1 row)
+
+select ts_headline('[]'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ []
+(1 row)
+
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index 8ec4150..7cbbcfc 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -3474,3 +3474,86 @@ HINT:  Try using the function jsonb_set to replace key value.
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true);
 ERROR:  cannot replace existing key
 HINT:  Try using the function jsonb_set to replace key value.
+-- jsonb to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- jsonb to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+                                to_tsvector                                 
+----------------------------------------------------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
+(1 row)
+
+-- ts_vector corner cases
+select to_tsvector('""'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('{}'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('[]'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('null'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+-- ts_headline for jsonb
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+                                                   ts_headline                                                    
+------------------------------------------------------------------------------------------------------------------
+ {"a": "aaa <b>bbb</b>", "b": {"c": "ccc <b>ddd</b> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <b>hhh</b>", "iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+                                          ts_headline                                          
+-----------------------------------------------------------------------------------------------
+ {"a": "aaa <b>bbb</b>", "b": {"c": "ccc <b>ddd</b> fff"}, "d": ["ggg <b>hhh</b>", "iii jjj"]}
+(1 row)
+
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                            ts_headline                                            
+---------------------------------------------------------------------------------------------------
+ {"a": "aaa <bbb>", "b": {"c": "ccc <ddd> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <hhh>", "iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                            ts_headline                                            
+---------------------------------------------------------------------------------------------------
+ {"a": "aaa <bbb>", "b": {"c": "ccc <ddd> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <hhh>", "iii jjj"]}
+(1 row)
+
+-- corner cases for ts_headline with jsonb
+select ts_headline('null'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ null
+(1 row)
+
+select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ {}
+(1 row)
+
+select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ []
+(1 row)
+
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 5e61922..e661f96 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -551,3 +551,26 @@ select json_strip_nulls('[1,{"a":1,"b":null,"c":2},3]');
 
 -- an empty object is not null and should not be stripped
 select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }');
+
+-- json to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
+
+-- json to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+
+-- ts_vector corner cases
+select to_tsvector('""'::json);
+select to_tsvector('{}'::json);
+select to_tsvector('[]'::json);
+select to_tsvector('null'::json);
+
+-- ts_headline for json
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+
+-- corner cases for ts_headline with json
+select ts_headline('null'::json, tsquery('aaa & bbb'));
+select ts_headline('{}'::json, tsquery('aaa & bbb'));
+select ts_headline('[]'::json, tsquery('aaa & bbb'));
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index e2eaca0..2e31ee6 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -878,3 +878,26 @@ select jsonb_insert('{"a": {"b": "value"}}', '{a, c}', '"new_value"', true);
 
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"');
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true);
+
+-- jsonb to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+
+-- jsonb to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+
+-- ts_vector corner cases
+select to_tsvector('""'::jsonb);
+select to_tsvector('{}'::jsonb);
+select to_tsvector('[]'::jsonb);
+select to_tsvector('null'::jsonb);
+
+-- ts_headline for jsonb
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+
+-- corner cases for ts_headline with jsonb
+select ts_headline('null'::jsonb, tsquery('aaa & bbb'));
+select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));
+select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));

Andrew Dunstan

andrew.dunstan@2ndquadrant.com

almost 9 years ago

In reply to: Dmitry Dolgov (#5)

Re: [PATCH] few fts functions for jsonb

On 03/21/2017 06:28 PM, Dmitry Dolgov wrote:

On 21 March 2017 at 03:03, Andrew Dunstan

<andrew.dunstan@2ndquadrant.com
<mailto:andrew.dunstan@2ndquadrant.com>> wrote:

However, I think it should probably be broken up into a couple of

pieces -

one for the generic json/jsonb transforms infrastructure (which probably
needs some more comments) and one for the FTS functions that will

use it.

Sure, here are two patches with separated functionality and a bit more
commentaries for the transform functions.

I'm not through looking at this. However, here are a few preliminary
comments

* we might need to rationalize the header locations a bit
* iterate_json(b) and transform_json(b) are a bit too generally named.
Really what they do is iterate over or transform string values in
the json(b). They ignore / preserve the structure, keys, and
non-string scalar values in the json(b). A general iterate or
transform function would be called in effect with a stream of all
the elements in the json, not just scalar strings.
* Unless I'm missing something the iterate_json(b)_values return value
is ignored. Instead of returning the state it looks to me like it
should return nothing and be declared as void instead of void *
* transform_jsonb and transform_json are somewhat asymmetrical. The
latter should probably return a text* instead of a StringInfo, to be
consistent with the former.

cheers

andrew

--
Andrew Dunstan https://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Dmitry Dolgov

9erthalion6@gmail.com

almost 9 years ago

In reply to: Andrew Dunstan (#6)

2 attachment(s)

Re: [PATCH] few fts functions for jsonb

I'm not through looking at this. However, here are a few preliminary

comments

I've attached new versions of the patches with improvements related to
these commentaries.

Attachments:

jsonb_fts_functions_v2.patchtext/x-patch; charset=US-ASCII; name=jsonb_fts_functions_v2.patchDownload

diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 6e5de8f..8f7bcfe 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -16,6 +16,7 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonb.h"
 
 
 typedef struct MorphOpaque
@@ -24,6 +25,14 @@ typedef struct MorphOpaque
 	int			qoperator;		/* query operator */
 } MorphOpaque;
 
+typedef struct TSVectorBuildState
+{
+	ParsedText	*prs;
+	TSVector	result;
+	Oid			cfgId;
+} TSVectorBuildState;
+
+static void add_to_tsvector(void *state, char *elem_value, int elem_len);
 
 Datum
 get_current_ts_config(PG_FUNCTION_ARGS)
@@ -256,6 +265,109 @@ to_tsvector(PG_FUNCTION_ARGS)
 										PointerGetDatum(in)));
 }
 
+Datum
+jsonb_to_tsvector(PG_FUNCTION_ARGS)
+{
+	Jsonb				*jb = PG_GETARG_JSONB(0);
+	TSVectorBuildState	state;
+	ParsedText			*prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+	prs->words = NULL;
+	state.result = NULL;
+	state.cfgId = getTSCurrentConfig(true);
+	state.prs = prs;
+
+	iterate_jsonb_values(jb, &state, (JsonIterateAction) add_to_tsvector);
+
+	PG_FREE_IF_COPY(jb, 1);
+
+	if (state.result == NULL)
+	{
+		/* There weren't any string elements in jsonb,
+		 * so wee need to return an empty vector */
+
+		if (prs->words != NULL)
+			pfree(prs->words);
+
+		state.result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+		state.result->size = 0;
+	}
+
+	PG_RETURN_TSVECTOR(state.result);
+}
+
+Datum
+json_to_tsvector(PG_FUNCTION_ARGS)
+{
+	text				*json = PG_GETARG_TEXT_P(0);
+	TSVectorBuildState	state;
+	ParsedText			*prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+	prs->words = NULL;
+	state.result = NULL;
+	state.cfgId = getTSCurrentConfig(true);
+	state.prs = prs;
+
+	iterate_json_values(json, &state, (JsonIterateAction) add_to_tsvector);
+
+	PG_FREE_IF_COPY(json, 1);
+	if (state.result == NULL)
+	{
+		/* There weren't any string elements in json,
+		 * so wee need to return an empty vector */
+
+		if (prs->words != NULL)
+			pfree(prs->words);
+
+		state.result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+		state.result->size = 0;
+	}
+
+	PG_RETURN_TSVECTOR(state.result);
+}
+
+/*
+ * Extend current TSVector from _state with a new one,
+ * build over a json(b) element.
+ */
+static void
+add_to_tsvector(void *_state, char *elem_value, int elem_len)
+{
+	TSVectorBuildState *state = (TSVectorBuildState *) _state;
+	ParsedText	*prs = state->prs;
+	TSVector	item_vector;
+	int			i;
+
+	prs->lenwords = elem_len / 6;
+	if (prs->lenwords == 0)
+		prs->lenwords = 2;
+
+	prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
+	prs->curwords = 0;
+	prs->pos = 0;
+
+	parsetext(state->cfgId, prs, elem_value, elem_len);
+
+	if (prs->curwords)
+	{
+		if (state->result != NULL)
+		{
+			for (i = 0; i < prs->curwords; i++)
+				prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP;
+
+			item_vector = make_tsvector(prs);
+
+			state->result = (TSVector) DirectFunctionCall2(tsvector_concat,
+									TSVectorGetDatum(state->result),
+									PointerGetDatum(item_vector));
+		}
+		else
+			state->result = make_tsvector(prs);
+	}
+}
+
 /*
  * to_tsquery
  */
diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c
index 8ca1c62..ab1716a 100644
--- a/src/backend/tsearch/wparser.c
+++ b/src/backend/tsearch/wparser.c
@@ -20,6 +20,7 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonb.h"
 #include "utils/varlena.h"
 
 
@@ -31,6 +32,19 @@ typedef struct
 	LexDescr   *list;
 } TSTokenTypeStorage;
 
+/* state for ts_headline_json_* */
+typedef struct HeadlineJsonState
+{
+	HeadlineParsedText *prs;
+	TSConfigCacheEntry *cfg;
+	TSParserCacheEntry *prsobj;
+	TSQuery				query;
+	List				*prsoptions;
+	bool				transformed;
+} HeadlineJsonState;
+
+static text * headline_json_value(void *_state, char *elem_value, int elem_len);
+
 static void
 tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
 {
@@ -362,3 +376,177 @@ ts_headline_opt(PG_FUNCTION_ARGS)
 										PG_GETARG_DATUM(1),
 										PG_GETARG_DATUM(2)));
 }
+
+Datum
+ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
+{
+	Jsonb			*out, *jb = PG_GETARG_JSONB(1);
+	TSQuery			query = PG_GETARG_TSQUERY(2);
+	text			*opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+
+	HeadlineParsedText prs;
+	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+	memset(&prs, 0, sizeof(HeadlineParsedText));
+	prs.lenwords = 32;
+	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+	state->prs = &prs;
+	state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
+	state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+	state->query = query;
+	if (opt)
+		state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
+	else
+		state->prsoptions = NIL;
+
+	if (!OidIsValid(state->prsobj->headlineOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+		   errmsg("text search parser does not support headline creation")));
+
+	out = transform_jsonb_values(jb, state, (JsonTransformAction) headline_json_value);
+
+	PG_FREE_IF_COPY(jb, 1);
+	PG_FREE_IF_COPY(query, 2);
+	if (opt)
+		PG_FREE_IF_COPY(opt, 3);
+
+	pfree(prs.words);
+
+	if (state->transformed)
+	{
+		pfree(prs.startsel);
+		pfree(prs.stopsel);
+	}
+
+	PG_RETURN_JSONB(out);
+}
+
+Datum
+ts_headline_jsonb(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
+{
+	text				*json = PG_GETARG_TEXT_P(1);
+	TSQuery				query = PG_GETARG_TSQUERY(2);
+	text				*opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+	text				*out;
+
+	HeadlineParsedText prs;
+	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+	memset(&prs, 0, sizeof(HeadlineParsedText));
+	prs.lenwords = 32;
+	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+	state->prs = &prs;
+	state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
+	state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+	state->query = query;
+	if (opt)
+		state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
+	else
+		state->prsoptions = NIL;
+
+	if (!OidIsValid(state->prsobj->headlineOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+		   errmsg("text search parser does not support headline creation")));
+
+	out = transform_json_values(json, state, (JsonTransformAction) headline_json_value);
+
+	PG_FREE_IF_COPY(json, 1);
+	PG_FREE_IF_COPY(query, 2);
+	if (opt)
+		PG_FREE_IF_COPY(opt, 3);
+	pfree(prs.words);
+
+	if (state->transformed)
+	{
+		pfree(prs.startsel);
+		pfree(prs.stopsel);
+	}
+
+	PG_RETURN_TEXT_P(out);
+}
+
+Datum
+ts_headline_json(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_json_byid(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_json_opt(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+
+/*
+ * Return headline in text from, generated from a json(b) element
+ */
+static text *
+headline_json_value(void *_state, char *elem_value, int elem_len)
+{
+	HeadlineJsonState *state = (HeadlineJsonState *) _state;
+
+	HeadlineParsedText *prs = state->prs;
+	TSConfigCacheEntry *cfg = state->cfg;
+	TSParserCacheEntry *prsobj = state->prsobj;
+	TSQuery	query = state->query;
+	List *prsoptions = state->prsoptions;
+
+	prs->curwords = 0;
+	hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
+	FunctionCall3(&(prsobj->prsheadline),
+				  PointerGetDatum(prs),
+				  PointerGetDatum(prsoptions),
+				  PointerGetDatum(query));
+
+	state->transformed = true;
+	return generateHeadline(prs);
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index a4cc86d..dd74bac 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4759,6 +4759,24 @@ DESCR("generate headline");
 DATA(insert OID = 3755 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 25 "25 3615" _null_ _null_ _null_ _null_ _null_ ts_headline _null_ _null_ _null_ ));
 DESCR("generate headline");
 
+DATA(insert OID = 4201 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 3802 "3734 3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid_opt _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4202 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 3802 "3734 3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4203 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 3802 "3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_opt _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4204 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 3802 "3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+
+DATA(insert OID = 4205 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 114 "3734 114 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_json_byid_opt _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4206 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 114 "3734 114 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_json_byid _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4207 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 114 "114 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_json_opt _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4208 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 114 "114 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_json _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+
 DATA(insert OID = 3745 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3614 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsvector_byid _null_ _null_ _null_ ));
 DESCR("transform to tsvector");
 DATA(insert OID = 3746 (  to_tsquery		PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsquery_byid _null_ _null_ _null_ ));
@@ -4775,6 +4793,10 @@ DATA(insert OID = 3751 (  plainto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f f t f s
 DESCR("transform to tsquery");
 DATA(insert OID = 5001 (  phraseto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
 DESCR("transform to tsquery");
+DATA(insert OID = 4209 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform jsonb to tsvector");
+DATA(insert OID = 4210 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform json to tsvector");
 
 DATA(insert OID = 3752 (  tsvector_update_trigger			PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ ));
 DESCR("trigger for automatic update of tsvector column");
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index 155650c..873e2e1 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -86,6 +86,15 @@ typedef struct
 #define MAXNUMPOS	(256)
 #define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
 
+/*
+ * In case if a TSVector contains several parts and we want to treat them as
+ * separate, it's necessary to add an artificial increment to position of each
+ * lexeme from every next part. It's required to avoid the situation when
+ * tsquery can find a phrase consisting of lexemes from two of such parts.
+ * TS_JUMP defined a value of this increment.
+ */
+#define TS_JUMP 1
+
 /* This struct represents a complete tsvector datum */
 typedef struct
 {
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index 1bb8768..046ead3 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -1674,3 +1674,86 @@ select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }');
  {"a":{},"d":{}}
 (1 row)
 
+-- json to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- json to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+                                to_tsvector                                 
+----------------------------------------------------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
+(1 row)
+
+-- ts_vector corner cases
+select to_tsvector('""'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('{}'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('[]'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('null'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+-- ts_headline for json
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+                                               ts_headline                                               
+---------------------------------------------------------------------------------------------------------
+ {"a":"aaa <b>bbb</b>","b":{"c":"ccc <b>ddd</b> fff","c1":"ccc1 ddd1"},"d":["ggg <b>hhh</b>","iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+                                      ts_headline                                       
+----------------------------------------------------------------------------------------
+ {"a":"aaa <b>bbb</b>","b":{"c":"ccc <b>ddd</b> fff"},"d":["ggg <b>hhh</b>","iii jjj"]}
+(1 row)
+
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                       ts_headline                                        
+------------------------------------------------------------------------------------------
+ {"a":"aaa <bbb>","b":{"c":"ccc <ddd> fff","c1":"ccc1 ddd1"},"d":["ggg <hhh>","iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                       ts_headline                                        
+------------------------------------------------------------------------------------------
+ {"a":"aaa <bbb>","b":{"c":"ccc <ddd> fff","c1":"ccc1 ddd1"},"d":["ggg <hhh>","iii jjj"]}
+(1 row)
+
+-- corner cases for ts_headline with json
+select ts_headline('null'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ null
+(1 row)
+
+select ts_headline('{}'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ {}
+(1 row)
+
+select ts_headline('[]'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ []
+(1 row)
+
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index 8ec4150..7cbbcfc 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -3474,3 +3474,86 @@ HINT:  Try using the function jsonb_set to replace key value.
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true);
 ERROR:  cannot replace existing key
 HINT:  Try using the function jsonb_set to replace key value.
+-- jsonb to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- jsonb to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+                                to_tsvector                                 
+----------------------------------------------------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
+(1 row)
+
+-- ts_vector corner cases
+select to_tsvector('""'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('{}'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('[]'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('null'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+-- ts_headline for jsonb
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+                                                   ts_headline                                                    
+------------------------------------------------------------------------------------------------------------------
+ {"a": "aaa <b>bbb</b>", "b": {"c": "ccc <b>ddd</b> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <b>hhh</b>", "iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+                                          ts_headline                                          
+-----------------------------------------------------------------------------------------------
+ {"a": "aaa <b>bbb</b>", "b": {"c": "ccc <b>ddd</b> fff"}, "d": ["ggg <b>hhh</b>", "iii jjj"]}
+(1 row)
+
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                            ts_headline                                            
+---------------------------------------------------------------------------------------------------
+ {"a": "aaa <bbb>", "b": {"c": "ccc <ddd> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <hhh>", "iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                            ts_headline                                            
+---------------------------------------------------------------------------------------------------
+ {"a": "aaa <bbb>", "b": {"c": "ccc <ddd> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <hhh>", "iii jjj"]}
+(1 row)
+
+-- corner cases for ts_headline with jsonb
+select ts_headline('null'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ null
+(1 row)
+
+select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ {}
+(1 row)
+
+select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ []
+(1 row)
+
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 5e61922..e661f96 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -551,3 +551,26 @@ select json_strip_nulls('[1,{"a":1,"b":null,"c":2},3]');
 
 -- an empty object is not null and should not be stripped
 select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }');
+
+-- json to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
+
+-- json to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+
+-- ts_vector corner cases
+select to_tsvector('""'::json);
+select to_tsvector('{}'::json);
+select to_tsvector('[]'::json);
+select to_tsvector('null'::json);
+
+-- ts_headline for json
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+
+-- corner cases for ts_headline with json
+select ts_headline('null'::json, tsquery('aaa & bbb'));
+select ts_headline('{}'::json, tsquery('aaa & bbb'));
+select ts_headline('[]'::json, tsquery('aaa & bbb'));
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index e2eaca0..2e31ee6 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -878,3 +878,26 @@ select jsonb_insert('{"a": {"b": "value"}}', '{a, c}', '"new_value"', true);
 
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"');
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true);
+
+-- jsonb to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+
+-- jsonb to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+
+-- ts_vector corner cases
+select to_tsvector('""'::jsonb);
+select to_tsvector('{}'::jsonb);
+select to_tsvector('[]'::jsonb);
+select to_tsvector('null'::jsonb);
+
+-- ts_headline for jsonb
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+
+-- corner cases for ts_headline with jsonb
+select ts_headline('null'::jsonb, tsquery('aaa & bbb'));
+select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));
+select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));

jsonb_fts_support_v2.patchtext/x-patch; charset=US-ASCII; name=jsonb_fts_support_v2.patchDownload

diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index 6a7aab2..8fbe08d 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -42,6 +42,8 @@
 #define JB_PATH_CREATE_OR_INSERT \
 	(JB_PATH_INSERT_BEFORE | JB_PATH_INSERT_AFTER | JB_PATH_CREATE)
 
+#define is_jsonb_data(type) (type == WJB_KEY || type == WJB_VALUE || type == WJB_ELEM)
+
 /* state for json_object_keys */
 typedef struct OkeysState
 {
@@ -52,6 +54,23 @@ typedef struct OkeysState
 	int			sent_count;
 } OkeysState;
 
+/* state for iterate_json function */
+typedef struct IterateJsonState
+{
+	JsonLexContext		*lex;
+	JsonIterateAction	action;			/* an action that will be applied to each json value */
+	void				*action_state;	/* any necessary context for iteration */
+} IterateJsonState;
+
+/* state for transform_json function */
+typedef struct TransformJsonState
+{
+	JsonLexContext		*lex;
+	StringInfo			strval;			/* resulting json */
+	JsonTransformAction	action;			/* an action that will be applied to each json value */
+	void				*action_state;	/* any necessary context for transformation */
+} TransformJsonState;
+
 /* state for json_get* functions */
 typedef struct GetState
 {
@@ -271,6 +290,18 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems,
 			 int level, Jsonb *newval, uint32 nelems, int op_type);
 static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
 
+/* function supporting iterate_json(b) */
+static void apply_action(void *state, char *token, JsonTokenType tokentype);
+
+/* function supporting transform_json(b) */
+static void transform_object_start(void *state);
+static void transform_object_end(void *state);
+static void transform_array_start(void *state);
+static void transform_array_end(void *state);
+static void transform_object_field_start(void *state, char *fname, bool isnull);
+static void transform_array_element_start(void *state, bool isnull);
+static void transform_scalar(void *state, char *token, JsonTokenType tokentype);
+
 
 /*
  * SQL function json_object_keys
@@ -4130,3 +4161,202 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
 		}
 	}
 }
+
+/*
+ * Iterate over jsonb string values or elements, and pass them together with
+ * an iteration state to a specified JsonIterateAction.
+ */
+void
+iterate_jsonb_values(Jsonb *jb, void *state, JsonIterateAction action)
+{
+	JsonbIterator		*it;
+	JsonbValue			v;
+	JsonbIteratorToken	type;
+
+	it = JsonbIteratorInit(&jb->root);
+
+	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+		{
+			action(state, v.val.string.val, v.val.string.len);
+		}
+	}
+}
+
+/*
+ * Iterate over json string values or elements, and pass them together with an
+ * iteration state to a specified JsonIterateAction.
+ */
+void
+iterate_json_values(text *json, void *action_state, JsonIterateAction action)
+{
+	JsonLexContext *lex = makeJsonLexContext(json, true);
+	JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
+	IterateJsonState   *state = palloc0(sizeof(IterateJsonState));
+
+	state->lex = lex;
+	state->action = action;
+	state->action_state = action_state;
+
+	sem->semstate = (void *) state;
+	sem->scalar = apply_action;
+
+	pg_parse_json(lex, sem);
+}
+
+/*
+ * An auxiliary function for iterate_json_values to invoke a specified
+ * JsonIterateAction.
+ */
+static void
+apply_action(void *state, char *token, JsonTokenType tokentype)
+{
+	IterateJsonState   *_state = (IterateJsonState *) state;
+	if (tokentype == JSON_TOKEN_STRING)
+		(*_state->action) (_state->action_state, token, strlen(token));
+}
+
+/*
+ * Iterate over a jsonb, and apply a specified JsonTransformAction to every
+ * string value or element. Any necessary context for a JsonTransformAction can
+ * be passed in the action_state variable. Function returns a copy of an original jsonb
+ * object with transformed values.
+ */
+Jsonb *
+transform_jsonb_values(Jsonb *jsonb, void *action_state, JsonTransformAction transform_action)
+{
+	JsonbIterator		*it;
+	JsonbValue			v, *res = NULL;
+	JsonbIteratorToken	type;
+	JsonbParseState		*st = NULL;
+	text				*out;
+	bool				is_scalar = false;
+
+	it = JsonbIteratorInit(&jsonb->root);
+	is_scalar = it->isScalar;
+
+	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+		{
+			out = transform_action(action_state, v.val.string.val, v.val.string.len);
+			v.val.string.val = VARDATA_ANY(out);
+			v.val.string.len = VARSIZE_ANY_EXHDR(out);
+			res = pushJsonbValue(&st, type, type < WJB_BEGIN_ARRAY ? &v : NULL);
+		}
+		else
+		{
+			res = pushJsonbValue(&st, type, is_jsonb_data(type) ? &v : NULL);
+		}
+	}
+
+	if (res->type == jbvArray)
+		res->val.array.rawScalar = is_scalar;
+
+	return JsonbValueToJsonb(res);
+}
+
+/*
+ * Iterate over a json, and apply a specified JsonTransformAction to every
+ * string value or element. Any necessary context for a JsonTransformAction can
+ * be passed in the action_state variable. Function returns a StringInfo, which
+ * is a copy of an original json with transformed values.
+ */
+text *
+transform_json_values(text *json, void *action_state, JsonTransformAction transform_action)
+{
+	JsonLexContext *lex = makeJsonLexContext(json, true);
+	JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
+	TransformJsonState *state = palloc0(sizeof(TransformJsonState));
+
+	state->lex = lex;
+	state->strval = makeStringInfo();
+	state->action = transform_action;
+	state->action_state = action_state;
+
+	sem->semstate = (void *) state;
+	sem->scalar = transform_scalar;
+	sem->object_start = transform_object_start;
+	sem->object_end = transform_object_end;
+	sem->array_start = transform_array_start;
+	sem->array_end = transform_array_end;
+	sem->scalar = transform_scalar;
+	sem->array_element_start = transform_array_element_start;
+	sem->object_field_start = transform_object_field_start;
+
+	pg_parse_json(lex, sem);
+
+	return cstring_to_text_with_len(state->strval->data, state->strval->len);
+}
+
+/*
+ * Set of auxiliary functions for transform_json to invoke a specified
+ * JsonTransformAction for all values and left everything else untouched.
+ */
+static void
+transform_object_start(void *state)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+	appendStringInfoCharMacro(_state->strval, '{');
+}
+
+static void
+transform_object_end(void *state)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+	appendStringInfoCharMacro(_state->strval, '}');
+}
+
+static void
+transform_array_start(void *state)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+	appendStringInfoCharMacro(_state->strval, '[');
+}
+
+static void
+transform_array_end(void *state)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+	appendStringInfoCharMacro(_state->strval, ']');
+}
+
+static void
+transform_object_field_start(void *state, char *fname, bool isnull)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+
+	if (_state->strval->data[_state->strval->len - 1] != '{')
+		appendStringInfoCharMacro(_state->strval, ',');
+
+	/*
+	 * Unfortunately we don't have the quoted and escaped string any more, so
+	 * we have to re-escape it.
+	 */
+	escape_json(_state->strval, fname);
+	appendStringInfoCharMacro(_state->strval, ':');
+}
+
+static void
+transform_array_element_start(void *state, bool isnull)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+
+	if (_state->strval->data[_state->strval->len - 1] != '[')
+		appendStringInfoCharMacro(_state->strval, ',');
+}
+
+static void
+transform_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+
+	if (tokentype == JSON_TOKEN_STRING)
+	{
+		text *out = (*_state->action) (_state->action_state, token, strlen(token));
+		escape_json(_state->strval, text_to_cstring(out));
+	}
+	else
+		appendStringInfoString(_state->strval, token);
+}
diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h
index 411e158..2d0ee19 100644
--- a/src/include/utils/jsonb.h
+++ b/src/include/utils/jsonb.h
@@ -351,6 +351,12 @@ typedef struct JsonbIterator
 	struct JsonbIterator *parent;
 } JsonbIterator;
 
+/* an action that will be applied to each value in iterate_json(b) functions */
+typedef void (*JsonIterateAction) (void *state, char *elem_value, int elem_len);
+
+/* an action that will be applied to each value in transform_json(b) functions */
+typedef text * (*JsonTransformAction) (void *state, char *elem_value, int elem_len);
+
 
 /* Support functions */
 extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
@@ -377,5 +383,12 @@ extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
 extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in,
 					 int estimated_len);
 
+extern void iterate_jsonb_values(Jsonb *jb, void *state, JsonIterateAction action);
+extern void iterate_json_values(text *json, void *action_state, JsonIterateAction action);
+extern Jsonb *transform_jsonb_values(Jsonb *jsonb, void *action_state,
+						JsonTransformAction transform_action);
+extern text *transform_json_values(text *json, void *action_state,
+							JsonTransformAction transform_action);
+
 
 #endif   /* __JSONB_H__ */

Andrew Dunstan

andrew.dunstan@2ndquadrant.com

almost 9 years ago

In reply to: Dmitry Dolgov (#7)

Re: [PATCH] few fts functions for jsonb

On 26 March 2017 at 17:57, Dmitry Dolgov <9erthalion6@gmail.com> wrote:

I'm not through looking at this. However, here are a few preliminary
comments

I've attached new versions of the patches with improvements related to these
commentaries.

These patches seem fundamentally OK. But I'm still not happy with the
naming etc.

I think the header changes would probably be better placed in
jsonapi.h or in a new header file.

And the names still seem too general to me. e.g. transform_json_values
should probably be transform_json_string_values, and the static
support functions should be renamed to match. Also the
JsonIterateAction and JsonTransformAction funtion typedefs should
probably be renamed to match.

I'm not sure there is any great point in the is_jsonb_data macro,
which is only used in one spot. I would get rid of it and expand the
test in place.

I don't have much time this week to work on it, as there are one or
two other patches I also want to look at. If you clean these things
up I will commit it. The second patch looks fine.

cheers

andrew

--
Andrew Dunstan https://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Dmitry Dolgov

9erthalion6@gmail.com

almost 9 years ago

In reply to: Andrew Dunstan (#8)

2 attachment(s)

Re: [PATCH] few fts functions for jsonb

On 29 March 2017 at 18:28, Andrew Dunstan <andrew.dunstan@2ndquadrant.com>

wrote:

These patches seem fundamentally OK. But I'm still not happy with the
naming etc.

I've changed names for all functions and action definitions, moved out the
changes in header file to `jsonapi.h` and removed `is_jsonb_data` macro. So
it
should be better now.

Attachments:

jsonb_fts_support_v3.patchtext/x-patch; charset=US-ASCII; name=jsonb_fts_support_v3.patchDownload

diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index 6a7aab2..c9f86b0 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -52,6 +52,25 @@ typedef struct OkeysState
 	int			sent_count;
 } OkeysState;
 
+/* state for iterate_json function */
+typedef struct IterateJsonState
+{
+	JsonLexContext					*lex;
+	JsonIterateStringValuesAction	action;			/* an action that will be applied
+													   to each json value */
+	void							*action_state;	/* any necessary context for iteration */
+} IterateJsonState;
+
+/* state for transform_json function */
+typedef struct TransformJsonState
+{
+	JsonLexContext					*lex;
+	StringInfo						strval;			/* resulting json */
+	JsonTransformStringValuesAction	action;			/* an action that will be applied
+													   to each json value */
+	void							*action_state;	/* any necessary context for transformation */
+} TransformJsonState;
+
 /* state for json_get* functions */
 typedef struct GetState
 {
@@ -271,6 +290,18 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems,
 			 int level, Jsonb *newval, uint32 nelems, int op_type);
 static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
 
+/* function supporting iterate_json(b) */
+static void iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
+
+/* function supporting transform_json(b) */
+static void transform_string_values_object_start(void *state);
+static void transform_string_values_object_end(void *state);
+static void transform_string_values_array_start(void *state);
+static void transform_string_values_array_end(void *state);
+static void transform_string_values_object_field_start(void *state, char *fname, bool isnull);
+static void transform_string_values_array_element_start(void *state, bool isnull);
+static void transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
+
 
 /*
  * SQL function json_object_keys
@@ -4130,3 +4161,208 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
 		}
 	}
 }
+
+/*
+ * Iterate over jsonb string values or elements, and pass them together with an
+ * iteration state to a specified JsonIterateStringValuesAction.
+ */
+void
+iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action)
+{
+	JsonbIterator		*it;
+	JsonbValue			v;
+	JsonbIteratorToken	type;
+
+	it = JsonbIteratorInit(&jb->root);
+
+	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+		{
+			action(state, v.val.string.val, v.val.string.len);
+		}
+	}
+}
+
+/*
+ * Iterate over json string values or elements, and pass them together with an
+ * iteration state to a specified JsonIterateStringValuesAction.
+ */
+void
+iterate_json_string_values(text *json, void *action_state, JsonIterateStringValuesAction action)
+{
+	JsonLexContext *lex = makeJsonLexContext(json, true);
+	JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
+	IterateJsonState   *state = palloc0(sizeof(IterateJsonState));
+
+	state->lex = lex;
+	state->action = action;
+	state->action_state = action_state;
+
+	sem->semstate = (void *) state;
+	sem->scalar = iterate_string_values_scalar;
+
+	pg_parse_json(lex, sem);
+}
+
+/*
+ * An auxiliary function for iterate_json_string_values to invoke a specified
+ * JsonIterateStringValuesAction.
+ */
+static void
+iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	IterateJsonState   *_state = (IterateJsonState *) state;
+	if (tokentype == JSON_TOKEN_STRING)
+		(*_state->action) (_state->action_state, token, strlen(token));
+}
+
+/*
+ * Iterate over a jsonb, and apply a specified JsonTransformStringValuesAction
+ * to every string value or element. Any necessary context for a
+ * JsonTransformStringValuesAction can be passed in the action_state variable.
+ * Function returns a copy of an original jsonb object with transformed values.
+ */
+Jsonb *
+transform_jsonb_string_values(Jsonb *jsonb, void *action_state,
+							  JsonTransformStringValuesAction transform_action)
+{
+	JsonbIterator		*it;
+	JsonbValue			v, *res = NULL;
+	JsonbIteratorToken	type;
+	JsonbParseState		*st = NULL;
+	text				*out;
+	bool				is_scalar = false;
+
+	it = JsonbIteratorInit(&jsonb->root);
+	is_scalar = it->isScalar;
+
+	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+		{
+			out = transform_action(action_state, v.val.string.val, v.val.string.len);
+			v.val.string.val = VARDATA_ANY(out);
+			v.val.string.len = VARSIZE_ANY_EXHDR(out);
+			res = pushJsonbValue(&st, type, type < WJB_BEGIN_ARRAY ? &v : NULL);
+		}
+		else
+		{
+			res = pushJsonbValue(&st, type, (type == WJB_KEY ||
+											 type == WJB_VALUE ||
+											 type == WJB_ELEM) ? &v : NULL);
+		}
+	}
+
+	if (res->type == jbvArray)
+		res->val.array.rawScalar = is_scalar;
+
+	return JsonbValueToJsonb(res);
+}
+
+/*
+ * Iterate over a json, and apply a specified JsonTransformStringValuesAction
+ * to every string value or element. Any necessary context for a
+ * JsonTransformStringValuesAction can be passed in the action_state variable.
+ * Function returns a StringInfo, which is a copy of an original json with
+ * transformed values.
+ */
+text *
+transform_json_string_values(text *json, void *action_state,
+							 JsonTransformStringValuesAction transform_action)
+{
+	JsonLexContext *lex = makeJsonLexContext(json, true);
+	JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
+	TransformJsonState *state = palloc0(sizeof(TransformJsonState));
+
+	state->lex = lex;
+	state->strval = makeStringInfo();
+	state->action = transform_action;
+	state->action_state = action_state;
+
+	sem->semstate = (void *) state;
+	sem->scalar = transform_string_values_scalar;
+	sem->object_start = transform_string_values_object_start;
+	sem->object_end = transform_string_values_object_end;
+	sem->array_start = transform_string_values_array_start;
+	sem->array_end = transform_string_values_array_end;
+	sem->scalar = transform_string_values_scalar;
+	sem->array_element_start = transform_string_values_array_element_start;
+	sem->object_field_start = transform_string_values_object_field_start;
+
+	pg_parse_json(lex, sem);
+
+	return cstring_to_text_with_len(state->strval->data, state->strval->len);
+}
+
+/*
+ * Set of auxiliary functions for transform_json_string_values to invoke a
+ * specified JsonTransformStringValuesAction for all values and left everything
+ * else untouched.
+ */
+static void
+transform_string_values_object_start(void *state)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+	appendStringInfoCharMacro(_state->strval, '{');
+}
+
+static void
+transform_string_values_object_end(void *state)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+	appendStringInfoCharMacro(_state->strval, '}');
+}
+
+static void
+transform_string_values_array_start(void *state)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+	appendStringInfoCharMacro(_state->strval, '[');
+}
+
+static void
+transform_string_values_array_end(void *state)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+	appendStringInfoCharMacro(_state->strval, ']');
+}
+
+static void
+transform_string_values_object_field_start(void *state, char *fname, bool isnull)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+
+	if (_state->strval->data[_state->strval->len - 1] != '{')
+		appendStringInfoCharMacro(_state->strval, ',');
+
+	/*
+	 * Unfortunately we don't have the quoted and escaped string any more, so
+	 * we have to re-escape it.
+	 */
+	escape_json(_state->strval, fname);
+	appendStringInfoCharMacro(_state->strval, ':');
+}
+
+static void
+transform_string_values_array_element_start(void *state, bool isnull)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+
+	if (_state->strval->data[_state->strval->len - 1] != '[')
+		appendStringInfoCharMacro(_state->strval, ',');
+}
+
+static void
+transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	TransformJsonState *_state = (TransformJsonState *) state;
+
+	if (tokentype == JSON_TOKEN_STRING)
+	{
+		text *out = (*_state->action) (_state->action_state, token, strlen(token));
+		escape_json(_state->strval, text_to_cstring(out));
+	}
+	else
+		appendStringInfoString(_state->strval, token);
+}
diff --git a/src/include/utils/jsonapi.h b/src/include/utils/jsonapi.h
index 6962f1a..a6b8fbd 100644
--- a/src/include/utils/jsonapi.h
+++ b/src/include/utils/jsonapi.h
@@ -14,6 +14,7 @@
 #ifndef JSONAPI_H
 #define JSONAPI_H
 
+#include "jsonb.h"
 #include "lib/stringinfo.h"
 
 typedef enum
@@ -131,4 +132,19 @@ extern JsonLexContext *makeJsonLexContextCstringLen(char *json,
  */
 extern bool IsValidJsonNumber(const char *str, int len);
 
+/* an action that will be applied to each value in iterate_json(b)_string_vaues functions */
+typedef void (*JsonIterateStringValuesAction) (void *state, char *elem_value, int elem_len);
+
+/* an action that will be applied to each value in transform_json(b)_string_values functions */
+typedef text * (*JsonTransformStringValuesAction) (void *state, char *elem_value, int elem_len);
+
+extern void iterate_jsonb_string_values(Jsonb *jb, void *state,
+										JsonIterateStringValuesAction action);
+extern void iterate_json_string_values(text *json, void *action_state,
+									   JsonIterateStringValuesAction action);
+extern Jsonb *transform_jsonb_string_values(Jsonb *jsonb, void *action_state,
+											JsonTransformStringValuesAction transform_action);
+extern text *transform_json_string_values(text *json, void *action_state,
+										  JsonTransformStringValuesAction transform_action);
+
 #endif   /* JSONAPI_H */

jsonb_fts_functions_v3.patchtext/x-patch; charset=US-ASCII; name=jsonb_fts_functions_v3.patchDownload

diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 6e5de8f..42034ab 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -16,6 +16,7 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonapi.h"
 
 
 typedef struct MorphOpaque
@@ -24,6 +25,14 @@ typedef struct MorphOpaque
 	int			qoperator;		/* query operator */
 } MorphOpaque;
 
+typedef struct TSVectorBuildState
+{
+	ParsedText	*prs;
+	TSVector	result;
+	Oid			cfgId;
+} TSVectorBuildState;
+
+static void add_to_tsvector(void *state, char *elem_value, int elem_len);
 
 Datum
 get_current_ts_config(PG_FUNCTION_ARGS)
@@ -256,6 +265,109 @@ to_tsvector(PG_FUNCTION_ARGS)
 										PointerGetDatum(in)));
 }
 
+Datum
+jsonb_to_tsvector(PG_FUNCTION_ARGS)
+{
+	Jsonb				*jb = PG_GETARG_JSONB(0);
+	TSVectorBuildState	state;
+	ParsedText			*prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+	prs->words = NULL;
+	state.result = NULL;
+	state.cfgId = getTSCurrentConfig(true);
+	state.prs = prs;
+
+	iterate_jsonb_string_values(jb, &state, (JsonIterateStringValuesAction) add_to_tsvector);
+
+	PG_FREE_IF_COPY(jb, 1);
+
+	if (state.result == NULL)
+	{
+		/* There weren't any string elements in jsonb,
+		 * so wee need to return an empty vector */
+
+		if (prs->words != NULL)
+			pfree(prs->words);
+
+		state.result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+		state.result->size = 0;
+	}
+
+	PG_RETURN_TSVECTOR(state.result);
+}
+
+Datum
+json_to_tsvector(PG_FUNCTION_ARGS)
+{
+	text				*json = PG_GETARG_TEXT_P(0);
+	TSVectorBuildState	state;
+	ParsedText			*prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+	prs->words = NULL;
+	state.result = NULL;
+	state.cfgId = getTSCurrentConfig(true);
+	state.prs = prs;
+
+	iterate_json_string_values(json, &state, (JsonIterateStringValuesAction) add_to_tsvector);
+
+	PG_FREE_IF_COPY(json, 1);
+	if (state.result == NULL)
+	{
+		/* There weren't any string elements in json,
+		 * so wee need to return an empty vector */
+
+		if (prs->words != NULL)
+			pfree(prs->words);
+
+		state.result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+		state.result->size = 0;
+	}
+
+	PG_RETURN_TSVECTOR(state.result);
+}
+
+/*
+ * Extend current TSVector from _state with a new one,
+ * build over a json(b) element.
+ */
+static void
+add_to_tsvector(void *_state, char *elem_value, int elem_len)
+{
+	TSVectorBuildState *state = (TSVectorBuildState *) _state;
+	ParsedText	*prs = state->prs;
+	TSVector	item_vector;
+	int			i;
+
+	prs->lenwords = elem_len / 6;
+	if (prs->lenwords == 0)
+		prs->lenwords = 2;
+
+	prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
+	prs->curwords = 0;
+	prs->pos = 0;
+
+	parsetext(state->cfgId, prs, elem_value, elem_len);
+
+	if (prs->curwords)
+	{
+		if (state->result != NULL)
+		{
+			for (i = 0; i < prs->curwords; i++)
+				prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP;
+
+			item_vector = make_tsvector(prs);
+
+			state->result = (TSVector) DirectFunctionCall2(tsvector_concat,
+									TSVectorGetDatum(state->result),
+									PointerGetDatum(item_vector));
+		}
+		else
+			state->result = make_tsvector(prs);
+	}
+}
+
 /*
  * to_tsquery
  */
diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c
index 8ca1c62..6e4e445 100644
--- a/src/backend/tsearch/wparser.c
+++ b/src/backend/tsearch/wparser.c
@@ -20,6 +20,7 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonapi.h"
 #include "utils/varlena.h"
 
 
@@ -31,6 +32,19 @@ typedef struct
 	LexDescr   *list;
 } TSTokenTypeStorage;
 
+/* state for ts_headline_json_* */
+typedef struct HeadlineJsonState
+{
+	HeadlineParsedText *prs;
+	TSConfigCacheEntry *cfg;
+	TSParserCacheEntry *prsobj;
+	TSQuery				query;
+	List				*prsoptions;
+	bool				transformed;
+} HeadlineJsonState;
+
+static text * headline_json_value(void *_state, char *elem_value, int elem_len);
+
 static void
 tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
 {
@@ -362,3 +376,179 @@ ts_headline_opt(PG_FUNCTION_ARGS)
 										PG_GETARG_DATUM(1),
 										PG_GETARG_DATUM(2)));
 }
+
+Datum
+ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
+{
+	Jsonb			*out, *jb = PG_GETARG_JSONB(1);
+	TSQuery			query = PG_GETARG_TSQUERY(2);
+	text			*opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+	JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
+
+	HeadlineParsedText prs;
+	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+	memset(&prs, 0, sizeof(HeadlineParsedText));
+	prs.lenwords = 32;
+	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+	state->prs = &prs;
+	state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
+	state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+	state->query = query;
+	if (opt)
+		state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
+	else
+		state->prsoptions = NIL;
+
+	if (!OidIsValid(state->prsobj->headlineOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+		   errmsg("text search parser does not support headline creation")));
+
+	out = transform_jsonb_string_values(jb, state, action);
+
+	PG_FREE_IF_COPY(jb, 1);
+	PG_FREE_IF_COPY(query, 2);
+	if (opt)
+		PG_FREE_IF_COPY(opt, 3);
+
+	pfree(prs.words);
+
+	if (state->transformed)
+	{
+		pfree(prs.startsel);
+		pfree(prs.stopsel);
+	}
+
+	PG_RETURN_JSONB(out);
+}
+
+Datum
+ts_headline_jsonb(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
+{
+	text				*json = PG_GETARG_TEXT_P(1);
+	TSQuery				query = PG_GETARG_TSQUERY(2);
+	text				*opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+	text				*out;
+	JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
+
+	HeadlineParsedText prs;
+	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+	memset(&prs, 0, sizeof(HeadlineParsedText));
+	prs.lenwords = 32;
+	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+	state->prs = &prs;
+	state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
+	state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+	state->query = query;
+	if (opt)
+		state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
+	else
+		state->prsoptions = NIL;
+
+	if (!OidIsValid(state->prsobj->headlineOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+		   errmsg("text search parser does not support headline creation")));
+
+	out = transform_json_string_values(json, state, action);
+
+	PG_FREE_IF_COPY(json, 1);
+	PG_FREE_IF_COPY(query, 2);
+	if (opt)
+		PG_FREE_IF_COPY(opt, 3);
+	pfree(prs.words);
+
+	if (state->transformed)
+	{
+		pfree(prs.startsel);
+		pfree(prs.stopsel);
+	}
+
+	PG_RETURN_TEXT_P(out);
+}
+
+Datum
+ts_headline_json(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_json_byid(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_json_opt(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+
+/*
+ * Return headline in text from, generated from a json(b) element
+ */
+static text *
+headline_json_value(void *_state, char *elem_value, int elem_len)
+{
+	HeadlineJsonState *state = (HeadlineJsonState *) _state;
+
+	HeadlineParsedText *prs = state->prs;
+	TSConfigCacheEntry *cfg = state->cfg;
+	TSParserCacheEntry *prsobj = state->prsobj;
+	TSQuery	query = state->query;
+	List *prsoptions = state->prsoptions;
+
+	prs->curwords = 0;
+	hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
+	FunctionCall3(&(prsobj->prsheadline),
+				  PointerGetDatum(prs),
+				  PointerGetDatum(prsoptions),
+				  PointerGetDatum(query));
+
+	state->transformed = true;
+	return generateHeadline(prs);
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index a4cc86d..dd74bac 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4759,6 +4759,24 @@ DESCR("generate headline");
 DATA(insert OID = 3755 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 25 "25 3615" _null_ _null_ _null_ _null_ _null_ ts_headline _null_ _null_ _null_ ));
 DESCR("generate headline");
 
+DATA(insert OID = 4201 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 3802 "3734 3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid_opt _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4202 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 3802 "3734 3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4203 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 3802 "3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_opt _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4204 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 3802 "3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+
+DATA(insert OID = 4205 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 114 "3734 114 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_json_byid_opt _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4206 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 114 "3734 114 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_json_byid _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4207 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 114 "114 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_json_opt _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4208 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 114 "114 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_json _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+
 DATA(insert OID = 3745 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3614 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsvector_byid _null_ _null_ _null_ ));
 DESCR("transform to tsvector");
 DATA(insert OID = 3746 (  to_tsquery		PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsquery_byid _null_ _null_ _null_ ));
@@ -4775,6 +4793,10 @@ DATA(insert OID = 3751 (  plainto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f f t f s
 DESCR("transform to tsquery");
 DATA(insert OID = 5001 (  phraseto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
 DESCR("transform to tsquery");
+DATA(insert OID = 4209 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform jsonb to tsvector");
+DATA(insert OID = 4210 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform json to tsvector");
 
 DATA(insert OID = 3752 (  tsvector_update_trigger			PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ ));
 DESCR("trigger for automatic update of tsvector column");
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index 155650c..873e2e1 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -86,6 +86,15 @@ typedef struct
 #define MAXNUMPOS	(256)
 #define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
 
+/*
+ * In case if a TSVector contains several parts and we want to treat them as
+ * separate, it's necessary to add an artificial increment to position of each
+ * lexeme from every next part. It's required to avoid the situation when
+ * tsquery can find a phrase consisting of lexemes from two of such parts.
+ * TS_JUMP defined a value of this increment.
+ */
+#define TS_JUMP 1
+
 /* This struct represents a complete tsvector datum */
 typedef struct
 {
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index 1bb8768..046ead3 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -1674,3 +1674,86 @@ select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }');
  {"a":{},"d":{}}
 (1 row)
 
+-- json to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- json to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+                                to_tsvector                                 
+----------------------------------------------------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
+(1 row)
+
+-- ts_vector corner cases
+select to_tsvector('""'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('{}'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('[]'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('null'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+-- ts_headline for json
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+                                               ts_headline                                               
+---------------------------------------------------------------------------------------------------------
+ {"a":"aaa <b>bbb</b>","b":{"c":"ccc <b>ddd</b> fff","c1":"ccc1 ddd1"},"d":["ggg <b>hhh</b>","iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+                                      ts_headline                                       
+----------------------------------------------------------------------------------------
+ {"a":"aaa <b>bbb</b>","b":{"c":"ccc <b>ddd</b> fff"},"d":["ggg <b>hhh</b>","iii jjj"]}
+(1 row)
+
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                       ts_headline                                        
+------------------------------------------------------------------------------------------
+ {"a":"aaa <bbb>","b":{"c":"ccc <ddd> fff","c1":"ccc1 ddd1"},"d":["ggg <hhh>","iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                       ts_headline                                        
+------------------------------------------------------------------------------------------
+ {"a":"aaa <bbb>","b":{"c":"ccc <ddd> fff","c1":"ccc1 ddd1"},"d":["ggg <hhh>","iii jjj"]}
+(1 row)
+
+-- corner cases for ts_headline with json
+select ts_headline('null'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ null
+(1 row)
+
+select ts_headline('{}'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ {}
+(1 row)
+
+select ts_headline('[]'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ []
+(1 row)
+
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index 8ec4150..7cbbcfc 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -3474,3 +3474,86 @@ HINT:  Try using the function jsonb_set to replace key value.
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true);
 ERROR:  cannot replace existing key
 HINT:  Try using the function jsonb_set to replace key value.
+-- jsonb to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- jsonb to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+                                to_tsvector                                 
+----------------------------------------------------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
+(1 row)
+
+-- ts_vector corner cases
+select to_tsvector('""'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('{}'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('[]'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('null'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+-- ts_headline for jsonb
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+                                                   ts_headline                                                    
+------------------------------------------------------------------------------------------------------------------
+ {"a": "aaa <b>bbb</b>", "b": {"c": "ccc <b>ddd</b> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <b>hhh</b>", "iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+                                          ts_headline                                          
+-----------------------------------------------------------------------------------------------
+ {"a": "aaa <b>bbb</b>", "b": {"c": "ccc <b>ddd</b> fff"}, "d": ["ggg <b>hhh</b>", "iii jjj"]}
+(1 row)
+
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                            ts_headline                                            
+---------------------------------------------------------------------------------------------------
+ {"a": "aaa <bbb>", "b": {"c": "ccc <ddd> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <hhh>", "iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                            ts_headline                                            
+---------------------------------------------------------------------------------------------------
+ {"a": "aaa <bbb>", "b": {"c": "ccc <ddd> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <hhh>", "iii jjj"]}
+(1 row)
+
+-- corner cases for ts_headline with jsonb
+select ts_headline('null'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ null
+(1 row)
+
+select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ {}
+(1 row)
+
+select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ []
+(1 row)
+
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 5e61922..e661f96 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -551,3 +551,26 @@ select json_strip_nulls('[1,{"a":1,"b":null,"c":2},3]');
 
 -- an empty object is not null and should not be stripped
 select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }');
+
+-- json to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
+
+-- json to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+
+-- ts_vector corner cases
+select to_tsvector('""'::json);
+select to_tsvector('{}'::json);
+select to_tsvector('[]'::json);
+select to_tsvector('null'::json);
+
+-- ts_headline for json
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+
+-- corner cases for ts_headline with json
+select ts_headline('null'::json, tsquery('aaa & bbb'));
+select ts_headline('{}'::json, tsquery('aaa & bbb'));
+select ts_headline('[]'::json, tsquery('aaa & bbb'));
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index e2eaca0..2e31ee6 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -878,3 +878,26 @@ select jsonb_insert('{"a": {"b": "value"}}', '{a, c}', '"new_value"', true);
 
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"');
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true);
+
+-- jsonb to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+
+-- jsonb to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+
+-- ts_vector corner cases
+select to_tsvector('""'::jsonb);
+select to_tsvector('{}'::jsonb);
+select to_tsvector('[]'::jsonb);
+select to_tsvector('null'::jsonb);
+
+-- ts_headline for jsonb
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+
+-- corner cases for ts_headline with jsonb
+select ts_headline('null'::jsonb, tsquery('aaa & bbb'));
+select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));
+select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));

#10

Andrew Dunstan

andrew.dunstan@2ndquadrant.com

almost 9 years ago

In reply to: Dmitry Dolgov (#9)

Re: [PATCH] few fts functions for jsonb

On 29 March 2017 at 16:19, Dmitry Dolgov <9erthalion6@gmail.com> wrote:

On 29 March 2017 at 18:28, Andrew Dunstan <andrew.dunstan@2ndquadrant.com>
wrote:

These patches seem fundamentally OK. But I'm still not happy with the
naming etc.

I've changed names for all functions and action definitions, moved out the
changes in header file to `jsonapi.h` and removed `is_jsonb_data` macro. So
it
should be better now.

I have just noticed as I was writing/testing the non-existent docs for
this patch that it doesn't supply variants of to_tsvector that take a
regconfig as the first argument. Is there a reason for that? Why
should the json(b) versions be different from the text versions?

cheers

andrew

--
Andrew Dunstan https://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#11

Dmitry Dolgov

9erthalion6@gmail.com

almost 9 years ago

In reply to: Andrew Dunstan (#10)

1 attachment(s)

Re: [PATCH] few fts functions for jsonb

On 31 March 2017 at 00:01, Andrew Dunstan <andrew.dunstan@2ndquadrant.com>
wrote:

I have just noticed as I was writing/testing the non-existent docs for
this patch that it doesn't supply variants of to_tsvector that take a
regconfig as the first argument. Is there a reason for that? Why
should the json(b) versions be different from the text versions?

No, there is no reason, I just missed that. Here is a new version of the
patch (only the functions part)
to add those variants.

Attachments:

jsonb_fts_functions_v4.patchtext/x-patch; charset=US-ASCII; name=jsonb_fts_functions_v4.patchDownload

diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 6e5de8f..f19383e 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -16,6 +16,7 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonapi.h"
 
 
 typedef struct MorphOpaque
@@ -24,6 +25,14 @@ typedef struct MorphOpaque
 	int			qoperator;		/* query operator */
 } MorphOpaque;
 
+typedef struct TSVectorBuildState
+{
+	ParsedText	*prs;
+	TSVector	result;
+	Oid			cfgId;
+} TSVectorBuildState;
+
+static void add_to_tsvector(void *state, char *elem_value, int elem_len);
 
 Datum
 get_current_ts_config(PG_FUNCTION_ARGS)
@@ -256,6 +265,135 @@ to_tsvector(PG_FUNCTION_ARGS)
 										PointerGetDatum(in)));
 }
 
+Datum
+jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+	Oid					cfgId = PG_GETARG_OID(0);
+	Jsonb				*jb = PG_GETARG_JSONB(1);
+	TSVectorBuildState	state;
+	ParsedText			*prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+	prs->words = NULL;
+	state.result = NULL;
+	state.cfgId = cfgId;
+	state.prs = prs;
+
+	iterate_jsonb_string_values(jb, &state, (JsonIterateStringValuesAction) add_to_tsvector);
+
+	PG_FREE_IF_COPY(jb, 1);
+
+	if (state.result == NULL)
+	{
+		/* There weren't any string elements in jsonb,
+		 * so wee need to return an empty vector */
+
+		if (prs->words != NULL)
+			pfree(prs->words);
+
+		state.result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+		state.result->size = 0;
+	}
+
+	PG_RETURN_TSVECTOR(state.result);
+}
+
+Datum
+jsonb_to_tsvector(PG_FUNCTION_ARGS)
+{
+	Jsonb	*jb = PG_GETARG_JSONB(0);
+	Oid		cfgId;
+
+	cfgId = getTSCurrentConfig(true);
+	PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid,
+										ObjectIdGetDatum(cfgId),
+										JsonbGetDatum(jb)));
+}
+
+Datum
+json_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+	Oid					cfgId = PG_GETARG_OID(0);
+	text				*json = PG_GETARG_TEXT_P(1);
+	TSVectorBuildState	state;
+	ParsedText			*prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+	prs->words = NULL;
+	state.result = NULL;
+	state.cfgId = cfgId;
+	state.prs = prs;
+
+	iterate_json_string_values(json, &state, (JsonIterateStringValuesAction) add_to_tsvector);
+
+	PG_FREE_IF_COPY(json, 1);
+	if (state.result == NULL)
+	{
+		/* There weren't any string elements in json,
+		 * so wee need to return an empty vector */
+
+		if (prs->words != NULL)
+			pfree(prs->words);
+
+		state.result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+		state.result->size = 0;
+	}
+
+	PG_RETURN_TSVECTOR(state.result);
+}
+
+Datum
+json_to_tsvector(PG_FUNCTION_ARGS)
+{
+	text	*json = PG_GETARG_TEXT_P(0);
+	Oid		cfgId;
+
+	cfgId = getTSCurrentConfig(true);
+	PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid,
+										ObjectIdGetDatum(cfgId),
+										PointerGetDatum(json)));
+}
+
+/*
+ * Extend current TSVector from _state with a new one,
+ * build over a json(b) element.
+ */
+static void
+add_to_tsvector(void *_state, char *elem_value, int elem_len)
+{
+	TSVectorBuildState *state = (TSVectorBuildState *) _state;
+	ParsedText	*prs = state->prs;
+	TSVector	item_vector;
+	int			i;
+
+	prs->lenwords = elem_len / 6;
+	if (prs->lenwords == 0)
+		prs->lenwords = 2;
+
+	prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
+	prs->curwords = 0;
+	prs->pos = 0;
+
+	parsetext(state->cfgId, prs, elem_value, elem_len);
+
+	if (prs->curwords)
+	{
+		if (state->result != NULL)
+		{
+			for (i = 0; i < prs->curwords; i++)
+				prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP;
+
+			item_vector = make_tsvector(prs);
+
+			state->result = (TSVector) DirectFunctionCall2(tsvector_concat,
+									TSVectorGetDatum(state->result),
+									PointerGetDatum(item_vector));
+		}
+		else
+			state->result = make_tsvector(prs);
+	}
+}
+
 /*
  * to_tsquery
  */
diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c
index 8ca1c62..6e4e445 100644
--- a/src/backend/tsearch/wparser.c
+++ b/src/backend/tsearch/wparser.c
@@ -20,6 +20,7 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonapi.h"
 #include "utils/varlena.h"
 
 
@@ -31,6 +32,19 @@ typedef struct
 	LexDescr   *list;
 } TSTokenTypeStorage;
 
+/* state for ts_headline_json_* */
+typedef struct HeadlineJsonState
+{
+	HeadlineParsedText *prs;
+	TSConfigCacheEntry *cfg;
+	TSParserCacheEntry *prsobj;
+	TSQuery				query;
+	List				*prsoptions;
+	bool				transformed;
+} HeadlineJsonState;
+
+static text * headline_json_value(void *_state, char *elem_value, int elem_len);
+
 static void
 tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
 {
@@ -362,3 +376,179 @@ ts_headline_opt(PG_FUNCTION_ARGS)
 										PG_GETARG_DATUM(1),
 										PG_GETARG_DATUM(2)));
 }
+
+Datum
+ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
+{
+	Jsonb			*out, *jb = PG_GETARG_JSONB(1);
+	TSQuery			query = PG_GETARG_TSQUERY(2);
+	text			*opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+	JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
+
+	HeadlineParsedText prs;
+	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+	memset(&prs, 0, sizeof(HeadlineParsedText));
+	prs.lenwords = 32;
+	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+	state->prs = &prs;
+	state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
+	state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+	state->query = query;
+	if (opt)
+		state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
+	else
+		state->prsoptions = NIL;
+
+	if (!OidIsValid(state->prsobj->headlineOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+		   errmsg("text search parser does not support headline creation")));
+
+	out = transform_jsonb_string_values(jb, state, action);
+
+	PG_FREE_IF_COPY(jb, 1);
+	PG_FREE_IF_COPY(query, 2);
+	if (opt)
+		PG_FREE_IF_COPY(opt, 3);
+
+	pfree(prs.words);
+
+	if (state->transformed)
+	{
+		pfree(prs.startsel);
+		pfree(prs.stopsel);
+	}
+
+	PG_RETURN_JSONB(out);
+}
+
+Datum
+ts_headline_jsonb(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
+{
+	text				*json = PG_GETARG_TEXT_P(1);
+	TSQuery				query = PG_GETARG_TSQUERY(2);
+	text				*opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+	text				*out;
+	JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
+
+	HeadlineParsedText prs;
+	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+	memset(&prs, 0, sizeof(HeadlineParsedText));
+	prs.lenwords = 32;
+	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+	state->prs = &prs;
+	state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
+	state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+	state->query = query;
+	if (opt)
+		state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
+	else
+		state->prsoptions = NIL;
+
+	if (!OidIsValid(state->prsobj->headlineOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+		   errmsg("text search parser does not support headline creation")));
+
+	out = transform_json_string_values(json, state, action);
+
+	PG_FREE_IF_COPY(json, 1);
+	PG_FREE_IF_COPY(query, 2);
+	if (opt)
+		PG_FREE_IF_COPY(opt, 3);
+	pfree(prs.words);
+
+	if (state->transformed)
+	{
+		pfree(prs.startsel);
+		pfree(prs.stopsel);
+	}
+
+	PG_RETURN_TEXT_P(out);
+}
+
+Datum
+ts_headline_json(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_json_byid(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_json_opt(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
+								  ObjectIdGetDatum(getTSCurrentConfig(true)),
+										PG_GETARG_DATUM(0),
+										PG_GETARG_DATUM(1),
+										PG_GETARG_DATUM(2)));
+}
+
+
+/*
+ * Return headline in text from, generated from a json(b) element
+ */
+static text *
+headline_json_value(void *_state, char *elem_value, int elem_len)
+{
+	HeadlineJsonState *state = (HeadlineJsonState *) _state;
+
+	HeadlineParsedText *prs = state->prs;
+	TSConfigCacheEntry *cfg = state->cfg;
+	TSParserCacheEntry *prsobj = state->prsobj;
+	TSQuery	query = state->query;
+	List *prsoptions = state->prsoptions;
+
+	prs->curwords = 0;
+	hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
+	FunctionCall3(&(prsobj->prsheadline),
+				  PointerGetDatum(prs),
+				  PointerGetDatum(prsoptions),
+				  PointerGetDatum(query));
+
+	state->transformed = true;
+	return generateHeadline(prs);
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index a4cc86d..db3d869 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4759,6 +4759,24 @@ DESCR("generate headline");
 DATA(insert OID = 3755 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 25 "25 3615" _null_ _null_ _null_ _null_ _null_ ts_headline _null_ _null_ _null_ ));
 DESCR("generate headline");
 
+DATA(insert OID = 4201 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 3802 "3734 3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid_opt _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4202 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 3802 "3734 3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4203 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 3802 "3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_opt _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4204 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 3802 "3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+
+DATA(insert OID = 4205 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 114 "3734 114 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_json_byid_opt _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4206 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 114 "3734 114 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_json_byid _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4207 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 114 "114 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_json_opt _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4208 (  ts_headline	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 114 "114 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_json _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+
 DATA(insert OID = 3745 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3614 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsvector_byid _null_ _null_ _null_ ));
 DESCR("transform to tsvector");
 DATA(insert OID = 3746 (  to_tsquery		PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsquery_byid _null_ _null_ _null_ ));
@@ -4775,6 +4793,14 @@ DATA(insert OID = 3751 (  plainto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f f t f s
 DESCR("transform to tsquery");
 DATA(insert OID = 5001 (  phraseto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
 DESCR("transform to tsquery");
+DATA(insert OID = 4209 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform jsonb to tsvector");
+DATA(insert OID = 4210 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform json to tsvector");
+DATA(insert OID = 4211 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 3614 "3734 3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector_byid _null_ _null_ _null_ ));
+DESCR("transform jsonb to tsvector");
+DATA(insert OID = 4212 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 3614 "3734 114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector_byid _null_ _null_ _null_ ));
+DESCR("transform json to tsvector");
 
 DATA(insert OID = 3752 (  tsvector_update_trigger			PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ ));
 DESCR("trigger for automatic update of tsvector column");
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index 155650c..873e2e1 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -86,6 +86,15 @@ typedef struct
 #define MAXNUMPOS	(256)
 #define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
 
+/*
+ * In case if a TSVector contains several parts and we want to treat them as
+ * separate, it's necessary to add an artificial increment to position of each
+ * lexeme from every next part. It's required to avoid the situation when
+ * tsquery can find a phrase consisting of lexemes from two of such parts.
+ * TS_JUMP defined a value of this increment.
+ */
+#define TS_JUMP 1
+
 /* This struct represents a complete tsvector datum */
 typedef struct
 {
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index 1bb8768..47b2b6e 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -1674,3 +1674,93 @@ select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }');
  {"a":{},"d":{}}
 (1 row)
 
+-- json to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- json to tsvector with config
+select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- json to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+                                to_tsvector                                 
+----------------------------------------------------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
+(1 row)
+
+-- ts_vector corner cases
+select to_tsvector('""'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('{}'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('[]'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('null'::json);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+-- ts_headline for json
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+                                               ts_headline                                               
+---------------------------------------------------------------------------------------------------------
+ {"a":"aaa <b>bbb</b>","b":{"c":"ccc <b>ddd</b> fff","c1":"ccc1 ddd1"},"d":["ggg <b>hhh</b>","iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+                                      ts_headline                                       
+----------------------------------------------------------------------------------------
+ {"a":"aaa <b>bbb</b>","b":{"c":"ccc <b>ddd</b> fff"},"d":["ggg <b>hhh</b>","iii jjj"]}
+(1 row)
+
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                       ts_headline                                        
+------------------------------------------------------------------------------------------
+ {"a":"aaa <bbb>","b":{"c":"ccc <ddd> fff","c1":"ccc1 ddd1"},"d":["ggg <hhh>","iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                       ts_headline                                        
+------------------------------------------------------------------------------------------
+ {"a":"aaa <bbb>","b":{"c":"ccc <ddd> fff","c1":"ccc1 ddd1"},"d":["ggg <hhh>","iii jjj"]}
+(1 row)
+
+-- corner cases for ts_headline with json
+select ts_headline('null'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ null
+(1 row)
+
+select ts_headline('{}'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ {}
+(1 row)
+
+select ts_headline('[]'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ []
+(1 row)
+
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index 8ec4150..e72a950 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -3474,3 +3474,93 @@ HINT:  Try using the function jsonb_set to replace key value.
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true);
 ERROR:  cannot replace existing key
 HINT:  Try using the function jsonb_set to replace key value.
+-- jsonb to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- jsonb to tsvector with config
+select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- jsonb to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+                                to_tsvector                                 
+----------------------------------------------------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
+(1 row)
+
+-- ts_vector corner cases
+select to_tsvector('""'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('{}'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('[]'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+select to_tsvector('null'::jsonb);
+ to_tsvector 
+-------------
+ 
+(1 row)
+
+-- ts_headline for jsonb
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+                                                   ts_headline                                                    
+------------------------------------------------------------------------------------------------------------------
+ {"a": "aaa <b>bbb</b>", "b": {"c": "ccc <b>ddd</b> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <b>hhh</b>", "iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+                                          ts_headline                                          
+-----------------------------------------------------------------------------------------------
+ {"a": "aaa <b>bbb</b>", "b": {"c": "ccc <b>ddd</b> fff"}, "d": ["ggg <b>hhh</b>", "iii jjj"]}
+(1 row)
+
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                            ts_headline                                            
+---------------------------------------------------------------------------------------------------
+ {"a": "aaa <bbb>", "b": {"c": "ccc <ddd> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <hhh>", "iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                            ts_headline                                            
+---------------------------------------------------------------------------------------------------
+ {"a": "aaa <bbb>", "b": {"c": "ccc <ddd> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <hhh>", "iii jjj"]}
+(1 row)
+
+-- corner cases for ts_headline with jsonb
+select ts_headline('null'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ null
+(1 row)
+
+select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ {}
+(1 row)
+
+select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ []
+(1 row)
+
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 5e61922..1acf4de 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -551,3 +551,29 @@ select json_strip_nulls('[1,{"a":1,"b":null,"c":2},3]');
 
 -- an empty object is not null and should not be stripped
 select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }');
+
+-- json to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
+
+-- json to tsvector with config
+select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
+
+-- json to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+
+-- ts_vector corner cases
+select to_tsvector('""'::json);
+select to_tsvector('{}'::json);
+select to_tsvector('[]'::json);
+select to_tsvector('null'::json);
+
+-- ts_headline for json
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+
+-- corner cases for ts_headline with json
+select ts_headline('null'::json, tsquery('aaa & bbb'));
+select ts_headline('{}'::json, tsquery('aaa & bbb'));
+select ts_headline('[]'::json, tsquery('aaa & bbb'));
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index e2eaca0..c9fa1fc 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -878,3 +878,29 @@ select jsonb_insert('{"a": {"b": "value"}}', '{a, c}', '"new_value"', true);
 
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"');
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true);
+
+-- jsonb to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+
+-- jsonb to tsvector with config
+select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+
+-- jsonb to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+
+-- ts_vector corner cases
+select to_tsvector('""'::jsonb);
+select to_tsvector('{}'::jsonb);
+select to_tsvector('[]'::jsonb);
+select to_tsvector('null'::jsonb);
+
+-- ts_headline for jsonb
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+
+-- corner cases for ts_headline with jsonb
+select ts_headline('null'::jsonb, tsquery('aaa & bbb'));
+select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));
+select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));

#12

Oleg Bartunov

obartunov@gmail.com

almost 9 years ago

In reply to: Dmitry Dolgov (#11)

Re: [PATCH] few fts functions for jsonb

On 30 Mar 2017 23:43, "Dmitry Dolgov" <9erthalion6@gmail.com> wrote:

On 31 March 2017 at 00:01, Andrew Dunstan <andrew.dunstan@2ndquadrant.com>
wrote:

I have just noticed as I was writing/testing the non-existent docs for
this patch that it doesn't supply variants of to_tsvector that take a
regconfig as the first argument. Is there a reason for that? Why
should the json(b) versions be different from the text versions?

No, there is no reason, I just missed that. Here is a new version of the
patch (only the functions part)
to add those variants.

Congratulations with patch committed, who will write an addition
documentation? I think we need to touch FTS and JSON parts.

#13

Andrew Dunstan

andrew.dunstan@2ndquadrant.com

almost 9 years ago

In reply to: Oleg Bartunov (#12)

Re: [PATCH] few fts functions for jsonb

On 03/31/2017 03:17 PM, Oleg Bartunov wrote:

On 30 Mar 2017 23:43, "Dmitry Dolgov" <9erthalion6@gmail.com
<mailto:9erthalion6@gmail.com>> wrote:

On 31 March 2017 at 00:01, Andrew Dunstan
<andrew.dunstan@2ndquadrant.com
<mailto:andrew.dunstan@2ndquadrant.com>> wrote:

I have just noticed as I was writing/testing the non-existent

docs for

this patch that it doesn't supply variants of to_tsvector that

take a

regconfig as the first argument. Is there a reason for that? Why
should the json(b) versions be different from the text versions?

No, there is no reason, I just missed that. Here is a new version
of the patch (only the functions part)
to add those variants.

Congratulations with patch committed, who will write an addition
documentation? I think we need to touch FTS and JSON parts.

I added documentation when I committed it for the new functions, in the
FTS section. I'm not sure what we need to add to the JSON section if
anything.

cheers

andrew

--
Andrew Dunstan https://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#14

Andres Freund

andres@anarazel.de

almost 9 years ago

In reply to: Andrew Dunstan (#13)

Re: [PATCH] few fts functions for jsonb

On 2017-04-01 16:20:46 -0400, Andrew Dunstan wrote:

On 03/31/2017 03:17 PM, Oleg Bartunov wrote:

On 30 Mar 2017 23:43, "Dmitry Dolgov" <9erthalion6@gmail.com
<mailto:9erthalion6@gmail.com>> wrote:

On 31 March 2017 at 00:01, Andrew Dunstan
<andrew.dunstan@2ndquadrant.com
<mailto:andrew.dunstan@2ndquadrant.com>> wrote:

I have just noticed as I was writing/testing the non-existent

docs for

this patch that it doesn't supply variants of to_tsvector that

take a

regconfig as the first argument. Is there a reason for that? Why
should the json(b) versions be different from the text versions?

No, there is no reason, I just missed that. Here is a new version
of the patch (only the functions part)
to add those variants.

Congratulations with patch committed, who will write an addition
documentation? I think we need to touch FTS and JSON parts.

I added documentation when I committed it for the new functions, in the
FTS section. I'm not sure what we need to add to the JSON section if
anything.

I see that the CF entry for this hasn't been marked as committed:
https://commitfest.postgresql.org/13/1054/
Is there anything left here?

- Andres

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#15

Sven R. Kunze

srkunze@mail.de

almost 9 years ago

In reply to: Andrew Dunstan (#13)

Re: [PATCH] few fts functions for jsonb

On 01.04.2017 22:20, Andrew Dunstan wrote:

I added documentation when I committed it for the new functions, in the
FTS section. I'm not sure what we need to add to the JSON section if
anything.

Not sure, if this is related but the formatting of
https://www.postgresql.org/docs/devel/static/functions-textsearch.html
looks a bit strange.

Just 2 questions/notes:
1) in what order are the values of the JSON extracted?

2) Regarding the additional line:
to_tsvector([ config regconfig , ] document json(b)) tsvector reduce
document text to tsvector to_tsvector('english', '{"a": "The Fat
Rats"}'::json) 'fat':2 'rat':3

Maybe change "reduce document text to tsvector" to "extracting JSON
values <in what order> and reduce to tsvector"?

Sven

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#16

Andrew Dunstan

andrew.dunstan@2ndquadrant.com

almost 9 years ago

In reply to: Andres Freund (#14)

Re: [PATCH] few fts functions for jsonb

On 04/03/2017 02:22 PM, Andres Freund wrote:

On 2017-04-01 16:20:46 -0400, Andrew Dunstan wrote:

On 03/31/2017 03:17 PM, Oleg Bartunov wrote:

On 30 Mar 2017 23:43, "Dmitry Dolgov" <9erthalion6@gmail.com
<mailto:9erthalion6@gmail.com>> wrote:

On 31 March 2017 at 00:01, Andrew Dunstan
<andrew.dunstan@2ndquadrant.com
<mailto:andrew.dunstan@2ndquadrant.com>> wrote:

I have just noticed as I was writing/testing the non-existent

docs for

this patch that it doesn't supply variants of to_tsvector that

take a

regconfig as the first argument. Is there a reason for that? Why
should the json(b) versions be different from the text versions?

No, there is no reason, I just missed that. Here is a new version
of the patch (only the functions part)
to add those variants.

Congratulations with patch committed, who will write an addition
documentation? I think we need to touch FTS and JSON parts.

I added documentation when I committed it for the new functions, in the
FTS section. I'm not sure what we need to add to the JSON section if
anything.

I see that the CF entry for this hasn't been marked as committed:
https://commitfest.postgresql.org/13/1054/
Is there anything left here?

Says "Status committed" for me. I fixed this in Sunday after Tom prodded me.

cheers

andrew

--
Andrew Dunstan https://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#17

Andrew Dunstan

andrew.dunstan@2ndquadrant.com

almost 9 years ago

In reply to: Sven R. Kunze (#15)

Re: [PATCH] few fts functions for jsonb

On 04/03/2017 02:44 PM, Sven R. Kunze wrote:

On 01.04.2017 22:20, Andrew Dunstan wrote:

I added documentation when I committed it for the new functions, in the
FTS section. I'm not sure what we need to add to the JSON section if
anything.

Not sure, if this is related but the formatting of
https://www.postgresql.org/docs/devel/static/functions-textsearch.html
looks a bit strange.

Just 2 questions/notes:
1) in what order are the values of the JSON extracted?

In the order they exist in the underlying document.

2) Regarding the additional line:
to_tsvector([ config regconfig , ] document json(b)) tsvector
reduce document text to tsvector to_tsvector('english', '{"a": "The
Fat Rats"}'::json) 'fat':2 'rat':3

Maybe change "reduce document text to tsvector" to "extracting JSON
values <in what order> and reduce to tsvector"?

OK, I will do something along those lines.

cheers

andrew

--
Andrew Dunstan https://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#18

Sven R. Kunze

srkunze@mail.de

almost 9 years ago

In reply to: Andrew Dunstan (#17)

Re: [PATCH] few fts functions for jsonb

On 03.04.2017 21:30, Andrew Dunstan wrote:

On 04/03/2017 02:44 PM, Sven R. Kunze wrote:

On 01.04.2017 22:20, Andrew Dunstan wrote:

I added documentation when I committed it for the new functions, in the
FTS section. I'm not sure what we need to add to the JSON section if
anything.

Not sure, if this is related but the formatting of
https://www.postgresql.org/docs/devel/static/functions-textsearch.html
looks a bit strange.

Just 2 questions/notes:
1) in what order are the values of the JSON extracted?

In the order they exist in the underlying document.

Just asking as the order can have implications for fulltext searches.
So, might be valuable for the docs.

Are these documents equally ordered in this sense?

srkunze=# select '{"a": "abc", "b": "def"}'::jsonb;
jsonb
--------------------------
{"a": "abc", "b": "def"}
(1 row)

srkunze=# select '{"b": "def", "a": "abc"}'::jsonb;
jsonb
--------------------------
{"a": "abc", "b": "def"}
(1 row)

Also what about non-ascii keys? Are they ordered by the default locale
of the PostgreSQL cluster (say de_DE.utf-8)?

2) Regarding the additional line:
to_tsvector([ config regconfig , ] document json(b)) tsvector
reduce document text to tsvector to_tsvector('english', '{"a": "The
Fat Rats"}'::json) 'fat':2 'rat':3

Maybe change "reduce document text to tsvector" to "extracting JSON
values <in what order> and reduce to tsvector"?

OK, I will do something along those lines.

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#19

Andrew Dunstan

andrew.dunstan@2ndquadrant.com

almost 9 years ago

In reply to: Sven R. Kunze (#18)

Re: [PATCH] few fts functions for jsonb

On 04/03/2017 03:41 PM, Sven R. Kunze wrote:

On 03.04.2017 21:30, Andrew Dunstan wrote:

On 04/03/2017 02:44 PM, Sven R. Kunze wrote:

On 01.04.2017 22:20, Andrew Dunstan wrote:

I added documentation when I committed it for the new functions, in
the
FTS section. I'm not sure what we need to add to the JSON section if
anything.

Not sure, if this is related but the formatting of
https://www.postgresql.org/docs/devel/static/functions-textsearch.html
looks a bit strange.

Just 2 questions/notes:
1) in what order are the values of the JSON extracted?

In the order they exist in the underlying document.

Just asking as the order can have implications for fulltext searches.
So, might be valuable for the docs.

Are these documents equally ordered in this sense?

srkunze=# select '{"a": "abc", "b": "def"}'::jsonb;
jsonb
--------------------------
{"a": "abc", "b": "def"}
(1 row)

srkunze=# select '{"b": "def", "a": "abc"}'::jsonb;
jsonb
--------------------------
{"a": "abc", "b": "def"}
(1 row)

Yes, when converted to jsonb these two documents are identical.

Also what about non-ascii keys? Are they ordered by the default locale
of the PostgreSQL cluster (say de_DE.utf-8)?

Yes, I believe so.

cheers

andrew

--
Andrew Dunstan https://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers