? GNUmakefile
? config.log
? config.status
? contrib/tsearch2/rank.h
? src/Makefile.global
? src/include/pg_config.h
? src/include/stamp-h
? src/interfaces/ecpg/include/ecpg_config.h
Index: contrib/tsearch2/rank.c
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/tsearch2/rank.c,v
retrieving revision 1.23
diff -c -r1.23 rank.c
*** contrib/tsearch2/rank.c	27 Feb 2007 23:48:06 -0000	1.23
--- contrib/tsearch2/rank.c	12 Nov 2007 03:28:20 -0000
***************
*** 21,26 ****
--- 21,27 ----
  #include "tsvector.h"
  #include "query.h"
  #include "common.h"
+ #include "rank.h"
  
  PG_FUNCTION_INFO_V1(rank);
  Datum		rank(PG_FUNCTION_ARGS);
***************
*** 419,433 ****
  }
  
  
- typedef struct
- {
- 	ITEM	  **item;
- 	int16		nitem;
- 	bool		needfree;
- 	uint8		wclass;
- 	int32		pos;
- }	DocRepresentation;
- 
  static int
  compareDocR(const void *a, const void *b)
  {
--- 420,425 ----
***************
*** 457,473 ****
  	}
  }
  
! typedef struct
! {
! 	int			pos;
! 	int			p;
! 	int			q;
! 	DocRepresentation *begin;
! 	DocRepresentation *end;
! }	Extention;
! 
! 
! static bool
  Cover(DocRepresentation * doc, int len, QUERYTYPE * query, Extention * ext)
  {
  	DocRepresentation *ptr;
--- 449,455 ----
  	}
  }
  
! bool
  Cover(DocRepresentation * doc, int len, QUERYTYPE * query, Extention * ext)
  {
  	DocRepresentation *ptr;
***************
*** 538,544 ****
  	return Cover(doc, len, query, ext);
  }
  
! static DocRepresentation *
  get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
  {
  	ITEM	   *item = GETQUERY(query);
--- 520,526 ----
  	return Cover(doc, len, query, ext);
  }
  
! DocRepresentation *
  get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
  {
  	ITEM	   *item = GETQUERY(query);
Index: contrib/tsearch2/ts_cfg.c
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/tsearch2/ts_cfg.c,v
retrieving revision 1.24
diff -c -r1.24 ts_cfg.c
*** contrib/tsearch2/ts_cfg.c	6 Apr 2007 04:21:41 -0000	1.24
--- contrib/tsearch2/ts_cfg.c	12 Nov 2007 03:28:20 -0000
***************
*** 646,648 ****
--- 646,715 ----
  	ts_error(NOTICE, "TSearch cache cleaned");
  	PG_RETURN_VOID();
  }
+ 
+ /* Append one cover (positions startpos..endpos) to headline prs, advancing *currentpos as tokens are consumed. */
+ void
+ add_cover_to_hl(WParserInfo *prsobj, HLPRSTEXT *prs, QUERYTYPE *query, LexizeData *ldata, int4 *currentpos, int4 startpos, int4 endpos)
+ {
+ 	char      *lemm = NULL;	/* token text, filled in by the getlexeme call below */
+ 	int4      lenlemm = 0;	/* token length, filled in by the getlexeme call below */
+    	ParsedLex *lexs;
+   	int4       type, startHL = 0;	/* startHL turns 1 once *currentpos >= startpos */
+    	TSLexeme  *norms;
+    	char      *coversep = " ... ";	/* emitted before a cover that begins past *currentpos */
+    	int4      coverseplen = strlen(coversep);
+    	int4      oldnumwords, newnumwords, i;
+    	if (*currentpos > endpos)
+    	{
+     	/* XXX - something wrong ... we have gone past the cover */
+     	return;
+    	}
+    	/* see if we need to add a cover separator */
+    	if (*currentpos < startpos && startpos > 0)
+    	{
+        	hladdword(prs, coversep, coverseplen, 3);
+        	prs->words[prs->curwords - 1].in = 1;	/* flag the separator word just added */
+    	}
+   	do	/* one parser token per pass; ends when endpos is reached or type <= 0 */
+    	{
+        	type = DatumGetInt32(FunctionCall3(
+         									&(prsobj->getlexeme_info),
+ 											PointerGetDatum(prsobj->prs),
+ 											PointerGetDatum(&lemm),
+ 				 							PointerGetDatum(&lenlemm)));
+ 		LexizeAddLemm(ldata, type, lemm, lenlemm);
+        	do 
+        	{
+ 			if ((norms = LexizeExec(ldata, &lexs)) != NULL)
+            	{
+             	TSLexeme *ptr = norms;
+                	*currentpos += 1;	/* every non-NULL LexizeExec() result advances the position by one */
+                	while(ptr->lexeme)
+                	{
+ 					if (ptr->flags & TSL_ADDPOS)
+ 						*currentpos += 1;	/* plus one more per lexeme flagged TSL_ADDPOS */
+ 					ptr++;
+   				}       
+ 			}
+ 			/* start adding words once the cover's start position has been reached */
+ 			if (!startHL && *currentpos >= startpos)
+ 				startHL = 1;
+ 
+ 		 	if (startHL)
+ 			{
+ 				oldnumwords = prs->curwords;
+ 				addHLParsedLex(prs, query, lexs, norms);
+ 				newnumwords = prs->curwords;
+ 				for (i = oldnumwords; i < newnumwords; i++)	/* visit only the words addHLParsedLex() just appended */
+ 				{
+ 					prs->words[i].in = 1;
+ 					if (prs->words[i].item)
+ 						prs->words[i].selected = 1;
+ 				}
+ 			}
+ 		} while(norms && *currentpos < endpos);	/* keep draining LexizeExec() unless endpos has been reached */
+ 		if (*currentpos >= endpos)
+ 			break;
+ 	} while (type > 0);
+ }
+ 
Index: contrib/tsearch2/tsearch.sql.in
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/tsearch2/tsearch.sql.in,v
retrieving revision 1.21
diff -c -r1.21 tsearch.sql.in
*** contrib/tsearch2/tsearch.sql.in	11 Nov 2007 03:25:34 -0000	1.21
--- contrib/tsearch2/tsearch.sql.in	12 Nov 2007 03:28:20 -0000
***************
*** 571,576 ****
--- 571,582 ----
  AS 'MODULE_PATHNAME', 'rank_cd_def'
  LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE;
  
+ CREATE FUNCTION headline_with_fragments(text, tsvector, text, tsquery, int4, int4, int4) -- cfg name, doc tsvector, doc text, query, max cover size, min cover size, max words
+ RETURNS text 
+ AS 'MODULE_PATHNAME', 'headline_with_fragments'
+ LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE;
+ 
+ 
  CREATE OR REPLACE FUNCTION headline(oid, text, tsquery, text)
  RETURNS text
  AS 'MODULE_PATHNAME', 'headline'
Index: contrib/tsearch2/wparser.c
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/tsearch2/wparser.c,v
retrieving revision 1.13
diff -c -r1.13 wparser.c
*** contrib/tsearch2/wparser.c	2 Apr 2007 11:42:04 -0000	1.13
--- contrib/tsearch2/wparser.c	12 Nov 2007 03:28:20 -0000
***************
*** 18,23 ****
--- 18,27 ----
  #include "snmap.h"
  #include "common.h"
  
+ #include "rank.h"
+ #include "tsvector.h"
+ #include "dict.h"
+ 
  /*********top interface**********/
  
  static Oid	current_parser_id = InvalidOid;
***************
*** 609,611 ****
--- 613,799 ----
  				(PG_NARGS() > 2) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
  										));
  }
+ 
+ 
+        
+ /* headline generation 
+  * Input:  configuration name, tsvector of a doc, doc text, tsquery, max size of an excerpt, min size of an excerpt, maxWords in a headline
+  * Output: multiple excerpts of doc that contain query words
+  */
+ 
+ struct coverpos{	/* one cover reported by Cover(), as stored by headline_with_fragments */
+   	int4 startpos;	/* first position of the cover; initialised from ext.p */
+    	int4 endpos;	/* last position of the cover; initialised from ext.q */
+    	int4 in;	/* 0 at first, set to 1 once the cover is picked for the headline */
+ };
+ 
+ PG_FUNCTION_INFO_V1(headline_with_fragments);
+ Datum      headline_with_fragments(PG_FUNCTION_ARGS);
+ 
+ /* Build a headline for the document out of the shortest query covers, each
+  * rendered by add_cover_to_hl(); yields an empty text if get_docrep() returns NULL. */
+ Datum
+ headline_with_fragments(PG_FUNCTION_ARGS)
+ {
+    	DocRepresentation* doc;
+    	Extention          ext;
+    	char*              textdata;
+    	int4               coverlen, doclen, textlen;
+    	int4               startpos = 0, endpos = 0, currentpos = 0;
+    	int4               numWords = 0;
+    	TSCfgInfo*         cfg;
+    	WParserInfo*       prsobj;
+    	text*              out;
+    	LexizeData         ldata;
+    	HLPRSTEXT          prs;
+   	int4               i, numcovers = 0, maxcovers = 32, maxstretch;
+    	int4               min, minI = 0;
+    	struct coverpos* covers = palloc(maxcovers*sizeof(struct coverpos));
+    	/* get the input parameters */
+    	text       *name     = PG_GETARG_TEXT_P(0);	/* passed to name2id_cfg() to find the configuration */
+    	tsvector   *t        = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));	/* tsvector of the document */
+    	text       *in       = PG_GETARG_TEXT_P(2);	/* document text handed to the parser */
+    	QUERYTYPE  *query    = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(3));
+    	int4       maxcoverSize = PG_GETARG_INT32(4);	/* bound used when stretching or cutting a cover */
+    	int4       mincoverSize = PG_GETARG_INT32(5);	/* selection stops once the remaining word budget is <= this */
+    	int4       maxWords  = PG_GETARG_INT32(6);	/* total word budget for the headline */
+ 
+   	SET_FUNCOID();
+    	cfg = findcfg(name2id_cfg(name));
+    	prsobj = findprs(cfg->prs_id);
+ 
+    	textdata = VARDATA(in);
+    	textlen  = VARSIZE(in) - VARHDRSZ;
+    	/* start generating covers for the query */
+    	doc = get_docrep(t, query, &doclen);
+   	if (!doc)
+    	{
+ 		pfree(covers);
+   		PG_FREE_IF_COPY(name, 0);
+   		PG_FREE_IF_COPY(t, 1);
+   		PG_FREE_IF_COPY(in, 2);
+  		PG_FREE_IF_COPY(query, 3);
+  		/* cannot do anything: return a text value holding only its header */
+  		out = (text*) palloc(VARHDRSZ);
+ 		SET_VARSIZE(out, VARHDRSZ);
+ 		PG_RETURN_POINTER(out);
+  	}
+ 
+   	memset(&prs, 0, sizeof(HLPRSTEXT));
+   	prs.lenwords = 32;
+    	prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+    	prs.startsel = "<b>";
+    	prs.stopsel  = "</b>";
+    	prs.startsellen = strlen(prs.startsel);
+    	prs.stopsellen = strlen(prs.stopsel);
+ 
+    	prsobj->prs = (void *) DatumGetPointer(
+    											FunctionCall2(
+ 															&(prsobj->start_info),
+ 															PointerGetDatum(textdata),
+ 															Int32GetDatum(textlen)));
+ 
+ 	LexizeInit(&ldata, cfg);
+ 
+    	MemSet(&ext, 0, sizeof(Extention));
+   	/* get all covers */
+    	while (Cover(doc, doclen, query, &ext))
+    	{
+   		if (numcovers >= maxcovers)
+ 		{
+ 	 		maxcovers *= 2;
+ 			covers     = repalloc(covers, sizeof(struct coverpos) * maxcovers);
+ 		}   
+ 		covers[numcovers].startpos = ext.p;
+ 		covers[numcovers].endpos   = ext.q;
+ 		covers[numcovers].in       = 0;
+  		numcovers ++;
+   	}
+    	/* choose best covers */
+    	while (maxWords - numWords > mincoverSize)
+    	{
+ 		min = 9999999;/* XXX - will not display headlines that exceed 9999999 */
+  		for (i = 0; i < numcovers; i ++)
+ 		{
+ 			coverlen = covers[i].endpos - covers[i].startpos + 1;
+ 			if (!covers[i].in && min > coverlen)
+ 			{
+ 				min  = coverlen; 
+ 				minI = i;
+ 			}   
+ 		}
+ 		if (min < 9999999)
+ 		{
+ 			covers[minI].in = 1;
+ 			/* adjust the size of cover  
+ 			* if maxcoverSize >= len 
+ 			*      then headline from ext.p - (maxcoverSize-len)/2 to ext.q + (maxcoverSize-len) /2
+ 			* if maxcoverSize < len 
+ 			*      then headline from ext.p to ext.p +  maxcoverSize 
+ 			*      (ensures starting lexeme is in the headline)
+ 			*/         
+ 			/* cut down endpos if it crosses maxWords */
+ 			startpos = covers[minI].startpos;
+ 			endpos   = covers[minI].endpos;
+ 			coverlen = endpos - startpos + 1;
+ 	
+ 			/* truncate the cover if it exceeds max words */
+ 			if(numWords + coverlen > maxWords)
+ 				endpos = startpos + maxWords - numWords - 1;	/* span is inclusive: keep exactly maxWords - numWords positions */
+ 			else
+ 			{
+ 				if (maxcoverSize >= coverlen)
+ 				{
+ 					/* what is the max we can stretch: min of 
+ 					* 1. maxcoverSize
+ 					* 2. maxWords - numWords
+ 					*/
+ 					if (maxcoverSize > maxWords - numWords)
+ 						maxstretch = maxWords - numWords;
+ 					else    
+ 						maxstretch = maxcoverSize;
+ 	
+ 					/* divide the stretch on both sides of cover */
+ 					startpos -= (maxstretch - coverlen)/2;
+ 					endpos   += (maxstretch - coverlen)/2;
+ 					if (startpos < 1)
+ 						startpos = 1;
+ 					/* XXX - do we need to check whether endpos crosses the document 
+ 					* the other function would return if the document ends or the 
+ 					* endpos is reached.
+ 					* Dropping this check for time being 
+ 					*/	
+ 				}       
+ 				else if (maxcoverSize < coverlen)
+ 					endpos   = startpos + maxcoverSize;	/* inclusive span: covers maxcoverSize + 1 positions */
+ 			}
+ 			covers[minI].startpos = startpos;
+ 			covers[minI].endpos   = endpos;
+ 			numWords += endpos - startpos + 1;
+  		}
+        	else 
+ 			break;
+   	}   
+ 
+    	/* start rendering the headline */
+    	numWords = 0;
+ 	for (i = 0; i < numcovers && numWords < maxWords; i++)
+ 	{
+ 		if (covers[i].in)
+ 			add_cover_to_hl(prsobj, &prs, query, &ldata, &currentpos, covers[i].startpos, covers[i].endpos);
+ 	}
+    
+    	FunctionCall1(
+ 					&(prsobj->end_info),
+ 					PointerGetDatum(prsobj->prs)
+ 				);
+    	out = genhl(&prs);
+    	/* clean up */
+   	pfree(covers);
+    	PG_FREE_IF_COPY(name, 0);
+    	PG_FREE_IF_COPY(t, 1);
+    	PG_FREE_IF_COPY(in, 2);
+    	PG_FREE_IF_COPY(query, 3);
+ 	
+    	PG_RETURN_POINTER(out);
+ }
