Index: doc/src/sgml/textsearch.sgml
===================================================================
RCS file: /cvsroot/pgsql/doc/src/sgml/textsearch.sgml,v
retrieving revision 1.36
diff -c -r1.36 textsearch.sgml
*** doc/src/sgml/textsearch.sgml	16 Nov 2007 03:23:07 -0000	1.36
--- doc/src/sgml/textsearch.sgml	19 Nov 2007 13:22:11 -0000
***************
*** 1862,1873 ****
       </row>
       <row>
        <entry><literal>tag</></entry>
!       <entry>HTML tag</entry>
        <entry><literal>&lt;A HREF="dictionaries.html"&gt;</literal></entry>
       </row>
       <row>
        <entry><literal>entity</></entry>
!       <entry>HTML entity</entry>
        <entry><literal>&amp;amp;</literal></entry>
       </row>
       <row>
--- 1862,1873 ----
       </row>
       <row>
        <entry><literal>tag</></entry>
!       <entry>HTML-type tag</entry>
        <entry><literal>&lt;A HREF="dictionaries.html"&gt;</literal></entry>
       </row>
       <row>
        <entry><literal>entity</></entry>
!       <entry>HTML-type entity</entry>
        <entry><literal>&amp;amp;</literal></entry>
       </row>
       <row>
Index: src/backend/tsearch/wparser_def.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/tsearch/wparser_def.c,v
retrieving revision 1.10
diff -c -r1.10 wparser_def.c
*** src/backend/tsearch/wparser_def.c	15 Nov 2007 22:25:16 -0000	1.10
--- src/backend/tsearch/wparser_def.c	19 Nov 2007 13:22:11 -0000
***************
*** 95,101 ****
  	"Hyphenated word part, all letters",
  	"Hyphenated word part, all ASCII",
  	"Space symbols",
! 	"HTML tag",
  	"Protocol head",
  	"Hyphenated word, letters and digits",
  	"Hyphenated word, all ASCII",
--- 95,101 ----
  	"Hyphenated word part, all letters",
  	"Hyphenated word part, all ASCII",
  	"Space symbols",
! 	"HTML-type tag",
  	"Protocol head",
  	"Hyphenated word, letters and digits",
  	"Hyphenated word, all ASCII",
***************
*** 105,111 ****
  	"Decimal notation",
  	"Signed integer",
  	"Unsigned integer",
! 	"HTML entity"
  };
  
  
--- 105,111 ----
  	"Decimal notation",
  	"Signed integer",
  	"Unsigned integer",
! 	"HTML-type entity"
  };
  
  
***************
*** 136,141 ****
--- 136,143 ----
  	TPS_InHTMLEntity,
  	TPS_InHTMLEntityNumFirst,
  	TPS_InHTMLEntityNum,
+ 	TPS_InHTMLEntityHexNumFirst,
+ 	TPS_InHTMLEntityHexNum,
  	TPS_InHTMLEntityEnd,
  	TPS_InTagFirst,
  	TPS_InXMLBegin,
***************
*** 815,836 ****
  	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
  	{p_iseqC, '#', A_NEXT, TPS_InHTMLEntityNumFirst, 0, NULL},
  	{p_isasclet, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL},
  	{NULL, 0, A_POP, TPS_Null, 0, NULL}
  };
  
  static const TParserStateActionItem actionTPS_InHTMLEntity[] = {
  	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
! 	{p_isasclet, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL},
  	{p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL},
  	{NULL, 0, A_POP, TPS_Null, 0, NULL}
  };
  
  static const TParserStateActionItem actionTPS_InHTMLEntityNumFirst[] = {
  	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
  	{p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL},
  	{NULL, 0, A_POP, TPS_Null, 0, NULL}
  };
  
  static const TParserStateActionItem actionTPS_InHTMLEntityNum[] = {
  	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
  	{p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL},
--- 817,852 ----
  	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
  	{p_iseqC, '#', A_NEXT, TPS_InHTMLEntityNumFirst, 0, NULL},
  	{p_isasclet, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL},
+ 	{p_iseqC, ':', A_NEXT, TPS_InHTMLEntity, 0, NULL},
+ 	{p_iseqC, '_', A_NEXT, TPS_InHTMLEntity, 0, NULL},
  	{NULL, 0, A_POP, TPS_Null, 0, NULL}
  };
  
  static const TParserStateActionItem actionTPS_InHTMLEntity[] = {
  	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
! 	{p_isalnum, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL},
! 	{p_iseqC, ':', A_NEXT, TPS_InHTMLEntity, 0, NULL},
! 	{p_iseqC, '_', A_NEXT, TPS_InHTMLEntity, 0, NULL},
! 	/* '.' and '-' may continue a name but are not accepted by the table above */
! 	{p_iseqC, '.', A_NEXT, TPS_InHTMLEntity, 0, NULL},
! 	{p_iseqC, '-', A_NEXT, TPS_InHTMLEntity, 0, NULL},
  	{p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL},
  	{NULL, 0, A_POP, TPS_Null, 0, NULL}
  };
  
  static const TParserStateActionItem actionTPS_InHTMLEntityNumFirst[] = {
  	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ 	{p_iseqC, 'x', A_NEXT, TPS_InHTMLEntityHexNumFirst, 0, NULL},
  	{p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL},
  	{NULL, 0, A_POP, TPS_Null, 0, NULL}
  };
  
+ static const TParserStateActionItem actionTPS_InHTMLEntityHexNumFirst[] = {
+ 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ 	{p_isxdigit, 0, A_NEXT, TPS_InHTMLEntityHexNum, 0, NULL},
+ 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+ };
+ 
  static const TParserStateActionItem actionTPS_InHTMLEntityNum[] = {
  	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
  	{p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL},
***************
*** 838,843 ****
--- 854,866 ----
  	{NULL, 0, A_POP, TPS_Null, 0, NULL}
  };
  
+ static const TParserStateActionItem actionTPS_InHTMLEntityHexNum[] = {
+ 	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ 	{p_isxdigit, 0, A_NEXT, TPS_InHTMLEntityHexNum, 0, NULL},
+ 	{p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL},
+ 	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+ };
+ 
  static const TParserStateActionItem actionTPS_InHTMLEntityEnd[] = {
  	{NULL, 0, A_BINGO | A_CLEAR, TPS_Base, HTMLENTITY, NULL}
  };
***************
*** 854,861 ****
  static const TParserStateActionItem actionTPS_InXMLBegin[] = {
  	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
  	/* <?xml ... */
  	{p_iseqC, 'x', A_NEXT, TPS_InTag, 0, NULL},
- 	{p_iseqC, 'X', A_NEXT, TPS_InTag, 0, NULL},
  	{NULL, 0, A_POP, TPS_Null, 0, NULL}
  };
  
--- 877,884 ----
  static const TParserStateActionItem actionTPS_InXMLBegin[] = {
  	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
  	/* <?xml ... */
+ 	/* XXX do we want states for the m and l?  Right now this accepts <?xZ */
  	{p_iseqC, 'x', A_NEXT, TPS_InTag, 0, NULL},
  	{NULL, 0, A_POP, TPS_Null, 0, NULL}
  };
  
***************
*** 1282,1287 ****
--- 1305,1312 ----
  	TPARSERSTATEACTION(TPS_InHTMLEntity),
  	TPARSERSTATEACTION(TPS_InHTMLEntityNumFirst),
  	TPARSERSTATEACTION(TPS_InHTMLEntityNum),
+ 	TPARSERSTATEACTION(TPS_InHTMLEntityHexNumFirst),
+ 	TPARSERSTATEACTION(TPS_InHTMLEntityHexNum),
  	TPARSERSTATEACTION(TPS_InHTMLEntityEnd),
  	TPARSERSTATEACTION(TPS_InTagFirst),
  	TPARSERSTATEACTION(TPS_InXMLBegin),
Index: src/test/regress/expected/tsearch.out
===================================================================
RCS file: /cvsroot/pgsql/src/test/regress/expected/tsearch.out,v
retrieving revision 1.8
diff -c -r1.8 tsearch.out
*** src/test/regress/expected/tsearch.out	27 Oct 2007 19:03:45 -0000	1.8
--- src/test/regress/expected/tsearch.out	19 Nov 2007 13:22:12 -0000
***************
*** 222,228 ****
      10 | hword_part      | Hyphenated word part, all letters
      11 | hword_asciipart | Hyphenated word part, all ASCII
      12 | blank           | Space symbols
!     13 | tag             | HTML tag
      14 | protocol        | Protocol head
      15 | numhword        | Hyphenated word, letters and digits
      16 | asciihword      | Hyphenated word, all ASCII
--- 222,228 ----
      10 | hword_part      | Hyphenated word part, all letters
      11 | hword_asciipart | Hyphenated word part, all ASCII
      12 | blank           | Space symbols
!     13 | tag             | HTML-type tag
      14 | protocol        | Protocol head
      15 | numhword        | Hyphenated word, letters and digits
      16 | asciihword      | Hyphenated word, all ASCII
***************
*** 232,238 ****
      20 | float           | Decimal notation
      21 | int             | Signed integer
      22 | uint            | Unsigned integer
!     23 | entity          | HTML entity
  (23 rows)
  
  SELECT * FROM ts_parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
--- 232,238 ----
      20 | float           | Decimal notation
      21 | int             | Signed integer
      22 | uint            | Unsigned integer
!     23 | entity          | HTML-type entity
  (23 rows)
  
  SELECT * FROM ts_parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
