From cbdeb0bb636f3b7619d0a3019854809ea5565dac Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Sun, 8 Nov 2009 16:30:42 +0100
Subject: [PATCH] Fix TSearch inefficiency because of repeated copying of strings

---
 src/backend/tsearch/wparser_def.c |   63 ++++++++++++++++++++++++++++++++++--
 1 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 301c1eb..7bbd826 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -328,6 +328,41 @@ TParserInit(char *str, int len)
 	return prs;
 }
 
+/*
+ * TParserCopyInit: create a TParser that shares its input with "orig"
+ * instead of holding a private copy.  The copy starts parsing at orig's
+ * current position.  This is useful because TParsers are created
+ * recursively in some places, and repeatedly copying the string can
+ * cause major inefficiency.
+ * The copy must be closed (TParserCopyClose) before the original is.
+ */
+static TParser *
+TParserCopyInit(const TParser *orig)
+{
+	TParser    *prs = (TParser *) palloc0(sizeof(TParser));
+
+	prs->charmaxlen = orig->charmaxlen;
+	prs->usewide = orig->usewide;
+
+	/* Point into orig's buffers at its current position; nothing is copied */
+	prs->str = orig->str + orig->state->posbyte;
+	prs->lenstr = orig->lenstr - orig->state->posbyte;
+	if (orig->pgwstr)
+		prs->pgwstr = orig->pgwstr + orig->state->poschar;
+	if (orig->wstr)
+		prs->wstr = orig->wstr + orig->state->poschar;
+
+	prs->state = newTParserPosition(NULL);
+	prs->state->state = TPS_Base;
+
+#ifdef WPARSER_TRACE
+	fprintf(stderr, "parsing copy \"%.*s\"\n", prs->lenstr, prs->str);
+#endif
+
+	return prs;
+}
+
+
 static void
 TParserClose(TParser *prs)
 {
@@ -350,6 +385,26 @@ TParserClose(TParser *prs)
 }
 
 /*
+ * See TParserCopyInit
+ */
+static void
+TParserCopyClose(TParser *prs)
+{
+	while (prs->state)
+	{
+		TParserPosition *ptr = prs->state->prev;
+
+		pfree(prs->state);
+		prs->state = ptr;
+	}
+#ifdef WPARSER_TRACE
+	fprintf(stderr, "closing parser copy\n");
+#endif
+	pfree(prs);					/* str, wstr and pgwstr are not freed here */
+}
+
+
+/*
  * Character-type support functions, equivalent to is* macros, but
  * working with any possible encodings and locales. Notes:
  *  - with multibyte encoding and C-locale isw* function may fail
@@ -617,7 +672,7 @@ p_isignore(TParser *prs)
 static int
 p_ishost(TParser *prs)
 {
-	TParser    *tmpprs = TParserInit(prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte);
+	TParser *tmpprs = TParserCopyInit(prs);
 	int			res = 0;
 
 	tmpprs->wanthost = true;
@@ -631,7 +686,7 @@ p_ishost(TParser *prs)
 		prs->state->charlen = tmpprs->state->charlen;
 		res = 1;
 	}
-	TParserClose(tmpprs);
+	TParserCopyClose(tmpprs);
 
 	return res;
 }
@@ -639,7 +694,7 @@ p_ishost(TParser *prs)
 static int
 p_isURLPath(TParser *prs)
 {
-	TParser    *tmpprs = TParserInit(prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte);
+	TParser *tmpprs = TParserCopyInit(prs);
 	int			res = 0;
 
 	tmpprs->state = newTParserPosition(tmpprs->state);
@@ -654,7 +709,7 @@ p_isURLPath(TParser *prs)
 		prs->state->charlen = tmpprs->state->charlen;
 		res = 1;
 	}
-	TParserClose(tmpprs);
+	TParserCopyClose(tmpprs);
 
 	return res;
 }
-- 
1.6.5.12.gd65df24

