diff --git a/src/backend/snowball/dict_snowball.c b/src/backend/snowball/dict_snowball.c
index 68c9213f69..aaf4ff72b6 100644
--- a/src/backend/snowball/dict_snowball.c
+++ b/src/backend/snowball/dict_snowball.c
@@ -272,11 +272,25 @@ dsnowball_lexize(PG_FUNCTION_ARGS)
 	DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0);
 	char	   *in = (char *) PG_GETARG_POINTER(1);
 	int32		len = PG_GETARG_INT32(2);
-	char	   *txt = lowerstr_with_len(in, len);
 	TSLexeme   *res = palloc0(sizeof(TSLexeme) * 2);
+	char	   *txt;
 
+	/*
+	 * Reject strings exceeding 1000 bytes, as they're surely not words in any
+	 * human language.  This restriction avoids wasting cycles on stuff like
+	 * base64-encoded data, and it protects us against possible inefficiency
+	 * or misbehavior in the stemmers (for example, the Turkish stemmer has an
+	 * unbounded recursion, so it can crash on long-enough strings).
+	 */
+	if (len <= 0 || len > 1000)
+		PG_RETURN_POINTER(res);
+
+	txt = lowerstr_with_len(in, len);
+
+	/* txt is probably not zero-length now, but we'll check anyway */
 	if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
 	{
+		/* empty or stopword, so reject */
 		pfree(txt);
 	}
 	else
