[PATCH] Add support for leading/trailing bytea trim()ing

Started by Joel Jacobsonabout 5 years ago6 messages

Joel Jacobson

joel@compiler.org

about 5 years ago

1 attachment(s)

Dear hackers,

Let's say we want to strip the leading zero bytes from '\x0000beefbabe00'::bytea.

This is currently not supported, since trim() for bytea values only support the BOTH mode:

SELECT trim(LEADING '\x00'::bytea FROM '\x0000beefbabe00'::bytea);
ERROR: function pg_catalog.ltrim(bytea, bytea) does not exist

The attached patch adds LEADING | TRAILING support for the bytea version of trim():

SELECT trim(LEADING '\x00'::bytea FROM '\x0000beefbabe00'::bytea);
ltrim
--------------
\xbeefbabe00

SELECT trim(TRAILING '\x00'::bytea FROM '\x0000beefbabe00'::bytea);
rtrim
----------------
\x0000beefbabe

Best regards,

Joel Jacobson

Attachments:

leading-trailing-trim-bytea.patchapplication/octet-stream; name=leading-trailing-trim-bytea.patchDownload

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index df29af6371..868393f5d0 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -3950,7 +3950,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
         <indexterm>
          <primary>trim</primary>
         </indexterm>
-        <function>trim</function> ( <optional> <literal>BOTH</literal> </optional>
+        <function>trim</function> ( <optional> <literal>LEADING</literal> | <literal>TRAILING</literal> | <literal>BOTH</literal> </optional>
         <parameter>bytesremoved</parameter> <type>bytea</type> <literal>FROM</literal>
         <parameter>bytes</parameter> <type>bytea</type> )
         <returnvalue>bytea</returnvalue>
@@ -3968,7 +3968,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
 
       <row>
        <entry role="func_table_entry"><para role="func_signature">
-        <function>trim</function> ( <optional> <literal>BOTH</literal> </optional> <optional> <literal>FROM</literal> </optional>
+        <function>trim</function> ( <optional> <literal>LEADING</literal> | <literal>TRAILING</literal> | <literal>BOTH</literal> </optional> <literal>FROM</literal> </optional>
         <parameter>bytes</parameter> <type>bytea</type>,
         <parameter>bytesremoved</parameter> <type>bytea</type> )
         <returnvalue>bytea</returnvalue>
diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c
index 76e666474e..e62913d7c4 100644
--- a/src/backend/utils/adt/oracle_compat.c
+++ b/src/backend/utils/adt/oracle_compat.c
@@ -25,6 +25,8 @@ static text *dotrim(const char *string, int stringlen,
 					const char *set, int setlen,
 					bool doltrim, bool dortrim);
 
+static bytea *dobyteatrim(bytea *string, bytea *set,
+					bool doltrim, bool dortrim);
 
 /********************************************************************
  *
@@ -521,27 +523,12 @@ dotrim(const char *string, int stringlen,
 	return cstring_to_text_with_len(string, stringlen);
 }
 
-/********************************************************************
- *
- * byteatrim
- *
- * Syntax:
- *
- *	 bytea byteatrim(bytea string, bytea set)
- *
- * Purpose:
- *
- *	 Returns string with characters removed from the front and back
- *	 up to the first character not in set.
- *
- * Cloned from btrim and modified as required.
- ********************************************************************/
-
-Datum
-byteatrim(PG_FUNCTION_ARGS)
+/*
+ * Common implementation for bytea versions of btrim, ltrim, rtrim
+ */
+bytea *
+dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim)
 {
-	bytea	   *string = PG_GETARG_BYTEA_PP(0);
-	bytea	   *set = PG_GETARG_BYTEA_PP(1);
 	bytea	   *ret;
 	char	   *ptr,
 			   *end,
@@ -556,7 +543,7 @@ byteatrim(PG_FUNCTION_ARGS)
 	setlen = VARSIZE_ANY_EXHDR(set);
 
 	if (stringlen <= 0 || setlen <= 0)
-		PG_RETURN_BYTEA_P(string);
+		return string;
 
 	m = stringlen;
 	ptr = VARDATA_ANY(string);
@@ -564,39 +551,124 @@ byteatrim(PG_FUNCTION_ARGS)
 	ptr2start = VARDATA_ANY(set);
 	end2 = ptr2start + setlen - 1;
 
-	while (m > 0)
-	{
-		ptr2 = ptr2start;
-		while (ptr2 <= end2)
+	if (doltrim) {
+		while (m > 0)
 		{
-			if (*ptr == *ptr2)
+			ptr2 = ptr2start;
+			while (ptr2 <= end2)
+			{
+				if (*ptr == *ptr2)
+					break;
+				++ptr2;
+			}
+			if (ptr2 > end2)
 				break;
-			++ptr2;
+			ptr++;
+			m--;
 		}
-		if (ptr2 > end2)
-			break;
-		ptr++;
-		m--;
 	}
 
-	while (m > 0)
-	{
-		ptr2 = ptr2start;
-		while (ptr2 <= end2)
+	if (dortrim) {
+		while (m > 0)
 		{
-			if (*end == *ptr2)
+			ptr2 = ptr2start;
+			while (ptr2 <= end2)
+			{
+				if (*end == *ptr2)
+					break;
+				++ptr2;
+			}
+			if (ptr2 > end2)
 				break;
-			++ptr2;
+			end--;
+			m--;
 		}
-		if (ptr2 > end2)
-			break;
-		end--;
-		m--;
 	}
 
 	ret = (bytea *) palloc(VARHDRSZ + m);
 	SET_VARSIZE(ret, VARHDRSZ + m);
 	memcpy(VARDATA(ret), ptr, m);
+	return ret;
+}
+
+/********************************************************************
+ *
+ * byteatrim
+ *
+ * Syntax:
+ *
+ *	 bytea byteatrim(bytea string, bytea set)
+ *
+ * Purpose:
+ *
+ *	 Returns string with characters removed from the front and back
+ *	 up to the first character not in set.
+ *
+ * Cloned from btrim and modified as required.
+ ********************************************************************/
+
+Datum
+byteatrim(PG_FUNCTION_ARGS)
+{
+	bytea	   *string = PG_GETARG_BYTEA_PP(0);
+	bytea	   *set = PG_GETARG_BYTEA_PP(1);
+	bytea	   *ret;
+
+	ret = dobyteatrim(string, set, true, true);
+
+	PG_RETURN_BYTEA_P(ret);
+}
+
+/********************************************************************
+ *
+ * bytealtrim
+ *
+ * Syntax:
+ *
+ *	 bytea bytealtrim(bytea string, bytea set)
+ *
+ * Purpose:
+ *
+ *	 Returns string with initial characters removed up to the first
+ *	 character not in set.
+ *
+ ********************************************************************/
+
+Datum
+bytealtrim(PG_FUNCTION_ARGS)
+{
+	bytea	   *string = PG_GETARG_BYTEA_PP(0);
+	bytea	   *set = PG_GETARG_BYTEA_PP(1);
+	bytea    *ret;
+
+	ret = dobyteatrim(string, set, true, false);
+
+	PG_RETURN_BYTEA_P(ret);
+}
+
+/********************************************************************
+ *
+ * byteartrim
+ *
+ * Syntax:
+ *
+ *	 bytea byteartrim(bytea string, bytea set)
+ *
+ * Purpose:
+ *
+ *	 Returns string with final characters removed after the last
+ *	 character not in set.
+ *
+ ********************************************************************/
+
+Datum
+byteartrim(PG_FUNCTION_ARGS)
+{
+	bytea	   *string = PG_GETARG_BYTEA_PP(0);
+	bytea	   *set = PG_GETARG_BYTEA_PP(1);
+	bytea    *ret;
+
+	ret = dobyteatrim(string, set, false, true);
 
 	PG_RETURN_BYTEA_P(ret);
 }
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index fc2202b843..6eec551105 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5748,6 +5748,12 @@
 { oid => '2015', descr => 'trim both ends of string',
   proname => 'btrim', prorettype => 'bytea', proargtypes => 'bytea bytea',
   prosrc => 'byteatrim' },
+{ oid => '9612', descr => 'trim left ends of string',
+  proname => 'ltrim', prorettype => 'bytea', proargtypes => 'bytea bytea',
+  prosrc => 'bytealtrim' },
+{ oid => '9613', descr => 'trim right ends of string',
+  proname => 'rtrim', prorettype => 'bytea', proargtypes => 'bytea bytea',
+  prosrc => 'byteartrim' },
 
 { oid => '2019', descr => 'convert timestamp with time zone to time',
   proname => 'time', provolatile => 's', prorettype => 'time',
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 298b6c48c2..a2bdd0dd36 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -2090,6 +2090,18 @@ SELECT trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea);
  Tom
 (1 row)
 
+SELECT trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea);
+  ltrim  
+---------
+ Tom\000
+(1 row)
+
+SELECT trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea);
+  rtrim  
+---------
+ \000Tom
+(1 row)
+
 SELECT btrim(E'\\000trim\\000'::bytea, E'\\000'::bytea);
  btrim 
 -------
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index ad5221ab6b..49f8ee2d51 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -711,6 +711,8 @@ SELECT repeat('Pg', 4);
 SELECT repeat('Pg', -4);
 
 SELECT trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea);
+SELECT trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea);
+SELECT trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea);
 SELECT btrim(E'\\000trim\\000'::bytea, E'\\000'::bytea);
 SELECT btrim(''::bytea, E'\\000'::bytea);
 SELECT btrim(E'\\000trim\\000'::bytea, ''::bytea);

Tom Lane

tgl@sss.pgh.pa.us

about 5 years ago

In reply to: Joel Jacobson (#1)

Re: [PATCH] Add support for leading/trailing bytea trim()ing

"Joel Jacobson" <joel@compiler.org> writes:

The attached patch adds LEADING | TRAILING support for the bytea version of trim():

No objection in principle, but you need to extend the code added by
commit 40c24bfef to know about these functions.

The grammar in the functions' descr strings seems a bit shaky too.

regards, tom lane

Joel Jacobson

joel@compiler.org

about 5 years ago

In reply to: Tom Lane (#2)

1 attachment(s)

Re: [PATCH] Add support for leading/trailing bytea trim()ing

On Fri, Dec 4, 2020, at 17:37, Tom Lane wrote:

No objection in principle, but you need to extend the code added by
commit 40c24bfef to know about these functions.

Oh, I see, that's a very nice improvement.

I've now added F_LTRIM_BYTEA_BYTEA and F_RTRIM_BYTEA_BYTEA to ruleutils.c accordingly,
and also added regress tests to create_view.sql.

The grammar in the functions' descr strings seems a bit shaky too.

Not sure what you mean? The grammar is unchanged, since it was already supported,
but the overloaded bytea functions were missing.

I did however notice I forgot to update the description in func.sgml
for the bytea version of trim(). Maybe that's what you meant was shaky?
I've changed the description to read:

-        <parameter>bytesremoved</parameter> from the start
-        and end of <parameter>bytes</parameter>.
+        <parameter>bytesremoved</parameter> from the start,
+        the end, or both ends of <parameter>bytes</parameter>.
+        (<literal>BOTH</literal> is the default)

New patch attached.

/Joel

Attachments:

leading-trailing-trim-bytea-002.patchapplication/octet-stream; name=leading-trailing-trim-bytea-002.patchDownload

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index df29af6371..2ff45256ea 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -3950,15 +3950,16 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
         <indexterm>
          <primary>trim</primary>
         </indexterm>
-        <function>trim</function> ( <optional> <literal>BOTH</literal> </optional>
+        <function>trim</function> ( <optional> <literal>LEADING</literal> | <literal>TRAILING</literal> | <literal>BOTH</literal> </optional>
         <parameter>bytesremoved</parameter> <type>bytea</type> <literal>FROM</literal>
         <parameter>bytes</parameter> <type>bytea</type> )
         <returnvalue>bytea</returnvalue>
        </para>
        <para>
         Removes the longest string containing only bytes appearing in
-        <parameter>bytesremoved</parameter> from the start
-        and end of <parameter>bytes</parameter>.
+        <parameter>bytesremoved</parameter> from the start,
+        the end, or both ends of <parameter>bytes</parameter>.
+        (<literal>BOTH</literal> is the default)
        </para>
        <para>
         <literal>trim('\x9012'::bytea from '\x1234567890'::bytea)</literal>
@@ -3968,7 +3969,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
 
       <row>
        <entry role="func_table_entry"><para role="func_signature">
-        <function>trim</function> ( <optional> <literal>BOTH</literal> </optional> <optional> <literal>FROM</literal> </optional>
+        <function>trim</function> ( <optional> <literal>LEADING</literal> | <literal>TRAILING</literal> | <literal>BOTH</literal> </optional> <literal>FROM</literal> </optional>
         <parameter>bytes</parameter> <type>bytea</type>,
         <parameter>bytesremoved</parameter> <type>bytea</type> )
         <returnvalue>bytea</returnvalue>
diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c
index 76e666474e..e62913d7c4 100644
--- a/src/backend/utils/adt/oracle_compat.c
+++ b/src/backend/utils/adt/oracle_compat.c
@@ -25,6 +25,8 @@ static text *dotrim(const char *string, int stringlen,
 					const char *set, int setlen,
 					bool doltrim, bool dortrim);
 
+static bytea *dobyteatrim(bytea *string, bytea *set,
+					bool doltrim, bool dortrim);
 
 /********************************************************************
  *
@@ -521,27 +523,12 @@ dotrim(const char *string, int stringlen,
 	return cstring_to_text_with_len(string, stringlen);
 }
 
-/********************************************************************
- *
- * byteatrim
- *
- * Syntax:
- *
- *	 bytea byteatrim(bytea string, bytea set)
- *
- * Purpose:
- *
- *	 Returns string with characters removed from the front and back
- *	 up to the first character not in set.
- *
- * Cloned from btrim and modified as required.
- ********************************************************************/
-
-Datum
-byteatrim(PG_FUNCTION_ARGS)
+/*
+ * Common implementation for bytea versions of btrim, ltrim, rtrim
+ */
+bytea *
+dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim)
 {
-	bytea	   *string = PG_GETARG_BYTEA_PP(0);
-	bytea	   *set = PG_GETARG_BYTEA_PP(1);
 	bytea	   *ret;
 	char	   *ptr,
 			   *end,
@@ -556,7 +543,7 @@ byteatrim(PG_FUNCTION_ARGS)
 	setlen = VARSIZE_ANY_EXHDR(set);
 
 	if (stringlen <= 0 || setlen <= 0)
-		PG_RETURN_BYTEA_P(string);
+		return string;
 
 	m = stringlen;
 	ptr = VARDATA_ANY(string);
@@ -564,39 +551,124 @@ byteatrim(PG_FUNCTION_ARGS)
 	ptr2start = VARDATA_ANY(set);
 	end2 = ptr2start + setlen - 1;
 
-	while (m > 0)
-	{
-		ptr2 = ptr2start;
-		while (ptr2 <= end2)
+	if (doltrim) {
+		while (m > 0)
 		{
-			if (*ptr == *ptr2)
+			ptr2 = ptr2start;
+			while (ptr2 <= end2)
+			{
+				if (*ptr == *ptr2)
+					break;
+				++ptr2;
+			}
+			if (ptr2 > end2)
 				break;
-			++ptr2;
+			ptr++;
+			m--;
 		}
-		if (ptr2 > end2)
-			break;
-		ptr++;
-		m--;
 	}
 
-	while (m > 0)
-	{
-		ptr2 = ptr2start;
-		while (ptr2 <= end2)
+	if (dortrim) {
+		while (m > 0)
 		{
-			if (*end == *ptr2)
+			ptr2 = ptr2start;
+			while (ptr2 <= end2)
+			{
+				if (*end == *ptr2)
+					break;
+				++ptr2;
+			}
+			if (ptr2 > end2)
 				break;
-			++ptr2;
+			end--;
+			m--;
 		}
-		if (ptr2 > end2)
-			break;
-		end--;
-		m--;
 	}
 
 	ret = (bytea *) palloc(VARHDRSZ + m);
 	SET_VARSIZE(ret, VARHDRSZ + m);
 	memcpy(VARDATA(ret), ptr, m);
+	return ret;
+}
+
+/********************************************************************
+ *
+ * byteatrim
+ *
+ * Syntax:
+ *
+ *	 bytea byteatrim(bytea string, bytea set)
+ *
+ * Purpose:
+ *
+ *	 Returns string with characters removed from the front and back
+ *	 up to the first character not in set.
+ *
+ * Cloned from btrim and modified as required.
+ ********************************************************************/
+
+Datum
+byteatrim(PG_FUNCTION_ARGS)
+{
+	bytea	   *string = PG_GETARG_BYTEA_PP(0);
+	bytea	   *set = PG_GETARG_BYTEA_PP(1);
+	bytea	   *ret;
+
+	ret = dobyteatrim(string, set, true, true);
+
+	PG_RETURN_BYTEA_P(ret);
+}
+
+/********************************************************************
+ *
+ * bytealtrim
+ *
+ * Syntax:
+ *
+ *	 bytea bytealtrim(bytea string, bytea set)
+ *
+ * Purpose:
+ *
+ *	 Returns string with initial characters removed up to the first
+ *	 character not in set.
+ *
+ ********************************************************************/
+
+Datum
+bytealtrim(PG_FUNCTION_ARGS)
+{
+	bytea	   *string = PG_GETARG_BYTEA_PP(0);
+	bytea	   *set = PG_GETARG_BYTEA_PP(1);
+	bytea    *ret;
+
+	ret = dobyteatrim(string, set, true, false);
+
+	PG_RETURN_BYTEA_P(ret);
+}
+
+/********************************************************************
+ *
+ * byteartrim
+ *
+ * Syntax:
+ *
+ *	 bytea byteartrim(bytea string, bytea set)
+ *
+ * Purpose:
+ *
+ *	 Returns string with final characters removed after the last
+ *	 character not in set.
+ *
+ ********************************************************************/
+
+Datum
+byteartrim(PG_FUNCTION_ARGS)
+{
+	bytea	   *string = PG_GETARG_BYTEA_PP(0);
+	bytea	   *set = PG_GETARG_BYTEA_PP(1);
+	bytea    *ret;
+
+	ret = dobyteatrim(string, set, false, true);
 
 	PG_RETURN_BYTEA_P(ret);
 }
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index c2c6df2a4f..7dfd9886a8 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -9681,6 +9681,7 @@ get_func_sql_syntax(FuncExpr *expr, deparse_context *context)
 			appendStringInfoChar(buf, ')');
 			return true;
 
+		case F_LTRIM_BYTEA_BYTEA:
 		case F_LTRIM_TEXT:
 		case F_LTRIM_TEXT_TEXT:
 			/* TRIM() */
@@ -9695,6 +9696,7 @@ get_func_sql_syntax(FuncExpr *expr, deparse_context *context)
 			appendStringInfoChar(buf, ')');
 			return true;
 
+		case F_RTRIM_BYTEA_BYTEA:
 		case F_RTRIM_TEXT:
 		case F_RTRIM_TEXT_TEXT:
 			/* TRIM() */
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index fc2202b843..6eec551105 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5748,6 +5748,12 @@
 { oid => '2015', descr => 'trim both ends of string',
   proname => 'btrim', prorettype => 'bytea', proargtypes => 'bytea bytea',
   prosrc => 'byteatrim' },
+{ oid => '9612', descr => 'trim left ends of string',
+  proname => 'ltrim', prorettype => 'bytea', proargtypes => 'bytea bytea',
+  prosrc => 'bytealtrim' },
+{ oid => '9613', descr => 'trim right ends of string',
+  proname => 'rtrim', prorettype => 'bytea', proargtypes => 'bytea bytea',
+  prosrc => 'byteartrim' },
 
 { oid => '2019', descr => 'convert timestamp with time zone to time',
   proname => 'time', provolatile => 's', prorettype => 'time',
diff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out
index b234d2d4f9..bd5fe60450 100644
--- a/src/test/regress/expected/create_view.out
+++ b/src/test/regress/expected/create_view.out
@@ -1735,7 +1735,10 @@ select
   substring('foo' from 'oo') as ssf,  -- historically-permitted abuse
   trim(' ' from ' foo ') as bt,
   trim(leading ' ' from ' foo ') as lt,
-  trim(trailing ' foo ') as rt;
+  trim(trailing ' foo ') as rt,
+  trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea) as btb,
+  trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea) as ltb,
+  trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea) as rtb;
 select pg_get_viewdef('tt201v', true);
                                         pg_get_viewdef                                         
 -----------------------------------------------------------------------------------------------
@@ -1753,7 +1756,10 @@ select pg_get_viewdef('tt201v', true);
      "substring"('foo'::text, 'oo'::text) AS ssf,                                             +
      TRIM(BOTH ' '::text FROM ' foo '::text) AS bt,                                           +
      TRIM(LEADING ' '::text FROM ' foo '::text) AS lt,                                        +
-     TRIM(TRAILING FROM ' foo '::text) AS rt;
+     TRIM(TRAILING FROM ' foo '::text) AS rt,                                                 +
+     TRIM(BOTH '\x00'::bytea FROM '\x00546f6d00'::bytea) AS btb,                              +
+     TRIM(LEADING '\x00'::bytea FROM '\x00546f6d00'::bytea) AS ltb,                           +
+     TRIM(TRAILING '\x00'::bytea FROM '\x00546f6d00'::bytea) AS rtb;
 (1 row)
 
 -- corner cases with empty join conditions
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 298b6c48c2..a2bdd0dd36 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -2090,6 +2090,18 @@ SELECT trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea);
  Tom
 (1 row)
 
+SELECT trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea);
+  ltrim  
+---------
+ Tom\000
+(1 row)
+
+SELECT trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea);
+  rtrim  
+---------
+ \000Tom
+(1 row)
+
 SELECT btrim(E'\\000trim\\000'::bytea, E'\\000'::bytea);
  btrim 
 -------
diff --git a/src/test/regress/sql/create_view.sql b/src/test/regress/sql/create_view.sql
index 6d4dd53965..fbd1313b9c 100644
--- a/src/test/regress/sql/create_view.sql
+++ b/src/test/regress/sql/create_view.sql
@@ -605,7 +605,10 @@ select
   substring('foo' from 'oo') as ssf,  -- historically-permitted abuse
   trim(' ' from ' foo ') as bt,
   trim(leading ' ' from ' foo ') as lt,
-  trim(trailing ' foo ') as rt;
+  trim(trailing ' foo ') as rt,
+  trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea) as btb,
+  trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea) as ltb,
+  trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea) as rtb;
 select pg_get_viewdef('tt201v', true);
 
 -- corner cases with empty join conditions
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index ad5221ab6b..49f8ee2d51 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -711,6 +711,8 @@ SELECT repeat('Pg', 4);
 SELECT repeat('Pg', -4);
 
 SELECT trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea);
+SELECT trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea);
+SELECT trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea);
 SELECT btrim(E'\\000trim\\000'::bytea, E'\\000'::bytea);
 SELECT btrim(''::bytea, E'\\000'::bytea);
 SELECT btrim(E'\\000trim\\000'::bytea, ''::bytea);

Tom Lane

tgl@sss.pgh.pa.us

about 5 years ago

In reply to: Joel Jacobson (#3)

Re: [PATCH] Add support for leading/trailing bytea trim()ing

"Joel Jacobson" <joel@compiler.org> writes:

On Fri, Dec 4, 2020, at 17:37, Tom Lane wrote:

The grammar in the functions' descr strings seems a bit shaky too.

Not sure what you mean?

"trim left ends" (plural) seems wrong. A string only has one left end,
at least in my universe.

(Maybe the existing ltrim/rtrim descrs are also like this, but if so
I'd change them too.)

regards, tom lane

Joel Jacobson

joel@compiler.org

about 5 years ago

In reply to: Tom Lane (#4)

2 attachment(s)

Re: [PATCH] Add support for leading/trailing bytea trim()ing

On Fri, Dec 4, 2020, at 22:02, Tom Lane wrote:

"trim left ends" (plural) seems wrong. A string only has one left end,
at least in my universe.

Fixed, the extra "s" came from copying from btrim()'s description.

(Maybe the existing ltrim/rtrim descrs are also like this, but if so

I>'d change them too.)

They weren't, but I think the description for the bytea functions
can be improved to have a more precise description
if we take inspiration from the the text functions.

Here is an overview of all functions containing "trim" in the function name,
to get the full picture of the trim description terminology:

SELECT
oid,
pg_describe_object('pg_proc'::regclass,oid,0),
pg_catalog.obj_description(oid, 'pg_proc')
FROM pg_proc
WHERE proname LIKE '%trim%'
ORDER BY oid;

Do we want the two new functions to derive their description from the existing bytea function?

9612 | function ltrim(bytea,bytea) | trim left end of string
9613 | function rtrim(bytea,bytea) | trim right end of string

Patch with this wording: leading-trailing-trim-bytea-left-right-end-of-string.patch

Or would it be better to be inspired by the more precise descriptions for the two parameter text functions,
and to change the existing btrim() function's description as well?

Patch with this wording: leading-trailing-trim-bytea-selected-bytes.patch

Best regards,

Joel

Attachments:

leading-trailing-trim-bytea-left-right-end-of-string.patchapplication/octet-stream; name=leading-trailing-trim-bytea-left-right-end-of-string.patchDownload

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index df29af6371..70eedf1e7d 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -3950,15 +3950,16 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
         <indexterm>
          <primary>trim</primary>
         </indexterm>
-        <function>trim</function> ( <optional> <literal>BOTH</literal> </optional>
+        <function>trim</function> ( <optional> <literal>LEADING</literal> | <literal>TRAILING</literal> | <literal>BOTH</literal> </optional>
         <parameter>bytesremoved</parameter> <type>bytea</type> <literal>FROM</literal>
         <parameter>bytes</parameter> <type>bytea</type> )
         <returnvalue>bytea</returnvalue>
        </para>
        <para>
         Removes the longest string containing only bytes appearing in
-        <parameter>bytesremoved</parameter> from the start
-        and end of <parameter>bytes</parameter>.
+        <parameter>bytesremoved</parameter> from the start,
+        end, or both ends of <parameter>bytes</parameter>.
+        (<literal>BOTH</literal> is the default)
        </para>
        <para>
         <literal>trim('\x9012'::bytea from '\x1234567890'::bytea)</literal>
@@ -3968,7 +3969,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
 
       <row>
        <entry role="func_table_entry"><para role="func_signature">
-        <function>trim</function> ( <optional> <literal>BOTH</literal> </optional> <optional> <literal>FROM</literal> </optional>
+        <function>trim</function> ( <optional> <literal>LEADING</literal> | <literal>TRAILING</literal> | <literal>BOTH</literal> </optional> <literal>FROM</literal> </optional>
         <parameter>bytes</parameter> <type>bytea</type>,
         <parameter>bytesremoved</parameter> <type>bytea</type> )
         <returnvalue>bytea</returnvalue>
@@ -4111,6 +4112,26 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
        </para></entry>
       </row>
 
+      <row>
+        <entry role="func_table_entry"><para role="func_signature">
+         <indexterm>
+          <primary>ltrim</primary>
+         </indexterm>
+         <function>ltrim</function> ( <parameter>bytes</parameter> <type>bytea</type>,
+         <parameter>bytesremoved</parameter> <type>bytea</type> )
+         <returnvalue>bytes</returnvalue>
+        </para>
+        <para>
+          Removes the longest string containing only bytes appearing in
+          <parameter>bytesremoved</parameter> from the start of
+          <parameter>bytes</parameter>.
+        </para>
+        <para>
+         <literal>ltrim('\x1234567890'::bytea, '\x9012'::bytea)</literal>
+         <returnvalue>\x34567890</returnvalue>
+        </para></entry>
+       </row>
+ 
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
@@ -4129,6 +4150,26 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
        </para></entry>
       </row>
 
+      <row>
+        <entry role="func_table_entry"><para role="func_signature">
+         <indexterm>
+          <primary>rtrim</primary>
+         </indexterm>
+         <function>rtrim</function> ( <parameter>bytes</parameter> <type>bytea</type>,
+         <parameter>bytesremoved</parameter> <type>bytea</type> )
+         <returnvalue>bytes</returnvalue>
+        </para>
+        <para>
+          Removes the longest string containing only bytes appearing in
+          <parameter>bytesremoved</parameter> from the end of
+          <parameter>bytes</parameter>.
+        </para>
+        <para>
+         <literal>rtrim('\x1234567890'::bytea, '\x9012'::bytea)</literal>
+         <returnvalue>\x12345678</returnvalue>
+        </para></entry>
+       </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c
index 76e666474e..e62913d7c4 100644
--- a/src/backend/utils/adt/oracle_compat.c
+++ b/src/backend/utils/adt/oracle_compat.c
@@ -25,6 +25,8 @@ static text *dotrim(const char *string, int stringlen,
 					const char *set, int setlen,
 					bool doltrim, bool dortrim);
 
+static bytea *dobyteatrim(bytea *string, bytea *set,
+					bool doltrim, bool dortrim);
 
 /********************************************************************
  *
@@ -521,27 +523,12 @@ dotrim(const char *string, int stringlen,
 	return cstring_to_text_with_len(string, stringlen);
 }
 
-/********************************************************************
- *
- * byteatrim
- *
- * Syntax:
- *
- *	 bytea byteatrim(bytea string, bytea set)
- *
- * Purpose:
- *
- *	 Returns string with characters removed from the front and back
- *	 up to the first character not in set.
- *
- * Cloned from btrim and modified as required.
- ********************************************************************/
-
-Datum
-byteatrim(PG_FUNCTION_ARGS)
+/*
+ * Common implementation for bytea versions of btrim, ltrim, rtrim
+ */
+bytea *
+dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim)
 {
-	bytea	   *string = PG_GETARG_BYTEA_PP(0);
-	bytea	   *set = PG_GETARG_BYTEA_PP(1);
 	bytea	   *ret;
 	char	   *ptr,
 			   *end,
@@ -556,7 +543,7 @@ byteatrim(PG_FUNCTION_ARGS)
 	setlen = VARSIZE_ANY_EXHDR(set);
 
 	if (stringlen <= 0 || setlen <= 0)
-		PG_RETURN_BYTEA_P(string);
+		return string;
 
 	m = stringlen;
 	ptr = VARDATA_ANY(string);
@@ -564,39 +551,124 @@ byteatrim(PG_FUNCTION_ARGS)
 	ptr2start = VARDATA_ANY(set);
 	end2 = ptr2start + setlen - 1;
 
-	while (m > 0)
-	{
-		ptr2 = ptr2start;
-		while (ptr2 <= end2)
+	if (doltrim) {
+		while (m > 0)
 		{
-			if (*ptr == *ptr2)
+			ptr2 = ptr2start;
+			while (ptr2 <= end2)
+			{
+				if (*ptr == *ptr2)
+					break;
+				++ptr2;
+			}
+			if (ptr2 > end2)
 				break;
-			++ptr2;
+			ptr++;
+			m--;
 		}
-		if (ptr2 > end2)
-			break;
-		ptr++;
-		m--;
 	}
 
-	while (m > 0)
-	{
-		ptr2 = ptr2start;
-		while (ptr2 <= end2)
+	if (dortrim) {
+		while (m > 0)
 		{
-			if (*end == *ptr2)
+			ptr2 = ptr2start;
+			while (ptr2 <= end2)
+			{
+				if (*end == *ptr2)
+					break;
+				++ptr2;
+			}
+			if (ptr2 > end2)
 				break;
-			++ptr2;
+			end--;
+			m--;
 		}
-		if (ptr2 > end2)
-			break;
-		end--;
-		m--;
 	}
 
 	ret = (bytea *) palloc(VARHDRSZ + m);
 	SET_VARSIZE(ret, VARHDRSZ + m);
 	memcpy(VARDATA(ret), ptr, m);
+	return ret;
+}
+
+/********************************************************************
+ *
+ * byteatrim
+ *
+ * Syntax:
+ *
+ *	 bytea byteatrim(bytea string, bytea set)
+ *
+ * Purpose:
+ *
+ *	 Returns string with characters removed from the front and back
+ *	 up to the first character not in set.
+ *
+ * Cloned from btrim and modified as required.
+ ********************************************************************/
+
+Datum
+byteatrim(PG_FUNCTION_ARGS)
+{
+	bytea	   *string = PG_GETARG_BYTEA_PP(0);
+	bytea	   *set = PG_GETARG_BYTEA_PP(1);
+	bytea	   *ret;
+
+	ret = dobyteatrim(string, set, true, true);
+
+	PG_RETURN_BYTEA_P(ret);
+}
+
+/********************************************************************
+ *
+ * bytealtrim
+ *
+ * Syntax:
+ *
+ *	 bytea bytealtrim(bytea string, bytea set)
+ *
+ * Purpose:
+ *
+ *	 Returns string with initial characters removed up to the first
+ *	 character not in set.
+ *
+ ********************************************************************/
+
+Datum
+bytealtrim(PG_FUNCTION_ARGS)
+{
+	bytea	   *string = PG_GETARG_BYTEA_PP(0);
+	bytea	   *set = PG_GETARG_BYTEA_PP(1);
+	bytea    *ret;
+
+	ret = dobyteatrim(string, set, true, false);
+
+	PG_RETURN_BYTEA_P(ret);
+}
+
+/********************************************************************
+ *
+ * byteartrim
+ *
+ * Syntax:
+ *
+ *	 bytea byteartrim(bytea string, bytea set)
+ *
+ * Purpose:
+ *
+ *	 Returns string with final characters removed after the last
+ *	 character not in set.
+ *
+ ********************************************************************/
+
+Datum
+byteartrim(PG_FUNCTION_ARGS)
+{
+	bytea	   *string = PG_GETARG_BYTEA_PP(0);
+	bytea	   *set = PG_GETARG_BYTEA_PP(1);
+	bytea    *ret;
+
+	ret = dobyteatrim(string, set, false, true);
 
 	PG_RETURN_BYTEA_P(ret);
 }
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index c2c6df2a4f..7dfd9886a8 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -9681,6 +9681,7 @@ get_func_sql_syntax(FuncExpr *expr, deparse_context *context)
 			appendStringInfoChar(buf, ')');
 			return true;
 
+		case F_LTRIM_BYTEA_BYTEA:
 		case F_LTRIM_TEXT:
 		case F_LTRIM_TEXT_TEXT:
 			/* TRIM() */
@@ -9695,6 +9696,7 @@ get_func_sql_syntax(FuncExpr *expr, deparse_context *context)
 			appendStringInfoChar(buf, ')');
 			return true;
 
+		case F_RTRIM_BYTEA_BYTEA:
 		case F_RTRIM_TEXT:
 		case F_RTRIM_TEXT_TEXT:
 			/* TRIM() */
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index fc2202b843..37e7a12466 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5748,6 +5748,12 @@
 { oid => '2015', descr => 'trim both ends of string',
   proname => 'btrim', prorettype => 'bytea', proargtypes => 'bytea bytea',
   prosrc => 'byteatrim' },
+{ oid => '9612', descr => 'trim left end of string',
+  proname => 'ltrim', prorettype => 'bytea', proargtypes => 'bytea bytea',
+  prosrc => 'bytealtrim' },
+{ oid => '9613', descr => 'trim right end of string',
+  proname => 'rtrim', prorettype => 'bytea', proargtypes => 'bytea bytea',
+  prosrc => 'byteartrim' },
 
 { oid => '2019', descr => 'convert timestamp with time zone to time',
   proname => 'time', provolatile => 's', prorettype => 'time',
diff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out
index b234d2d4f9..bd5fe60450 100644
--- a/src/test/regress/expected/create_view.out
+++ b/src/test/regress/expected/create_view.out
@@ -1735,7 +1735,10 @@ select
   substring('foo' from 'oo') as ssf,  -- historically-permitted abuse
   trim(' ' from ' foo ') as bt,
   trim(leading ' ' from ' foo ') as lt,
-  trim(trailing ' foo ') as rt;
+  trim(trailing ' foo ') as rt,
+  trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea) as btb,
+  trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea) as ltb,
+  trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea) as rtb;
 select pg_get_viewdef('tt201v', true);
                                         pg_get_viewdef                                         
 -----------------------------------------------------------------------------------------------
@@ -1753,7 +1756,10 @@ select pg_get_viewdef('tt201v', true);
      "substring"('foo'::text, 'oo'::text) AS ssf,                                             +
      TRIM(BOTH ' '::text FROM ' foo '::text) AS bt,                                           +
      TRIM(LEADING ' '::text FROM ' foo '::text) AS lt,                                        +
-     TRIM(TRAILING FROM ' foo '::text) AS rt;
+     TRIM(TRAILING FROM ' foo '::text) AS rt,                                                 +
+     TRIM(BOTH '\x00'::bytea FROM '\x00546f6d00'::bytea) AS btb,                              +
+     TRIM(LEADING '\x00'::bytea FROM '\x00546f6d00'::bytea) AS ltb,                           +
+     TRIM(TRAILING '\x00'::bytea FROM '\x00546f6d00'::bytea) AS rtb;
 (1 row)
 
 -- corner cases with empty join conditions
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 298b6c48c2..a2bdd0dd36 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -2090,6 +2090,18 @@ SELECT trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea);
  Tom
 (1 row)
 
+SELECT trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea);
+  ltrim  
+---------
+ Tom\000
+(1 row)
+
+SELECT trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea);
+  rtrim  
+---------
+ \000Tom
+(1 row)
+
 SELECT btrim(E'\\000trim\\000'::bytea, E'\\000'::bytea);
  btrim 
 -------
diff --git a/src/test/regress/sql/create_view.sql b/src/test/regress/sql/create_view.sql
index 6d4dd53965..fbd1313b9c 100644
--- a/src/test/regress/sql/create_view.sql
+++ b/src/test/regress/sql/create_view.sql
@@ -605,7 +605,10 @@ select
   substring('foo' from 'oo') as ssf,  -- historically-permitted abuse
   trim(' ' from ' foo ') as bt,
   trim(leading ' ' from ' foo ') as lt,
-  trim(trailing ' foo ') as rt;
+  trim(trailing ' foo ') as rt,
+  trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea) as btb,
+  trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea) as ltb,
+  trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea) as rtb;
 select pg_get_viewdef('tt201v', true);
 
 -- corner cases with empty join conditions
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index ad5221ab6b..49f8ee2d51 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -711,6 +711,8 @@ SELECT repeat('Pg', 4);
 SELECT repeat('Pg', -4);
 
 SELECT trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea);
+SELECT trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea);
+SELECT trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea);
 SELECT btrim(E'\\000trim\\000'::bytea, E'\\000'::bytea);
 SELECT btrim(''::bytea, E'\\000'::bytea);
 SELECT btrim(E'\\000trim\\000'::bytea, ''::bytea);

leading-trailing-trim-bytea-selected-bytes.patchapplication/octet-stream; name=leading-trailing-trim-bytea-selected-bytes.patchDownload

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index df29af6371..70eedf1e7d 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -3950,15 +3950,16 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
         <indexterm>
          <primary>trim</primary>
         </indexterm>
-        <function>trim</function> ( <optional> <literal>BOTH</literal> </optional>
+        <function>trim</function> ( <optional> <literal>LEADING</literal> | <literal>TRAILING</literal> | <literal>BOTH</literal> </optional>
         <parameter>bytesremoved</parameter> <type>bytea</type> <literal>FROM</literal>
         <parameter>bytes</parameter> <type>bytea</type> )
         <returnvalue>bytea</returnvalue>
        </para>
        <para>
         Removes the longest string containing only bytes appearing in
-        <parameter>bytesremoved</parameter> from the start
-        and end of <parameter>bytes</parameter>.
+        <parameter>bytesremoved</parameter> from the start,
+        end, or both ends of <parameter>bytes</parameter>.
+        (<literal>BOTH</literal> is the default)
        </para>
        <para>
         <literal>trim('\x9012'::bytea from '\x1234567890'::bytea)</literal>
@@ -3968,7 +3969,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
 
       <row>
        <entry role="func_table_entry"><para role="func_signature">
-        <function>trim</function> ( <optional> <literal>BOTH</literal> </optional> <optional> <literal>FROM</literal> </optional>
+        <function>trim</function> ( <optional> <literal>LEADING</literal> | <literal>TRAILING</literal> | <literal>BOTH</literal> </optional> <literal>FROM</literal> </optional>
         <parameter>bytes</parameter> <type>bytea</type>,
         <parameter>bytesremoved</parameter> <type>bytea</type> )
         <returnvalue>bytea</returnvalue>
@@ -4111,6 +4112,26 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
        </para></entry>
       </row>
 
+      <row>
+        <entry role="func_table_entry"><para role="func_signature">
+         <indexterm>
+          <primary>ltrim</primary>
+         </indexterm>
+         <function>ltrim</function> ( <parameter>bytes</parameter> <type>bytea</type>,
+         <parameter>bytesremoved</parameter> <type>bytea</type> )
+         <returnvalue>bytes</returnvalue>
+        </para>
+        <para>
+          Removes the longest string containing only bytes appearing in
+          <parameter>bytesremoved</parameter> from the start of
+          <parameter>bytes</parameter>.
+        </para>
+        <para>
+         <literal>ltrim('\x1234567890'::bytea, '\x9012'::bytea)</literal>
+         <returnvalue>\x34567890</returnvalue>
+        </para></entry>
+       </row>
+ 
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
@@ -4129,6 +4150,26 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
        </para></entry>
       </row>
 
+      <row>
+        <entry role="func_table_entry"><para role="func_signature">
+         <indexterm>
+          <primary>rtrim</primary>
+         </indexterm>
+         <function>rtrim</function> ( <parameter>bytes</parameter> <type>bytea</type>,
+         <parameter>bytesremoved</parameter> <type>bytea</type> )
+         <returnvalue>bytes</returnvalue>
+        </para>
+        <para>
+          Removes the longest string containing only bytes appearing in
+          <parameter>bytesremoved</parameter> from the end of
+          <parameter>bytes</parameter>.
+        </para>
+        <para>
+         <literal>rtrim('\x1234567890'::bytea, '\x9012'::bytea)</literal>
+         <returnvalue>\x12345678</returnvalue>
+        </para></entry>
+       </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c
index 76e666474e..e62913d7c4 100644
--- a/src/backend/utils/adt/oracle_compat.c
+++ b/src/backend/utils/adt/oracle_compat.c
@@ -25,6 +25,8 @@ static text *dotrim(const char *string, int stringlen,
 					const char *set, int setlen,
 					bool doltrim, bool dortrim);
 
+static bytea *dobyteatrim(bytea *string, bytea *set,
+					bool doltrim, bool dortrim);
 
 /********************************************************************
  *
@@ -521,27 +523,12 @@ dotrim(const char *string, int stringlen,
 	return cstring_to_text_with_len(string, stringlen);
 }
 
-/********************************************************************
- *
- * byteatrim
- *
- * Syntax:
- *
- *	 bytea byteatrim(bytea string, bytea set)
- *
- * Purpose:
- *
- *	 Returns string with characters removed from the front and back
- *	 up to the first character not in set.
- *
- * Cloned from btrim and modified as required.
- ********************************************************************/
-
-Datum
-byteatrim(PG_FUNCTION_ARGS)
+/*
+ * Common implementation for bytea versions of btrim, ltrim, rtrim
+ */
+bytea *
+dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim)
 {
-	bytea	   *string = PG_GETARG_BYTEA_PP(0);
-	bytea	   *set = PG_GETARG_BYTEA_PP(1);
 	bytea	   *ret;
 	char	   *ptr,
 			   *end,
@@ -556,7 +543,7 @@ byteatrim(PG_FUNCTION_ARGS)
 	setlen = VARSIZE_ANY_EXHDR(set);
 
 	if (stringlen <= 0 || setlen <= 0)
-		PG_RETURN_BYTEA_P(string);
+		return string;
 
 	m = stringlen;
 	ptr = VARDATA_ANY(string);
@@ -564,39 +551,124 @@ byteatrim(PG_FUNCTION_ARGS)
 	ptr2start = VARDATA_ANY(set);
 	end2 = ptr2start + setlen - 1;
 
-	while (m > 0)
-	{
-		ptr2 = ptr2start;
-		while (ptr2 <= end2)
+	if (doltrim) {
+		while (m > 0)
 		{
-			if (*ptr == *ptr2)
+			ptr2 = ptr2start;
+			while (ptr2 <= end2)
+			{
+				if (*ptr == *ptr2)
+					break;
+				++ptr2;
+			}
+			if (ptr2 > end2)
 				break;
-			++ptr2;
+			ptr++;
+			m--;
 		}
-		if (ptr2 > end2)
-			break;
-		ptr++;
-		m--;
 	}
 
-	while (m > 0)
-	{
-		ptr2 = ptr2start;
-		while (ptr2 <= end2)
+	if (dortrim) {
+		while (m > 0)
 		{
-			if (*end == *ptr2)
+			ptr2 = ptr2start;
+			while (ptr2 <= end2)
+			{
+				if (*end == *ptr2)
+					break;
+				++ptr2;
+			}
+			if (ptr2 > end2)
 				break;
-			++ptr2;
+			end--;
+			m--;
 		}
-		if (ptr2 > end2)
-			break;
-		end--;
-		m--;
 	}
 
 	ret = (bytea *) palloc(VARHDRSZ + m);
 	SET_VARSIZE(ret, VARHDRSZ + m);
 	memcpy(VARDATA(ret), ptr, m);
+	return ret;
+}
+
+/********************************************************************
+ *
+ * byteatrim
+ *
+ * Syntax:
+ *
+ *	 bytea byteatrim(bytea string, bytea set)
+ *
+ * Purpose:
+ *
+ *	 Returns string with characters removed from the front and back
+ *	 up to the first character not in set.
+ *
+ * Cloned from btrim and modified as required.
+ ********************************************************************/
+
+Datum
+byteatrim(PG_FUNCTION_ARGS)
+{
+	bytea	   *string = PG_GETARG_BYTEA_PP(0);
+	bytea	   *set = PG_GETARG_BYTEA_PP(1);
+	bytea	   *ret;
+
+	ret = dobyteatrim(string, set, true, true);
+
+	PG_RETURN_BYTEA_P(ret);
+}
+
+/********************************************************************
+ *
+ * bytealtrim
+ *
+ * Syntax:
+ *
+ *	 bytea bytealtrim(bytea string, bytea set)
+ *
+ * Purpose:
+ *
+ *	 Returns string with initial characters removed up to the first
+ *	 character not in set.
+ *
+ ********************************************************************/
+
+Datum
+bytealtrim(PG_FUNCTION_ARGS)
+{
+	bytea	   *string = PG_GETARG_BYTEA_PP(0);
+	bytea	   *set = PG_GETARG_BYTEA_PP(1);
+	bytea    *ret;
+
+	ret = dobyteatrim(string, set, true, false);
+
+	PG_RETURN_BYTEA_P(ret);
+}
+
+/********************************************************************
+ *
+ * byteartrim
+ *
+ * Syntax:
+ *
+ *	 bytea byteartrim(bytea string, bytea set)
+ *
+ * Purpose:
+ *
+ *	 Returns string with final characters removed after the last
+ *	 character not in set.
+ *
+ ********************************************************************/
+
+Datum
+byteartrim(PG_FUNCTION_ARGS)
+{
+	bytea	   *string = PG_GETARG_BYTEA_PP(0);
+	bytea	   *set = PG_GETARG_BYTEA_PP(1);
+	bytea    *ret;
+
+	ret = dobyteatrim(string, set, false, true);
 
 	PG_RETURN_BYTEA_P(ret);
 }
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index c2c6df2a4f..7dfd9886a8 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -9681,6 +9681,7 @@ get_func_sql_syntax(FuncExpr *expr, deparse_context *context)
 			appendStringInfoChar(buf, ')');
 			return true;
 
+		case F_LTRIM_BYTEA_BYTEA:
 		case F_LTRIM_TEXT:
 		case F_LTRIM_TEXT_TEXT:
 			/* TRIM() */
@@ -9695,6 +9696,7 @@ get_func_sql_syntax(FuncExpr *expr, deparse_context *context)
 			appendStringInfoChar(buf, ')');
 			return true;
 
+		case F_RTRIM_BYTEA_BYTEA:
 		case F_RTRIM_TEXT:
 		case F_RTRIM_TEXT_TEXT:
 			/* TRIM() */
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index fc2202b843..8b74a6f420 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5745,9 +5745,15 @@
 { oid => '2014', descr => 'position of substring',
   proname => 'position', prorettype => 'int4', proargtypes => 'bytea bytea',
   prosrc => 'byteapos' },
-{ oid => '2015', descr => 'trim both ends of string',
+{ oid => '2015', descr => 'trim selected bytes from both ends of string',
   proname => 'btrim', prorettype => 'bytea', proargtypes => 'bytea bytea',
   prosrc => 'byteatrim' },
+{ oid => '9612', descr => 'trim selected bytes from left end of string',
+  proname => 'ltrim', prorettype => 'bytea', proargtypes => 'bytea bytea',
+  prosrc => 'bytealtrim' },
+{ oid => '9613', descr => 'trim selected bytes from right end of string',
+  proname => 'rtrim', prorettype => 'bytea', proargtypes => 'bytea bytea',
+  prosrc => 'byteartrim' },
 
 { oid => '2019', descr => 'convert timestamp with time zone to time',
   proname => 'time', provolatile => 's', prorettype => 'time',
diff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out
index b234d2d4f9..bd5fe60450 100644
--- a/src/test/regress/expected/create_view.out
+++ b/src/test/regress/expected/create_view.out
@@ -1735,7 +1735,10 @@ select
   substring('foo' from 'oo') as ssf,  -- historically-permitted abuse
   trim(' ' from ' foo ') as bt,
   trim(leading ' ' from ' foo ') as lt,
-  trim(trailing ' foo ') as rt;
+  trim(trailing ' foo ') as rt,
+  trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea) as btb,
+  trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea) as ltb,
+  trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea) as rtb;
 select pg_get_viewdef('tt201v', true);
                                         pg_get_viewdef                                         
 -----------------------------------------------------------------------------------------------
@@ -1753,7 +1756,10 @@ select pg_get_viewdef('tt201v', true);
      "substring"('foo'::text, 'oo'::text) AS ssf,                                             +
      TRIM(BOTH ' '::text FROM ' foo '::text) AS bt,                                           +
      TRIM(LEADING ' '::text FROM ' foo '::text) AS lt,                                        +
-     TRIM(TRAILING FROM ' foo '::text) AS rt;
+     TRIM(TRAILING FROM ' foo '::text) AS rt,                                                 +
+     TRIM(BOTH '\x00'::bytea FROM '\x00546f6d00'::bytea) AS btb,                              +
+     TRIM(LEADING '\x00'::bytea FROM '\x00546f6d00'::bytea) AS ltb,                           +
+     TRIM(TRAILING '\x00'::bytea FROM '\x00546f6d00'::bytea) AS rtb;
 (1 row)
 
 -- corner cases with empty join conditions
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 298b6c48c2..a2bdd0dd36 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -2090,6 +2090,18 @@ SELECT trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea);
  Tom
 (1 row)
 
+SELECT trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea);
+  ltrim  
+---------
+ Tom\000
+(1 row)
+
+SELECT trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea);
+  rtrim  
+---------
+ \000Tom
+(1 row)
+
 SELECT btrim(E'\\000trim\\000'::bytea, E'\\000'::bytea);
  btrim 
 -------
diff --git a/src/test/regress/sql/create_view.sql b/src/test/regress/sql/create_view.sql
index 6d4dd53965..fbd1313b9c 100644
--- a/src/test/regress/sql/create_view.sql
+++ b/src/test/regress/sql/create_view.sql
@@ -605,7 +605,10 @@ select
   substring('foo' from 'oo') as ssf,  -- historically-permitted abuse
   trim(' ' from ' foo ') as bt,
   trim(leading ' ' from ' foo ') as lt,
-  trim(trailing ' foo ') as rt;
+  trim(trailing ' foo ') as rt,
+  trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea) as btb,
+  trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea) as ltb,
+  trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea) as rtb;
 select pg_get_viewdef('tt201v', true);
 
 -- corner cases with empty join conditions
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index ad5221ab6b..49f8ee2d51 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -711,6 +711,8 @@ SELECT repeat('Pg', 4);
 SELECT repeat('Pg', -4);
 
 SELECT trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea);
+SELECT trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea);
+SELECT trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea);
 SELECT btrim(E'\\000trim\\000'::bytea, E'\\000'::bytea);
 SELECT btrim(''::bytea, E'\\000'::bytea);
 SELECT btrim(E'\\000trim\\000'::bytea, ''::bytea);

Tom Lane

tgl@sss.pgh.pa.us

almost 5 years ago

In reply to: Joel Jacobson (#5)

Re: [PATCH] Add support for leading/trailing bytea trim()ing

"Joel Jacobson" <joel@compiler.org> writes:

On Fri, Dec 4, 2020, at 22:02, Tom Lane wrote:

(Maybe the existing ltrim/rtrim descrs are also like this, but if so
I'd change them too.)

They weren't, but I think the description for the bytea functions
can be improved to have a more precise description
if we take inspiration from the the text functions.

Yeah, I agree with making the bytea descriptions look like the
text ones. Pushed with minor additional doc fixes.

regards, tom lane