Add SQL function for SHA1

Started by Michael Paquieralmost 5 years ago12 messages
#1Michael Paquier
michael@paquier.xyz
1 attachment(s)

Hi all,

SHA-1 is now an option available for cryptohashes, and like the
existing set of functions of SHA-2, I don't really see a reason why we
should not have a SQL function for SHA1. Attached is a patch doing
that.

The same code pattern was repeated 4 times on HEAD for the SHA-2
functions for the bytea -> bytea hashing, so I have refactored the
whole thing while integrating the new function, shaving some code from
cryptohashfuncs.c.

Thoughts?
--
Michael

Attachments:

sha1-sql-v1.patchtext/x-diff; charset=us-asciiDownload
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index b5f52d4e4a..e6f22a5873 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -7317,6 +7317,9 @@
 { oid => '2321', descr => 'MD5 hash',
   proname => 'md5', proleakproof => 't', prorettype => 'text',
   proargtypes => 'bytea', prosrc => 'md5_bytea' },
+{ oid => '8935', descr => 'SHA-1 hash',
+  proname => 'sha1', proleakproof => 't', prorettype => 'bytea',
+  proargtypes => 'bytea', prosrc => 'sha1_bytea' },
 { oid => '3419', descr => 'SHA-224 hash',
   proname => 'sha224', proleakproof => 't', prorettype => 'bytea',
   proargtypes => 'bytea', prosrc => 'sha224_bytea' },
diff --git a/src/backend/utils/adt/cryptohashfuncs.c b/src/backend/utils/adt/cryptohashfuncs.c
index d99485f4c6..5bdc2da531 100644
--- a/src/backend/utils/adt/cryptohashfuncs.c
+++ b/src/backend/utils/adt/cryptohashfuncs.c
@@ -15,6 +15,7 @@
 
 #include "common/cryptohash.h"
 #include "common/md5.h"
+#include "common/sha1.h"
 #include "common/sha2.h"
 #include "utils/builtins.h"
 
@@ -68,6 +69,74 @@ md5_bytea(PG_FUNCTION_ARGS)
 	PG_RETURN_TEXT_P(cstring_to_text(hexsum));
 }
 
+/*
+ * Internal routine to compute a cryptohash with the given bytea input.
+ */
+static inline bytea *
+cryptohash_internal(bytea *input, int digest_len,
+					pg_cryptohash_type type)
+{
+	const uint8 *data;
+	const char  *typestr;
+	size_t		len;
+	pg_cryptohash_ctx *ctx;
+	unsigned char *buf;
+	bytea	   *result;
+
+	switch (type)
+	{
+		case PG_MD5:
+			typestr = "MD5";
+			break;
+		case PG_SHA1:
+			typestr = "SHA1";
+			break;
+		case PG_SHA224:
+			typestr = "SHA224";
+			break;
+		case PG_SHA256:
+			typestr = "SHA256";
+			break;
+		case PG_SHA384:
+			typestr = "SHA384";
+			break;
+		case PG_SHA512:
+			typestr = "SHA512";
+			break;
+	}
+
+	buf = palloc0(digest_len);
+	len = VARSIZE_ANY_EXHDR(input);
+	data = (unsigned char *) VARDATA_ANY(input);
+
+	ctx = pg_cryptohash_create(type);
+	if (pg_cryptohash_init(ctx) < 0)
+		elog(ERROR, "could not initialize %s context", typestr);
+	if (pg_cryptohash_update(ctx, data, len) < 0)
+		elog(ERROR, "could not update %s context", typestr);
+	if (pg_cryptohash_final(ctx, buf) < 0)
+		elog(ERROR, "could not finalize %s context", typestr);
+	pg_cryptohash_free(ctx);
+
+	result = palloc(digest_len + VARHDRSZ);
+	SET_VARSIZE(result, digest_len + VARHDRSZ);
+	memcpy(VARDATA(result), buf, digest_len);
+
+	return result;
+}
+
+/*
+ * SHA-1
+ */
+Datum
+sha1_bytea(PG_FUNCTION_ARGS)
+{
+	bytea   *result = cryptohash_internal(PG_GETARG_BYTEA_PP(0),
+										  SHA1_DIGEST_LENGTH,
+										  PG_SHA1);
+	PG_RETURN_BYTEA_P(result);
+}
+
 
 /*
  * SHA-2 variants
@@ -76,115 +145,35 @@ md5_bytea(PG_FUNCTION_ARGS)
 Datum
 sha224_bytea(PG_FUNCTION_ARGS)
 {
-	bytea	   *in = PG_GETARG_BYTEA_PP(0);
-	const uint8 *data;
-	size_t		len;
-	pg_cryptohash_ctx *ctx;
-	unsigned char buf[PG_SHA224_DIGEST_LENGTH];
-	bytea	   *result;
-
-	len = VARSIZE_ANY_EXHDR(in);
-	data = (unsigned char *) VARDATA_ANY(in);
-
-	ctx = pg_cryptohash_create(PG_SHA224);
-	if (pg_cryptohash_init(ctx) < 0)
-		elog(ERROR, "could not initialize %s context", "SHA224");
-	if (pg_cryptohash_update(ctx, data, len) < 0)
-		elog(ERROR, "could not update %s context", "SHA224");
-	if (pg_cryptohash_final(ctx, buf) < 0)
-		elog(ERROR, "could not finalize %s context", "SHA224");
-	pg_cryptohash_free(ctx);
-
-	result = palloc(sizeof(buf) + VARHDRSZ);
-	SET_VARSIZE(result, sizeof(buf) + VARHDRSZ);
-	memcpy(VARDATA(result), buf, sizeof(buf));
-
+	bytea   *result = cryptohash_internal(PG_GETARG_BYTEA_PP(0),
+										  PG_SHA224_DIGEST_LENGTH,
+										  PG_SHA224);
 	PG_RETURN_BYTEA_P(result);
 }
 
 Datum
 sha256_bytea(PG_FUNCTION_ARGS)
 {
-	bytea	   *in = PG_GETARG_BYTEA_PP(0);
-	const uint8 *data;
-	size_t		len;
-	pg_cryptohash_ctx *ctx;
-	unsigned char buf[PG_SHA256_DIGEST_LENGTH];
-	bytea	   *result;
-
-	len = VARSIZE_ANY_EXHDR(in);
-	data = (unsigned char *) VARDATA_ANY(in);
-
-	ctx = pg_cryptohash_create(PG_SHA256);
-	if (pg_cryptohash_init(ctx) < 0)
-		elog(ERROR, "could not initialize %s context", "SHA256");
-	if (pg_cryptohash_update(ctx, data, len) < 0)
-		elog(ERROR, "could not update %s context", "SHA256");
-	if (pg_cryptohash_final(ctx, buf) < 0)
-		elog(ERROR, "could not finalize %s context", "SHA256");
-	pg_cryptohash_free(ctx);
-
-	result = palloc(sizeof(buf) + VARHDRSZ);
-	SET_VARSIZE(result, sizeof(buf) + VARHDRSZ);
-	memcpy(VARDATA(result), buf, sizeof(buf));
-
+	bytea   *result = cryptohash_internal(PG_GETARG_BYTEA_PP(0),
+										  PG_SHA256_DIGEST_LENGTH,
+										  PG_SHA256);
 	PG_RETURN_BYTEA_P(result);
 }
 
 Datum
 sha384_bytea(PG_FUNCTION_ARGS)
 {
-	bytea	   *in = PG_GETARG_BYTEA_PP(0);
-	const uint8 *data;
-	size_t		len;
-	pg_cryptohash_ctx *ctx;
-	unsigned char buf[PG_SHA384_DIGEST_LENGTH];
-	bytea	   *result;
-
-	len = VARSIZE_ANY_EXHDR(in);
-	data = (unsigned char *) VARDATA_ANY(in);
-
-	ctx = pg_cryptohash_create(PG_SHA384);
-	if (pg_cryptohash_init(ctx) < 0)
-		elog(ERROR, "could not initialize %s context", "SHA384");
-	if (pg_cryptohash_update(ctx, data, len) < 0)
-		elog(ERROR, "could not update %s context", "SHA384");
-	if (pg_cryptohash_final(ctx, buf) < 0)
-		elog(ERROR, "could not finalize %s context", "SHA384");
-	pg_cryptohash_free(ctx);
-
-	result = palloc(sizeof(buf) + VARHDRSZ);
-	SET_VARSIZE(result, sizeof(buf) + VARHDRSZ);
-	memcpy(VARDATA(result), buf, sizeof(buf));
-
+	bytea   *result = cryptohash_internal(PG_GETARG_BYTEA_PP(0),
+										  PG_SHA384_DIGEST_LENGTH,
+										  PG_SHA384);
 	PG_RETURN_BYTEA_P(result);
 }
 
 Datum
 sha512_bytea(PG_FUNCTION_ARGS)
 {
-	bytea	   *in = PG_GETARG_BYTEA_PP(0);
-	const uint8 *data;
-	size_t		len;
-	pg_cryptohash_ctx *ctx;
-	unsigned char buf[PG_SHA512_DIGEST_LENGTH];
-	bytea	   *result;
-
-	len = VARSIZE_ANY_EXHDR(in);
-	data = (unsigned char *) VARDATA_ANY(in);
-
-	ctx = pg_cryptohash_create(PG_SHA512);
-	if (pg_cryptohash_init(ctx) < 0)
-		elog(ERROR, "could not initialize %s context", "SHA512");
-	if (pg_cryptohash_update(ctx, data, len) < 0)
-		elog(ERROR, "could not update %s context", "SHA512");
-	if (pg_cryptohash_final(ctx, buf) < 0)
-		elog(ERROR, "could not finalize %s context", "SHA512");
-	pg_cryptohash_free(ctx);
-
-	result = palloc(sizeof(buf) + VARHDRSZ);
-	SET_VARSIZE(result, sizeof(buf) + VARHDRSZ);
-	memcpy(VARDATA(result), buf, sizeof(buf));
-
+	bytea   *result = cryptohash_internal(PG_GETARG_BYTEA_PP(0),
+										  PG_SHA512_DIGEST_LENGTH,
+										  PG_SHA512);
 	PG_RETURN_BYTEA_P(result);
 }
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 254ca06d3d..4660313fcb 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -888,6 +888,7 @@ xid8ge(xid8,xid8)
 xid8eq(xid8,xid8)
 xid8ne(xid8,xid8)
 xid8cmp(xid8,xid8)
+sha1(bytea)
 -- restore normal output mode
 \a\t
 -- List of functions used by libpq's fe-lobj.c
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 7c91afa6e4..070b41dccd 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -1769,9 +1769,21 @@ select md5('12345678901234567890123456789012345678901234567890123456789012345678
 (1 row)
 
 --
--- SHA-2
+-- SHA-1 and SHA-2
 --
 SET bytea_output TO hex;
+SELECT sha1('');
+                    sha1                    
+--------------------------------------------
+ \xda39a3ee5e6b4b0d3255bfef95601890afd80709
+(1 row)
+
+SELECT sha1('The quick brown fox jumps over the lazy dog.');
+                    sha1                    
+--------------------------------------------
+ \x408d94384216f890ff7a0c3528e8bed1e0b01621
+(1 row)
+
 SELECT sha224('');
                            sha224                           
 ------------------------------------------------------------
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index ef4bfb008a..2e5f16a267 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -612,10 +612,13 @@ select md5('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'::byt
 select md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890'::bytea) = '57edf4a22be3c955ac49da2e2107b67a' AS "TRUE";
 
 --
--- SHA-2
+-- SHA-1 and SHA-2
 --
 SET bytea_output TO hex;
 
+SELECT sha1('');
+SELECT sha1('The quick brown fox jumps over the lazy dog.');
+
 SELECT sha224('');
 SELECT sha224('The quick brown fox jumps over the lazy dog.');
 
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index aa99665e2e..d9dff6d836 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -4208,6 +4208,24 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>sha1</primary>
+        </indexterm>
+        <function>sha1</function> ( <type>bytea</type> )
+        <returnvalue>bytea</returnvalue>
+       </para>
+       <para>
+        Computes the SHA-1 <link linkend="functions-hash-note">hash</link>
+        of the binary string.
+       </para>
+       <para>
+        <literal>sha1('abc'::bytea)</literal>
+        <returnvalue>\xa9993e364706816aba3e25717850&zwsp;c26c9cd0d89d</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
#2Noah Misch
noah@leadboat.com
In reply to: Michael Paquier (#1)
Re: Add SQL function for SHA1

On Mon, Jan 25, 2021 at 10:12:28PM +0900, Michael Paquier wrote:

SHA-1 is now an option available for cryptohashes, and like the
existing set of functions of SHA-2, I don't really see a reason why we
should not have a SQL function for SHA1.

NIST deprecated SHA1 over ten years ago. It's too late to be adding this.

#3Sehrope Sarkuni
sehrope@jackdb.com
In reply to: Michael Paquier (#1)
1 attachment(s)
Re: Add SQL function for SHA1

+1 to adding a SHA1 SQL function. Even if it's deprecated, there's plenty
of historical usage that I can see it being useful.

Either way, the rest of the refactor can be improved a bit to perform a
single palloc() and remove the memcpy(). Attached is a diff for
cryptohashfuncs.c that does that by writing the digest final directly to
the result. It also removes the digest length arg and determines it in the
switch block. There's only one correct digest length for each type so
there's no reason to give callers the option to give the wrong one.

Regards,
-- Sehrope Sarkuni
Founder & CEO | JackDB, Inc. | https://www.jackdb.com/

Attachments:

cryptohash-refactor-and-add-sha1.diffapplication/x-patch; name=cryptohash-refactor-and-add-sha1.diffDownload
diff --git a/src/backend/utils/adt/cryptohashfuncs.c b/src/backend/utils/adt/cryptohashfuncs.c
index d99485f4c6..8dc695e80c 100644
--- a/src/backend/utils/adt/cryptohashfuncs.c
+++ b/src/backend/utils/adt/cryptohashfuncs.c
@@ -15,6 +15,7 @@
 
 #include "common/cryptohash.h"
 #include "common/md5.h"
+#include "common/sha1.h"
 #include "common/sha2.h"
 #include "utils/builtins.h"
 
@@ -68,6 +69,77 @@ md5_bytea(PG_FUNCTION_ARGS)
 	PG_RETURN_TEXT_P(cstring_to_text(hexsum));
 }
 
+/*
+ * Internal routine to compute a cryptohash with the given bytea input.
+ */
+static inline bytea *
+cryptohash_internal(pg_cryptohash_type type, bytea *input)
+{
+	const uint8 *data;
+	const char  *typestr;
+	int			digest_len;
+	size_t		len;
+	pg_cryptohash_ctx *ctx;
+	bytea	   *result;
+
+	switch (type)
+	{
+		case PG_MD5:
+			typestr = "MD5";
+			digest_len = MD5_DIGEST_LENGTH;
+			break;
+		case PG_SHA1:
+			typestr = "SHA1";
+			digest_len = SHA1_DIGEST_LENGTH;
+			break;
+		case PG_SHA224:
+			typestr = "SHA224";
+			digest_len = PG_SHA224_DIGEST_LENGTH;
+			break;
+		case PG_SHA256:
+			typestr = "SHA256";
+			digest_len = PG_SHA256_DIGEST_LENGTH;
+			break;
+		case PG_SHA384:
+			typestr = "SHA384";
+			digest_len = PG_SHA384_DIGEST_LENGTH;
+			break;
+		case PG_SHA512:
+			typestr = "SHA512";
+			digest_len = PG_SHA512_DIGEST_LENGTH;
+			break;
+		default:
+			elog(ERROR, "unsupported digest type %d", type);
+	}
+
+	result = palloc0(digest_len + VARHDRSZ);
+	len = VARSIZE_ANY_EXHDR(input);
+	data = (unsigned char *) VARDATA_ANY(input);
+
+	ctx = pg_cryptohash_create(type);
+	if (pg_cryptohash_init(ctx) < 0)
+		elog(ERROR, "could not initialize %s context", typestr);
+	if (pg_cryptohash_update(ctx, data, len) < 0)
+		elog(ERROR, "could not update %s context", typestr);
+	if (pg_cryptohash_final(ctx, (unsigned char *) VARDATA(result)) < 0)
+		elog(ERROR, "could not finalize %s context", typestr);
+	pg_cryptohash_free(ctx);
+
+	SET_VARSIZE(result, digest_len + VARHDRSZ);
+
+	return result;
+}
+
+/*
+ * SHA-1
+ */
+Datum
+sha1_bytea(PG_FUNCTION_ARGS)
+{
+	bytea   *result = cryptohash_internal(PG_SHA1, PG_GETARG_BYTEA_PP(0));
+
+	PG_RETURN_BYTEA_P(result);
+}
 
 /*
  * SHA-2 variants
@@ -76,28 +148,7 @@ md5_bytea(PG_FUNCTION_ARGS)
 Datum
 sha224_bytea(PG_FUNCTION_ARGS)
 {
-	bytea	   *in = PG_GETARG_BYTEA_PP(0);
-	const uint8 *data;
-	size_t		len;
-	pg_cryptohash_ctx *ctx;
-	unsigned char buf[PG_SHA224_DIGEST_LENGTH];
-	bytea	   *result;
-
-	len = VARSIZE_ANY_EXHDR(in);
-	data = (unsigned char *) VARDATA_ANY(in);
-
-	ctx = pg_cryptohash_create(PG_SHA224);
-	if (pg_cryptohash_init(ctx) < 0)
-		elog(ERROR, "could not initialize %s context", "SHA224");
-	if (pg_cryptohash_update(ctx, data, len) < 0)
-		elog(ERROR, "could not update %s context", "SHA224");
-	if (pg_cryptohash_final(ctx, buf) < 0)
-		elog(ERROR, "could not finalize %s context", "SHA224");
-	pg_cryptohash_free(ctx);
-
-	result = palloc(sizeof(buf) + VARHDRSZ);
-	SET_VARSIZE(result, sizeof(buf) + VARHDRSZ);
-	memcpy(VARDATA(result), buf, sizeof(buf));
+	bytea   *result = cryptohash_internal(PG_SHA224, PG_GETARG_BYTEA_PP(0));
 
 	PG_RETURN_BYTEA_P(result);
 }
@@ -105,28 +156,7 @@ sha224_bytea(PG_FUNCTION_ARGS)
 Datum
 sha256_bytea(PG_FUNCTION_ARGS)
 {
-	bytea	   *in = PG_GETARG_BYTEA_PP(0);
-	const uint8 *data;
-	size_t		len;
-	pg_cryptohash_ctx *ctx;
-	unsigned char buf[PG_SHA256_DIGEST_LENGTH];
-	bytea	   *result;
-
-	len = VARSIZE_ANY_EXHDR(in);
-	data = (unsigned char *) VARDATA_ANY(in);
-
-	ctx = pg_cryptohash_create(PG_SHA256);
-	if (pg_cryptohash_init(ctx) < 0)
-		elog(ERROR, "could not initialize %s context", "SHA256");
-	if (pg_cryptohash_update(ctx, data, len) < 0)
-		elog(ERROR, "could not update %s context", "SHA256");
-	if (pg_cryptohash_final(ctx, buf) < 0)
-		elog(ERROR, "could not finalize %s context", "SHA256");
-	pg_cryptohash_free(ctx);
-
-	result = palloc(sizeof(buf) + VARHDRSZ);
-	SET_VARSIZE(result, sizeof(buf) + VARHDRSZ);
-	memcpy(VARDATA(result), buf, sizeof(buf));
+	bytea   *result = cryptohash_internal(PG_SHA256, PG_GETARG_BYTEA_PP(0));
 
 	PG_RETURN_BYTEA_P(result);
 }
@@ -134,28 +164,7 @@ sha256_bytea(PG_FUNCTION_ARGS)
 Datum
 sha384_bytea(PG_FUNCTION_ARGS)
 {
-	bytea	   *in = PG_GETARG_BYTEA_PP(0);
-	const uint8 *data;
-	size_t		len;
-	pg_cryptohash_ctx *ctx;
-	unsigned char buf[PG_SHA384_DIGEST_LENGTH];
-	bytea	   *result;
-
-	len = VARSIZE_ANY_EXHDR(in);
-	data = (unsigned char *) VARDATA_ANY(in);
-
-	ctx = pg_cryptohash_create(PG_SHA384);
-	if (pg_cryptohash_init(ctx) < 0)
-		elog(ERROR, "could not initialize %s context", "SHA384");
-	if (pg_cryptohash_update(ctx, data, len) < 0)
-		elog(ERROR, "could not update %s context", "SHA384");
-	if (pg_cryptohash_final(ctx, buf) < 0)
-		elog(ERROR, "could not finalize %s context", "SHA384");
-	pg_cryptohash_free(ctx);
-
-	result = palloc(sizeof(buf) + VARHDRSZ);
-	SET_VARSIZE(result, sizeof(buf) + VARHDRSZ);
-	memcpy(VARDATA(result), buf, sizeof(buf));
+	bytea   *result = cryptohash_internal(PG_SHA384, PG_GETARG_BYTEA_PP(0));
 
 	PG_RETURN_BYTEA_P(result);
 }
@@ -163,28 +172,7 @@ sha384_bytea(PG_FUNCTION_ARGS)
 Datum
 sha512_bytea(PG_FUNCTION_ARGS)
 {
-	bytea	   *in = PG_GETARG_BYTEA_PP(0);
-	const uint8 *data;
-	size_t		len;
-	pg_cryptohash_ctx *ctx;
-	unsigned char buf[PG_SHA512_DIGEST_LENGTH];
-	bytea	   *result;
-
-	len = VARSIZE_ANY_EXHDR(in);
-	data = (unsigned char *) VARDATA_ANY(in);
-
-	ctx = pg_cryptohash_create(PG_SHA512);
-	if (pg_cryptohash_init(ctx) < 0)
-		elog(ERROR, "could not initialize %s context", "SHA512");
-	if (pg_cryptohash_update(ctx, data, len) < 0)
-		elog(ERROR, "could not update %s context", "SHA512");
-	if (pg_cryptohash_final(ctx, buf) < 0)
-		elog(ERROR, "could not finalize %s context", "SHA512");
-	pg_cryptohash_free(ctx);
-
-	result = palloc(sizeof(buf) + VARHDRSZ);
-	SET_VARSIZE(result, sizeof(buf) + VARHDRSZ);
-	memcpy(VARDATA(result), buf, sizeof(buf));
+	bytea   *result = cryptohash_internal(PG_SHA512, PG_GETARG_BYTEA_PP(0));
 
 	PG_RETURN_BYTEA_P(result);
 }
#4Michael Paquier
michael@paquier.xyz
In reply to: Sehrope Sarkuni (#3)
Re: Add SQL function for SHA1

On Mon, Jan 25, 2021 at 10:42:25PM -0500, Sehrope Sarkuni wrote:

+1 to adding a SHA1 SQL function. Even if it's deprecated, there's plenty
of historical usage that I can see it being useful.

Let's wait for more opinions to see if we agree that this addition is
helpful or not. Even if this is not added, I think that there is
still value in refactoring the code anyway for the SHA-2 functions.

Either way, the rest of the refactor can be improved a bit to perform a
single palloc() and remove the memcpy(). Attached is a diff for
cryptohashfuncs.c that does that by writing the digest final directly to
the result. It also removes the digest length arg and determines it in the
switch block. There's only one correct digest length for each type so
there's no reason to give callers the option to give the wrong one.

Yeah, what you have here is better.

+      default:
+          elog(ERROR, "unsupported digest type %d", type);
Not using a default clause is the purpose here, as it would generate a
compilation warning if a value in the enum is forgotten.  Hence, if a
new option is added to pg_cryptohash_type in the future, people won't
miss that they could add a SQL function for the new option.  If we
decide that MD5 and SHA1 have no need to use this code path, I'd
rather just use elog(ERROR) instead.
--
Michael
#5Kenneth Marshall
ktm@rice.edu
In reply to: Michael Paquier (#4)
Re: Add SQL function for SHA1

On Tue, Jan 26, 2021 at 01:06:29PM +0900, Michael Paquier wrote:

On Mon, Jan 25, 2021 at 10:42:25PM -0500, Sehrope Sarkuni wrote:

+1 to adding a SHA1 SQL function. Even if it's deprecated, there's plenty
of historical usage that I can see it being useful.

Let's wait for more opinions to see if we agree that this addition is
helpful or not. Even if this is not added, I think that there is
still value in refactoring the code anyway for the SHA-2 functions.

+1 I know that it has been deprecated, but it can be very useful when
working with data from pre-deprecation. :) It is annoying to have to
resort to plperl or plpython because it is not available. The lack or
orthogonality is painful.

Regards,
Ken

#6Bruce Momjian
bruce@momjian.us
In reply to: Kenneth Marshall (#5)
Re: Add SQL function for SHA1

On Mon, Jan 25, 2021 at 10:23:30PM -0600, Kenneth Marshall wrote:

On Tue, Jan 26, 2021 at 01:06:29PM +0900, Michael Paquier wrote:

On Mon, Jan 25, 2021 at 10:42:25PM -0500, Sehrope Sarkuni wrote:

+1 to adding a SHA1 SQL function. Even if it's deprecated, there's plenty
of historical usage that I can see it being useful.

Let's wait for more opinions to see if we agree that this addition is
helpful or not. Even if this is not added, I think that there is
still value in refactoring the code anyway for the SHA-2 functions.

+1 I know that it has been deprecated, but it can be very useful when
working with data from pre-deprecation. :) It is annoying to have to
resort to plperl or plpython because it is not available. The lack or
orthogonality is painful.

Yes, I think having SHA1 makes sense --- there are probably still valid
uses for it.

--
Bruce Momjian <bruce@momjian.us> https://momjian.us
EDB https://enterprisedb.com

The usefulness of a cup is in its emptiness, Bruce Lee

#7David Fetter
david@fetter.org
In reply to: Michael Paquier (#1)
Re: Add SQL function for SHA1

On Mon, Jan 25, 2021 at 10:12:28PM +0900, Michael Paquier wrote:

Hi all,

SHA-1 is now an option available for cryptohashes, and like the
existing set of functions of SHA-2, I don't really see a reason why we
should not have a SQL function for SHA1. Attached is a patch doing
that.

Thanks for doing this!

While there are applications SHA1 is no longer good for, there are
plenty where it's still in play. One I use frequently is git. While
there are plans for creating an upgrade path to more cryptographically
secure hashes, it will take some years before repositories have
converted over.

Best,
David.
--
David Fetter <david(at)fetter(dot)org> http://fetter.org/
Phone: +1 415 235 3778

Remember to vote!
Consider donating to Postgres: http://www.postgresql.org/about/donate

#8Michael Paquier
michael@paquier.xyz
In reply to: Bruce Momjian (#6)
Re: Add SQL function for SHA1

On Mon, Jan 25, 2021 at 11:27:28PM -0500, Bruce Momjian wrote:

On Mon, Jan 25, 2021 at 10:23:30PM -0600, Kenneth Marshall wrote:

+1 I know that it has been deprecated, but it can be very useful when
working with data from pre-deprecation. :) It is annoying to have to
resort to plperl or plpython because it is not available. The lack or
orthogonality is painful.

plperl and plpython can be annoying to require if you have strong
security requirements as these are untrusted languages, but I don't
completely agree with this argument because pgcrypto gives the option
to use SHA1 with digest(), and this one is fine to have even in
environments under STIG or equally-constrained environments.

Yes, I think having SHA1 makes sense --- there are probably still valid
uses for it.

Consistency with the existing in-core SQL functions for cryptohashes
and the possibility to not need pgcrypto are my only arguments at
hand.

;)
--
Michael

#9Daniel Gustafsson
daniel@yesql.se
In reply to: Noah Misch (#2)
Re: Add SQL function for SHA1

On 26 Jan 2021, at 04:28, Noah Misch <noah@leadboat.com> wrote:

On Mon, Jan 25, 2021 at 10:12:28PM +0900, Michael Paquier wrote:

SHA-1 is now an option available for cryptohashes, and like the
existing set of functions of SHA-2, I don't really see a reason why we
should not have a SQL function for SHA1.

NIST deprecated SHA1 over ten years ago. It's too late to be adding this.

Agreed, and pgcrypto already allows for using sha1.

It seems like any legitimate need for sha1 could be better served by an
extension rather than supplying it in-core.

--
Daniel Gustafsson https://vmware.com/

#10Michael Paquier
michael@paquier.xyz
In reply to: Daniel Gustafsson (#9)
1 attachment(s)
Re: Add SQL function for SHA1

On Tue, Jan 26, 2021 at 10:38:43AM +0100, Daniel Gustafsson wrote:

Agreed, and pgcrypto already allows for using sha1.

It seems like any legitimate need for sha1 could be better served by an
extension rather than supplying it in-core.

Both of you telling the same thing is enough for me to discard this
new stuff. I'd like to refactor the code anyway as that's a nice
cleanup, and this would have the advantage to make people look at
cryptohashfuncs.c if introducing a new type. After sleeping about it,
I think that I would just make MD5 and SHA1 issue an elog(ERROR) if
the internal routine is taken in those cases, like in the attached.

If there are any comments or objections to the refactoring piece,
please let me know.
--
Michael

Attachments:

cryptohashfunc-refactor.patchtext/x-diff; charset=us-asciiDownload
diff --git a/src/backend/utils/adt/cryptohashfuncs.c b/src/backend/utils/adt/cryptohashfuncs.c
index d99485f4c6..4bb684a08b 100644
--- a/src/backend/utils/adt/cryptohashfuncs.c
+++ b/src/backend/utils/adt/cryptohashfuncs.c
@@ -68,6 +68,60 @@ md5_bytea(PG_FUNCTION_ARGS)
 	PG_RETURN_TEXT_P(cstring_to_text(hexsum));
 }
 
+/*
+ * Internal routine to compute a cryptohash with the given bytea input.
+ */
+static inline bytea *
+cryptohash_internal(pg_cryptohash_type type, bytea *input)
+{
+	const uint8 *data;
+	const char  *typestr = NULL;
+	int			digest_len = 0;
+	size_t		len;
+	pg_cryptohash_ctx *ctx;
+	bytea	   *result;
+
+	switch (type)
+	{
+		case PG_SHA224:
+			typestr = "SHA224";
+			digest_len = PG_SHA224_DIGEST_LENGTH;
+			break;
+		case PG_SHA256:
+			typestr = "SHA256";
+			digest_len = PG_SHA256_DIGEST_LENGTH;
+			break;
+		case PG_SHA384:
+			typestr = "SHA384";
+			digest_len = PG_SHA384_DIGEST_LENGTH;
+			break;
+		case PG_SHA512:
+			typestr = "SHA512";
+			digest_len = PG_SHA512_DIGEST_LENGTH;
+			break;
+		case PG_MD5:
+		case PG_SHA1:
+			elog(ERROR, "unsupported digest type %d", type);
+			break;
+	}
+
+	result = palloc0(digest_len + VARHDRSZ);
+	len = VARSIZE_ANY_EXHDR(input);
+	data = (unsigned char *) VARDATA_ANY(input);
+
+	ctx = pg_cryptohash_create(type);
+	if (pg_cryptohash_init(ctx) < 0)
+		elog(ERROR, "could not initialize %s context", typestr);
+	if (pg_cryptohash_update(ctx, data, len) < 0)
+		elog(ERROR, "could not update %s context", typestr);
+	if (pg_cryptohash_final(ctx, (unsigned char *) VARDATA(result)) < 0)
+		elog(ERROR, "could not finalize %s context", typestr);
+	pg_cryptohash_free(ctx);
+
+	SET_VARSIZE(result, digest_len + VARHDRSZ);
+
+	return result;
+}
 
 /*
  * SHA-2 variants
@@ -76,28 +130,7 @@ md5_bytea(PG_FUNCTION_ARGS)
 Datum
 sha224_bytea(PG_FUNCTION_ARGS)
 {
-	bytea	   *in = PG_GETARG_BYTEA_PP(0);
-	const uint8 *data;
-	size_t		len;
-	pg_cryptohash_ctx *ctx;
-	unsigned char buf[PG_SHA224_DIGEST_LENGTH];
-	bytea	   *result;
-
-	len = VARSIZE_ANY_EXHDR(in);
-	data = (unsigned char *) VARDATA_ANY(in);
-
-	ctx = pg_cryptohash_create(PG_SHA224);
-	if (pg_cryptohash_init(ctx) < 0)
-		elog(ERROR, "could not initialize %s context", "SHA224");
-	if (pg_cryptohash_update(ctx, data, len) < 0)
-		elog(ERROR, "could not update %s context", "SHA224");
-	if (pg_cryptohash_final(ctx, buf) < 0)
-		elog(ERROR, "could not finalize %s context", "SHA224");
-	pg_cryptohash_free(ctx);
-
-	result = palloc(sizeof(buf) + VARHDRSZ);
-	SET_VARSIZE(result, sizeof(buf) + VARHDRSZ);
-	memcpy(VARDATA(result), buf, sizeof(buf));
+	bytea   *result = cryptohash_internal(PG_SHA224, PG_GETARG_BYTEA_PP(0));
 
 	PG_RETURN_BYTEA_P(result);
 }
@@ -105,28 +138,7 @@ sha224_bytea(PG_FUNCTION_ARGS)
 Datum
 sha256_bytea(PG_FUNCTION_ARGS)
 {
-	bytea	   *in = PG_GETARG_BYTEA_PP(0);
-	const uint8 *data;
-	size_t		len;
-	pg_cryptohash_ctx *ctx;
-	unsigned char buf[PG_SHA256_DIGEST_LENGTH];
-	bytea	   *result;
-
-	len = VARSIZE_ANY_EXHDR(in);
-	data = (unsigned char *) VARDATA_ANY(in);
-
-	ctx = pg_cryptohash_create(PG_SHA256);
-	if (pg_cryptohash_init(ctx) < 0)
-		elog(ERROR, "could not initialize %s context", "SHA256");
-	if (pg_cryptohash_update(ctx, data, len) < 0)
-		elog(ERROR, "could not update %s context", "SHA256");
-	if (pg_cryptohash_final(ctx, buf) < 0)
-		elog(ERROR, "could not finalize %s context", "SHA256");
-	pg_cryptohash_free(ctx);
-
-	result = palloc(sizeof(buf) + VARHDRSZ);
-	SET_VARSIZE(result, sizeof(buf) + VARHDRSZ);
-	memcpy(VARDATA(result), buf, sizeof(buf));
+	bytea   *result = cryptohash_internal(PG_SHA256, PG_GETARG_BYTEA_PP(0));
 
 	PG_RETURN_BYTEA_P(result);
 }
@@ -134,28 +146,7 @@ sha256_bytea(PG_FUNCTION_ARGS)
 Datum
 sha384_bytea(PG_FUNCTION_ARGS)
 {
-	bytea	   *in = PG_GETARG_BYTEA_PP(0);
-	const uint8 *data;
-	size_t		len;
-	pg_cryptohash_ctx *ctx;
-	unsigned char buf[PG_SHA384_DIGEST_LENGTH];
-	bytea	   *result;
-
-	len = VARSIZE_ANY_EXHDR(in);
-	data = (unsigned char *) VARDATA_ANY(in);
-
-	ctx = pg_cryptohash_create(PG_SHA384);
-	if (pg_cryptohash_init(ctx) < 0)
-		elog(ERROR, "could not initialize %s context", "SHA384");
-	if (pg_cryptohash_update(ctx, data, len) < 0)
-		elog(ERROR, "could not update %s context", "SHA384");
-	if (pg_cryptohash_final(ctx, buf) < 0)
-		elog(ERROR, "could not finalize %s context", "SHA384");
-	pg_cryptohash_free(ctx);
-
-	result = palloc(sizeof(buf) + VARHDRSZ);
-	SET_VARSIZE(result, sizeof(buf) + VARHDRSZ);
-	memcpy(VARDATA(result), buf, sizeof(buf));
+	bytea   *result = cryptohash_internal(PG_SHA384, PG_GETARG_BYTEA_PP(0));
 
 	PG_RETURN_BYTEA_P(result);
 }
@@ -163,28 +154,7 @@ sha384_bytea(PG_FUNCTION_ARGS)
 Datum
 sha512_bytea(PG_FUNCTION_ARGS)
 {
-	bytea	   *in = PG_GETARG_BYTEA_PP(0);
-	const uint8 *data;
-	size_t		len;
-	pg_cryptohash_ctx *ctx;
-	unsigned char buf[PG_SHA512_DIGEST_LENGTH];
-	bytea	   *result;
-
-	len = VARSIZE_ANY_EXHDR(in);
-	data = (unsigned char *) VARDATA_ANY(in);
-
-	ctx = pg_cryptohash_create(PG_SHA512);
-	if (pg_cryptohash_init(ctx) < 0)
-		elog(ERROR, "could not initialize %s context", "SHA512");
-	if (pg_cryptohash_update(ctx, data, len) < 0)
-		elog(ERROR, "could not update %s context", "SHA512");
-	if (pg_cryptohash_final(ctx, buf) < 0)
-		elog(ERROR, "could not finalize %s context", "SHA512");
-	pg_cryptohash_free(ctx);
-
-	result = palloc(sizeof(buf) + VARHDRSZ);
-	SET_VARSIZE(result, sizeof(buf) + VARHDRSZ);
-	memcpy(VARDATA(result), buf, sizeof(buf));
+	bytea   *result = cryptohash_internal(PG_SHA512, PG_GETARG_BYTEA_PP(0));
 
 	PG_RETURN_BYTEA_P(result);
 }
#11Sehrope Sarkuni
sehrope@jackdb.com
In reply to: Michael Paquier (#10)
Re: Add SQL function for SHA1

On Tue, Jan 26, 2021 at 8:53 PM Michael Paquier <michael@paquier.xyz> wrote:

On Tue, Jan 26, 2021 at 10:38:43AM +0100, Daniel Gustafsson wrote:

Agreed, and pgcrypto already allows for using sha1.

It seems like any legitimate need for sha1 could be better served by an
extension rather than supplying it in-core.

Both of you telling the same thing is enough for me to discard this
new stuff. I'd like to refactor the code anyway as that's a nice
cleanup, and this would have the advantage to make people look at
cryptohashfuncs.c if introducing a new type. After sleeping about it,
I think that I would just make MD5 and SHA1 issue an elog(ERROR) if
the internal routine is taken in those cases, like in the attached.

The refactor patch looks good. It builds and passes make check.

Thanks for the enum explanation too.

Regards,
-- Sehrope Sarkuni
Founder & CEO | JackDB, Inc. | https://www.jackdb.com/

#12Michael Paquier
michael@paquier.xyz
In reply to: Sehrope Sarkuni (#11)
Re: Add SQL function for SHA1

On Tue, Jan 26, 2021 at 09:53:52PM -0500, Sehrope Sarkuni wrote:

The refactor patch looks good. It builds and passes make check.

Thanks for double-checking! The refactoring has been just done as of
f854c69.
--
Michael