diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml new file mode 100644 index 4c5af4b..b31d824 *** a/doc/src/sgml/func.sgml --- b/doc/src/sgml/func.sgml *************** SELECT NULLIF(value, '(none)') ... *** 11588,11593 **** --- 11588,11622 ---- + md5_agg + + md5_agg(expression) + + text or bytea + text + + MD5 hash of the concatenated input values + + + + + + + md5_total + + md5_total(expression) + + text or bytea + text + + sum of the MD5 hash values of the input values, independent of their + order + + + + + + min min(expression) *************** SELECT count(*) FROM sometable; *** 11714,11720 **** The aggregate functions array_agg, ! json_agg, string_agg, and xmlagg, as well as similar user-defined aggregate functions, produce meaningfully different result values --- 11743,11749 ---- The aggregate functions array_agg, ! json_agg, md5_agg, string_agg, and xmlagg, as well as similar user-defined aggregate functions, produce meaningfully different result values diff --git a/src/backend/libpq/md5.c b/src/backend/libpq/md5.c new file mode 100644 index 4fc8318..b677066 *** a/src/backend/libpq/md5.c --- b/src/backend/libpq/md5.c *************** *** 1,14 **** /* * md5.c * ! * Implements the MD5 Message-Digest Algorithm as specified in ! * RFC 1321. This implementation is a simple one, in that it ! * needs every input byte to be buffered before doing any ! * calculations. I do not expect this file to be used for ! * general purpose MD5'ing of large amounts of data, only for ! * generating hashed passwords from limited input. * ! * Sverre H. Huseby * * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California --- 1,9 ---- /* * md5.c * ! * Implements the MD5 Message-Digest Algorithm as specified in RFC 1321. * ! * Original coding by Sverre H. Huseby * * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California *************** *** 27,89 **** * PRIVATE FUNCTIONS */ - - /* - * The returned array is allocated using malloc. the caller should free it - * when it is no longer needed. - */ - static uint8 * - createPaddedCopyWithLength(const uint8 *b, uint32 *l) - { - uint8 *ret; - uint32 q; - uint32 len, - newLen448; - uint32 len_high, - len_low; /* 64-bit value split into 32-bit sections */ - - len = ((b == NULL) ? 0 : *l); - newLen448 = len + 64 - (len % 64) - 8; - if (newLen448 <= len) - newLen448 += 64; - - *l = newLen448 + 8; - if ((ret = (uint8 *) malloc(sizeof(uint8) * *l)) == NULL) - return NULL; - - if (b != NULL) - memcpy(ret, b, sizeof(uint8) * len); - - /* pad */ - ret[len] = 0x80; - for (q = len + 1; q < newLen448; q++) - ret[q] = 0x00; - - /* append length as a 64 bit bitcount */ - len_low = len; - /* split into two 32-bit values */ - /* we only look at the bottom 32-bits */ - len_high = len >> 29; - len_low <<= 3; - q = newLen448; - ret[q++] = (len_low & 0xff); - len_low >>= 8; - ret[q++] = (len_low & 0xff); - len_low >>= 8; - ret[q++] = (len_low & 0xff); - len_low >>= 8; - ret[q++] = (len_low & 0xff); - ret[q++] = (len_high & 0xff); - len_high >>= 8; - ret[q++] = (len_high & 0xff); - len_high >>= 8; - ret[q++] = (len_high & 0xff); - len_high >>= 8; - ret[q] = (len_high & 0xff); - - return ret; - } - #define F(x, y, z) (((x) & (y)) | (~(x) & (z))) #define G(x, y, z) (((x) & (z)) | ((y) & ~(z))) #define H(x, y, z) ((x) ^ (y) ^ (z)) --- 22,27 ---- *************** doTheRounds(uint32 X[16], uint32 state[4 *** 181,233 **** state[3] += d; } ! static int ! calculateDigestFromBuffer(const uint8 *b, uint32 len, uint8 sum[16]) { - register uint32 i, - j, - k, - newI; - uint32 l; - uint8 *input; - register uint32 *wbp; - uint32 workBuff[16], - state[4]; - - l = len; - state[0] = 0x67452301; state[1] = 0xEFCDAB89; state[2] = 0x98BADCFE; state[3] = 0x10325476; ! if ((input = createPaddedCopyWithLength(b, &l)) == NULL) ! return 0; ! for (i = 0;;) { ! if ((newI = i + 16 * 4) > l) ! break; ! k = i + 3; ! for (j = 0; j < 16; j++) ! { ! wbp = (workBuff + j); ! *wbp = input[k--]; ! *wbp <<= 8; ! *wbp |= input[k--]; ! *wbp <<= 8; ! *wbp |= input[k--]; ! *wbp <<= 8; ! *wbp |= input[k]; ! k += 7; ! } ! doTheRounds(workBuff, state); ! i = newI; } ! free(input); ! j = 0; ! for (i = 0; i < 4; i++) { k = state[i]; sum[j++] = (k & 0xff); --- 119,201 ---- state[3] += d; } ! static inline void ! initializeState(uint32 state[4]) { state[0] = 0x67452301; state[1] = 0xEFCDAB89; state[2] = 0x98BADCFE; state[3] = 0x10325476; + } ! static inline void ! processBlock(const uint8 input[64], uint32 state[4]) ! { ! uint32 buff[16]; ! uint32 i; ! uint32 j; ! uint32 *bp; ! for (i = 0, j = 3; i < 16; i++) { ! bp = (buff + i); ! *bp = input[j--]; ! *bp <<= 8; ! *bp |= input[j--]; ! *bp <<= 8; ! *bp |= input[j--]; ! *bp <<= 8; ! *bp |= input[j]; ! j += 7; } ! doTheRounds(buff, state); ! } ! static void ! calculateFinalSum(const uint8 input[64], uint32 state[4], ! uint32 len_low, uint32 len_high, uint8 sum[16]) ! { ! uint8 buff[128]; ! uint32 offset; ! uint32 size; ! uint32 i; ! uint32 j; ! uint32 k; ! ! /* Last partial data block */ ! offset = (len_low >> 3) & 63; ! if (offset > 0) ! memcpy(buff, input, offset); ! ! /* Pad to a complete block minus 8 bytes for the bit count */ ! size = offset < 56 ? 56 : 120; ! buff[offset++] = 0x80; ! while (offset < size) ! buff[offset++] = 0x00; ! ! /* Append the 64-bit bit count */ ! buff[offset++] = (len_low & 0xff); ! len_low >>= 8; ! buff[offset++] = (len_low & 0xff); ! len_low >>= 8; ! buff[offset++] = (len_low & 0xff); ! len_low >>= 8; ! buff[offset++] = (len_low & 0xff); ! buff[offset++] = (len_high & 0xff); ! len_high >>= 8; ! buff[offset++] = (len_high & 0xff); ! len_high >>= 8; ! buff[offset++] = (len_high & 0xff); ! len_high >>= 8; ! buff[offset] = (len_high & 0xff); ! ! /* Process these last 1 or 2 blocks */ ! processBlock(buff, state); ! if (size > 56) ! processBlock(buff+64, state); ! ! /* Produce the final MD5 sum */ ! for (i = j = 0; i < 4; i++) { k = state[i]; sum[j++] = (k & 0xff); *************** calculateDigestFromBuffer(const uint8 *b *** 238,259 **** k >>= 8; sum[j++] = (k & 0xff); } - return 1; } static void ! bytesToHex(uint8 b[16], char *s) { static const char *hex = "0123456789abcdef"; int q, w; ! for (q = 0, w = 0; q < 16; q++) { ! s[w++] = hex[(b[q] >> 4) & 0x0F]; ! s[w++] = hex[b[q] & 0x0F]; } ! s[w] = '\0'; } /* --- 206,260 ---- k >>= 8; sum[j++] = (k & 0xff); } } static void ! calculateDigestFromBuffer(const uint8 *b, uint32 len, uint8 sum[16]) ! { ! uint32 state[4]; ! uint32 len_low; ! uint32 len_high; ! ! initializeState(state); ! ! if (b == NULL) ! { ! /* Treated the same as empty input */ ! len_low = 0; ! len_high = 0; ! } ! else ! { ! /* Calculate the 64-bit bit count */ ! len_low = len << 3; ! len_high = len >> 29; ! ! /* Process any complete blocks of data */ ! while (len >= 64) ! { ! processBlock(b, state); ! b += 64; ! len -= 64; ! } ! } ! ! /* Produce the final MD5 sum from any remaining data */ ! calculateFinalSum(b, state, len_low, len_high, sum); ! } ! ! static void ! bytesToHex(uint8 *input, size_t input_len, char *output) { static const char *hex = "0123456789abcdef"; int q, w; ! for (q = 0, w = 0; q < input_len; q++) { ! output[w++] = hex[(input[q] >> 4) & 0x0F]; ! output[w++] = hex[input[q] & 0x0F]; } ! output[w] = '\0'; } /* *************** pg_md5_hash(const void *buff, size_t len *** 291,308 **** { uint8 sum[16]; ! if (!calculateDigestFromBuffer(buff, len, sum)) ! return false; ! ! bytesToHex(sum, hexsum); return true; } bool pg_md5_binary(const void *buff, size_t len, void *outbuf) { ! if (!calculateDigestFromBuffer(buff, len, outbuf)) ! return false; return true; } --- 292,306 ---- { uint8 sum[16]; ! calculateDigestFromBuffer(buff, len, sum); ! bytesToHex(sum, 16, hexsum); return true; } bool pg_md5_binary(const void *buff, size_t len, void *outbuf) { ! calculateDigestFromBuffer(buff, len, outbuf); return true; } *************** pg_md5_encrypt(const char *passwd, const *** 343,345 **** --- 341,525 ---- return ret; } + + /* + * pg_md5_init + * + * Initialize or reset the specified MD5State for performing a new + * cumulative MD5 computation. + */ + void + pg_md5_init(MD5State *state) + { + state->len_low = 0; + state->len_high = 0; + initializeState(state->state); + } + + /* + * pg_md5_update + * + * Update the specified MD5 message digest by adding the specified data. + * This may be called multiple times after the MD5State has been + * initialized with pg_md5_init, and before calling pg_md5_final_hash. + */ + void + pg_md5_update(MD5State *state, const void *buff, size_t len) + { + uint32 offset; + const uint8 *input; + uint32 nbits; + + /* Size of previous partial data block */ + offset = (state->len_low >> 3) & 63; + input = (uint8 *) buff; + + /* Update the bit count */ + nbits = (uint32) (len << 3); + state->len_low += nbits; + state->len_high += len >> 29; + if (state->len_low < nbits) + state->len_high++; /* overflow */ + + /* Process the previous partial block of data */ + if (offset > 0) + { + int copy = offset + len > 64 ? 64 - offset : len; + + memcpy(state->buffer + offset, input, copy); + if (offset + copy < 64) + return; /* still don't have a complete block of data */ + + processBlock(state->buffer, state->state); + input += copy; + len -= copy; + } + + /* Process any complete blocks of data */ + while (len >= 64) + { + processBlock(input, state->state); + input += 64; + len -= 64; + } + + /* Save any remaining partial data block for next time */ + if (len > 0) + memcpy(state->buffer, input, len); + } + + /* + * pg_md5_final_hash + * + * Compute the final MD5 sum of the data added to the specified MD5State. + * + * Once this has been called, it should not be called again, and no + * further data should be added to the MD5State without first resetting + * it using pg_md5_init. + */ + void + pg_md5_final_hash(MD5State *state, char *hexsum) + { + uint8 sum[16]; + + calculateFinalSum(state->buffer, state->state, + state->len_low, state->len_high, sum); + bytesToHex(sum, 16, hexsum); + } + + /* + * pg_md5s_init + * + * Initialize the specified MD5SumState for calculating a sum of MD5 + * sums. sum_len is the number of bytes to use for the sum of MD5 sums. + * This must be at least 16 (the size of a single MD5 sum). If the + * overall sum exceeds this size it will wrap around. + */ + void + pg_md5s_init(MD5SumState *state, size_t sum_len) + { + int i; + + /* The sum array in an MD5SumState is at least 16 bytes in size */ + state->sum_len = sum_len; + if (state->sum_len < 16) + state->sum_len = 16; + + /* Initialize the array to 0 */ + for (i = 0; i < state->sum_len; i++) + state->sum[i] = 0; + } + + /* + * pg_md5s_update + * + * Update the specified sum of MD5 sums by adding the MD5 sum of the + * specified data to the total. This may be called multiple times after + * the MD5SumState has been initialized with pg_md5s_init, and before + * calling pg_md5s_final_hash. + */ + void + pg_md5s_update(MD5SumState *state, const void *buff, size_t len) + { + uint8 md5[16]; + int i; + int j; + uint32 sum; + uint32 carry; + + /* Calculate the MD5 sum of the input */ + calculateDigestFromBuffer(buff, len, md5); + + /* Add it to our sum of MD5 sums */ + i = state->sum_len - 1; + j = 15; + carry = 0; + + while (j >= 0) + { + sum = (uint32) state->sum[i] + md5[j] + carry; + state->sum[i] = (uint8) (sum & 0xFF); + carry = sum >> 8; + i--; + j--; + } + while (carry > 0 && i >= 0) + { + sum = (uint32) state->sum[i] + carry; + state->sum[i] = (uint8) (sum & 0xFF); + carry = sum >> 8; + i--; + } + } + + /* + * pg_md5s_final_hash + * + * Return the final sum of MD5 sums of the data added to the specified + * MD5SumState. + */ + void + pg_md5s_final_hash(MD5SumState *state, char *hexsum) + { + size_t len; + uint8 *sum; + + /* Skip leading zeros */ + len = state->sum_len; + sum = state->sum; + + while (len > 0 && *sum == 0) + { + sum++; + len--; + } + + /* Output the sum as hexadecimal */ + if (len > 0) + bytesToHex(sum, len, hexsum); + else + { + hexsum[0] = '0'; + hexsum[1] = '\0'; + } + } diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c new file mode 100644 index 56349e7..722e425 *** a/src/backend/utils/adt/varlena.c --- b/src/backend/utils/adt/varlena.c *************** md5_text(PG_FUNCTION_ARGS) *** 3643,3652 **** len = VARSIZE_ANY_EXHDR(in_text); /* get the hash result */ ! if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false) ! ereport(ERROR, ! (errcode(ERRCODE_OUT_OF_MEMORY), ! errmsg("out of memory"))); /* convert to text and return it */ PG_RETURN_TEXT_P(cstring_to_text(hexsum)); --- 3643,3649 ---- len = VARSIZE_ANY_EXHDR(in_text); /* get the hash result */ ! pg_md5_hash(VARDATA_ANY(in_text), len, hexsum); /* convert to text and return it */ PG_RETURN_TEXT_P(cstring_to_text(hexsum)); *************** md5_bytea(PG_FUNCTION_ARGS) *** 3664,3678 **** char hexsum[MD5_HASH_LEN + 1]; len = VARSIZE_ANY_EXHDR(in); ! if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false) ! ereport(ERROR, ! (errcode(ERRCODE_OUT_OF_MEMORY), ! errmsg("out of memory"))); PG_RETURN_TEXT_P(cstring_to_text(hexsum)); } /* * Return the size of a datum, possibly compressed * * Works on any data type --- 3661,3889 ---- char hexsum[MD5_HASH_LEN + 1]; len = VARSIZE_ANY_EXHDR(in); ! pg_md5_hash(VARDATA_ANY(in), len, hexsum); PG_RETURN_TEXT_P(cstring_to_text(hexsum)); } /* + * md5_agg - Create an md5 hash of the input values. + * + * This is equivalent to using string_agg with an empty delimiter to + * concatenate all the input values, and then taking the MD5 sum of the + * result. However, doing that would use an excessive amount of memory, + * whereas md5_agg uses only a small fixed-size state structure. + */ + + static MD5State * + makeMD5State(FunctionCallInfo fcinfo) + { + MD5State *state; + MemoryContext aggcontext; + MemoryContext oldcontext; + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "md5_agg_transfn called in non-aggregate context"); + } + + oldcontext = MemoryContextSwitchTo(aggcontext); + state = (MD5State *) palloc(sizeof(MD5State)); + pg_md5_init(state); + MemoryContextSwitchTo(oldcontext); + + return state; + } + + Datum + md5_text_agg_transfn(PG_FUNCTION_ARGS) + { + MD5State *state; + + state = PG_ARGISNULL(0) ? NULL : (MD5State *) PG_GETARG_POINTER(0); + + /* Add the value to the MD5 sum, unless it is null */ + if (!PG_ARGISNULL(1)) + { + text *in_text = PG_GETARG_TEXT_PP(1); + size_t len; + + if (state == NULL) + state = makeMD5State(fcinfo); + + len = VARSIZE_ANY_EXHDR(in_text); + pg_md5_update(state, VARDATA_ANY(in_text), len); + } + + /* + * The transition type for md5_agg() is declared to be "internal", + * which is a pass-by-value type the same size as a pointer. + */ + PG_RETURN_POINTER(state); + } + + Datum + md5_bytea_agg_transfn(PG_FUNCTION_ARGS) + { + MD5State *state; + + state = PG_ARGISNULL(0) ? NULL : (MD5State *) PG_GETARG_POINTER(0); + + /* Add the value to the MD5 sum, unless it is null */ + if (!PG_ARGISNULL(1)) + { + bytea *in = PG_GETARG_BYTEA_PP(1); + size_t len; + + if (state == NULL) + state = makeMD5State(fcinfo); + + len = VARSIZE_ANY_EXHDR(in); + pg_md5_update(state, VARDATA_ANY(in), len); + } + + /* + * The transition type for md5_agg() is declared to be "internal", + * which is a pass-by-value type the same size as a pointer. + */ + PG_RETURN_POINTER(state); + } + + Datum + md5_agg_finalfn(PG_FUNCTION_ARGS) + { + MD5State *state; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + state = PG_ARGISNULL(0) ? NULL : (MD5State *) PG_GETARG_POINTER(0); + + if (state != NULL) + { + char hexsum[MD5_HASH_LEN + 1]; + + pg_md5_final_hash(state, hexsum); + + PG_RETURN_TEXT_P(cstring_to_text(hexsum)); + } + else + PG_RETURN_NULL(); + } + + /* + * md5_total - Create a sum of md5 hashes of the input values. + * + * This computes the md5 hash of each non-null input value and adds it to the + * total, treating it like a 128-bit integer. The result is then the final + * sum of md5 sums, as a hexadecimal number, with at least 32 digits. + * + * An md5 hash is 16 bytes; use an addition 8 bytes for the sum of md5 hashes + * so that it won't wrap round until at least 2^64 values have been added. + */ + + #define MD5_SUM_LEN 24 + + static MD5SumState * + makeMD5SumState(FunctionCallInfo fcinfo) + { + MD5SumState *state; + MemoryContext aggcontext; + MemoryContext oldcontext; + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "md5_total_transfn called in non-aggregate context"); + } + + oldcontext = MemoryContextSwitchTo(aggcontext); + state = (MD5SumState *) palloc(sizeof(MD5SumState) + + MD5_SUM_LEN - MD5_HASH_LEN/2); + pg_md5s_init(state, MD5_SUM_LEN); + MemoryContextSwitchTo(oldcontext); + + return state; + } + + Datum + md5_text_total_transfn(PG_FUNCTION_ARGS) + { + MD5SumState *state; + + state = PG_ARGISNULL(0) ? NULL : (MD5SumState *) PG_GETARG_POINTER(0); + + /* Add the value's MD5 sum to the sum of MD5 sums, unless it is null */ + if (!PG_ARGISNULL(1)) + { + text *in_text = PG_GETARG_TEXT_PP(1); + size_t len; + + if (state == NULL) + state = makeMD5SumState(fcinfo); + + len = VARSIZE_ANY_EXHDR(in_text); + pg_md5s_update(state, VARDATA_ANY(in_text), len); + } + + /* + * The transition type for md5_total() is declared to be "internal", + * which is a pass-by-value type the same size as a pointer. + */ + PG_RETURN_POINTER(state); + } + + Datum + md5_bytea_total_transfn(PG_FUNCTION_ARGS) + { + MD5SumState *state; + + state = PG_ARGISNULL(0) ? NULL : (MD5SumState *) PG_GETARG_POINTER(0); + + /* Add the value's MD5 sum to the sum of MD5 sums, unless it is null */ + if (!PG_ARGISNULL(1)) + { + bytea *in = PG_GETARG_BYTEA_PP(1); + size_t len; + + if (state == NULL) + state = makeMD5SumState(fcinfo); + + len = VARSIZE_ANY_EXHDR(in); + pg_md5s_update(state, VARDATA_ANY(in), len); + } + + /* + * The transition type for md5_total() is declared to be "internal", + * which is a pass-by-value type the same size as a pointer. + */ + PG_RETURN_POINTER(state); + } + + Datum + md5_total_finalfn(PG_FUNCTION_ARGS) + { + MD5SumState *state; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + state = PG_ARGISNULL(0) ? NULL : (MD5SumState *) PG_GETARG_POINTER(0); + + if (state != NULL) + { + char hexsum[sizeof(state->sum) * 2 + 1]; + + pg_md5s_final_hash(state, hexsum); + + PG_RETURN_TEXT_P(cstring_to_text(hexsum)); + } + else + PG_RETURN_NULL(); + } + + /* * Return the size of a datum, possibly compressed * * Works on any data type diff --git a/src/include/catalog/pg_aggregate.h b/src/include/catalog/pg_aggregate.h new file mode 100644 index 6fb10a9..5182f12 *** a/src/include/catalog/pg_aggregate.h --- b/src/include/catalog/pg_aggregate.h *************** DATA(insert ( 3545 bytea_string_agg_tran *** 235,240 **** --- 235,246 ---- /* json */ DATA(insert ( 3175 json_agg_transfn json_agg_finalfn 0 2281 _null_ )); + /* md5 */ + DATA(insert ( 3179 md5_text_agg_transfn md5_agg_finalfn 0 2281 _null_ )); + DATA(insert ( 3181 md5_bytea_agg_transfn md5_agg_finalfn 0 2281 _null_ )); + DATA(insert ( 3184 md5_text_total_transfn md5_total_finalfn 0 2281 _null_ )); + DATA(insert ( 3186 md5_bytea_total_transfn md5_total_finalfn 0 2281 _null_ )); + /* * prototypes for functions in pg_aggregate.c */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h new file mode 100644 index b5be075..d7cf3b4 *** a/src/include/catalog/pg_proc.h --- b/src/include/catalog/pg_proc.h *************** DESCR("MD5 hash"); *** 3519,3524 **** --- 3519,3546 ---- DATA(insert OID = 2321 ( md5 PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "17" _null_ _null_ _null_ _null_ md5_bytea _null_ _null_ _null_ )); DESCR("MD5 hash"); + DATA(insert OID = 3177 ( md5_text_agg_transfn PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 2281 "2281 25" _null_ _null_ _null_ _null_ md5_text_agg_transfn _null_ _null_ _null_ )); + DESCR("aggregate transition function"); + DATA(insert OID = 3178 ( md5_agg_finalfn PGNSP PGUID 12 1 0 0 0 f f f f f f i 1 0 25 "2281" _null_ _null_ _null_ _null_ md5_agg_finalfn _null_ _null_ _null_ )); + DESCR("aggregate final function"); + DATA(insert OID = 3179 ( md5_agg PGNSP PGUID 12 1 0 0 0 t f f f f f i 1 0 25 "25" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("MD5 text hashing aggregate"); + DATA(insert OID = 3180 ( md5_bytea_agg_transfn PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 2281 "2281 17" _null_ _null_ _null_ _null_ md5_bytea_agg_transfn _null_ _null_ _null_ )); + DESCR("aggregate transition function"); + DATA(insert OID = 3181 ( md5_agg PGNSP PGUID 12 1 0 0 0 t f f f f f i 1 0 25 "17" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("MD5 bytea hashing aggregate"); + + DATA(insert OID = 3182 ( md5_text_total_transfn PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 2281 "2281 25" _null_ _null_ _null_ _null_ md5_text_total_transfn _null_ _null_ _null_ )); + DESCR("aggregate transition function"); + DATA(insert OID = 3183 ( md5_total_finalfn PGNSP PGUID 12 1 0 0 0 f f f f f f i 1 0 25 "2281" _null_ _null_ _null_ _null_ md5_total_finalfn _null_ _null_ _null_ )); + DESCR("aggregate final function"); + DATA(insert OID = 3184 ( md5_total PGNSP PGUID 12 1 0 0 0 t f f f f f i 1 0 25 "25" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("MD5 text hash sum aggregate"); + DATA(insert OID = 3185 ( md5_bytea_total_transfn PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 2281 "2281 17" _null_ _null_ _null_ _null_ md5_bytea_total_transfn _null_ _null_ _null_ )); + DESCR("aggregate transition function"); + DATA(insert OID = 3186 ( md5_total PGNSP PGUID 12 1 0 0 0 t f f f f f i 1 0 25 "17" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("MD5 bytea hash sum aggregate"); + /* crosstype operations for date vs. timestamp and timestamptz */ DATA(insert OID = 2338 ( date_lt_timestamp PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "1082 1114" _null_ _null_ _null_ _null_ date_lt_timestamp _null_ _null_ _null_ )); DATA(insert OID = 2339 ( date_le_timestamp PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "1082 1114" _null_ _null_ _null_ _null_ date_le_timestamp _null_ _null_ _null_ )); diff --git a/src/include/libpq/md5.h b/src/include/libpq/md5.h new file mode 100644 index cfa8df5..bd49f92 *** a/src/include/libpq/md5.h --- b/src/include/libpq/md5.h *************** *** 21,30 **** --- 21,56 ---- #define isMD5(passwd) (strncmp(passwd, "md5", 3) == 0 && \ strlen(passwd) == MD5_PASSWD_LEN) + /* State information needed in a cumulative MD5 computation. */ + typedef struct MD5State + { + uint32 len_low; /* low part of 64-bit bit count */ + uint32 len_high; /* high part of 64-bit bit count */ + uint32 state[4]; /* MD5 digest state buffer */ + uint8 buffer[64]; /* input data buffer */ + } MD5State; + + /* State information for computing a sum of MD5 sums. */ + typedef struct MD5SumState + { + size_t sum_len; /* length of sum (>= 16) */ + uint8 sum[16]; /* sum of MD5 sums --- VARIABLE LENGTH ARRAY */ + } MD5SumState; + /* functions to compute the MD5 sum of a single input value */ extern bool pg_md5_hash(const void *buff, size_t len, char *hexsum); extern bool pg_md5_binary(const void *buff, size_t len, void *outbuf); extern bool pg_md5_encrypt(const char *passwd, const char *salt, size_t salt_len, char *buf); + /* cumulative MD5 computation functions */ + extern void pg_md5_init(MD5State *state); + extern void pg_md5_update(MD5State *state, const void *buff, size_t len); + extern void pg_md5_final_hash(MD5State *state, char *hexsum); + + /* functions to compute a sum of MD5 sums */ + extern void pg_md5s_init(MD5SumState *state, size_t sum_len); + extern void pg_md5s_update(MD5SumState *state, const void *buff, size_t len); + extern void pg_md5s_final_hash(MD5SumState *state, char *hexsum); + #endif diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h new file mode 100644 index 667c58b..0ed6e33 *** a/src/include/utils/builtins.h --- b/src/include/utils/builtins.h *************** extern Datum to_hex32(PG_FUNCTION_ARGS); *** 779,784 **** --- 779,790 ---- extern Datum to_hex64(PG_FUNCTION_ARGS); extern Datum md5_text(PG_FUNCTION_ARGS); extern Datum md5_bytea(PG_FUNCTION_ARGS); + extern Datum md5_text_agg_transfn(PG_FUNCTION_ARGS); + extern Datum md5_bytea_agg_transfn(PG_FUNCTION_ARGS); + extern Datum md5_agg_finalfn(PG_FUNCTION_ARGS); + extern Datum md5_text_total_transfn(PG_FUNCTION_ARGS); + extern Datum md5_bytea_total_transfn(PG_FUNCTION_ARGS); + extern Datum md5_total_finalfn(PG_FUNCTION_ARGS); extern Datum unknownin(PG_FUNCTION_ARGS); extern Datum unknownout(PG_FUNCTION_ARGS); diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out new file mode 100644 index d379c0d..c069b22 *** a/src/test/regress/expected/aggregates.out --- b/src/test/regress/expected/aggregates.out *************** select string_agg(v, decode('ee', 'hex') *** 1153,1156 **** --- 1153,1195 ---- \xffeeaa (1 row) + -- md5_agg tests + select md5_agg(a), md5(string_agg(a,'')) from (values('aaaa'),('bbbb'),('cccc')) g(a); + md5_agg | md5 + ----------------------------------+---------------------------------- + ccb3bf4d77b887690b3b89663823d13d | ccb3bf4d77b887690b3b89663823d13d + (1 row) + + select md5_agg(a), md5(string_agg(a,'')) from (values('aaaa'),(null),('cccc')) g(a); + md5_agg | md5 + ----------------------------------+---------------------------------- + 39797f2e1f4fc388384fccfd75dcd5b6 | 39797f2e1f4fc388384fccfd75dcd5b6 + (1 row) + + select md5_agg(a), md5(string_agg(a,'')) from (values(null),(null)) g(a); + md5_agg | md5 + ---------+----- + | + (1 row) + + select md5_agg(distinct f1 order by f1), + md5(string_agg(distinct f1, '' order by f1)) from varchar_tbl; + md5_agg | md5 + ----------------------------------+---------------------------------- + f271345903df8fde90583255d5c5dcd4 | f271345903df8fde90583255d5c5dcd4 + (1 row) + + select md5_agg(v), md5(string_agg(v, '')) from bytea_test_table; + md5_agg | md5 + ----------------------------------+---------------------------------- + 6c33e22e546f0c70faa7e2f674760ac4 | 6c33e22e546f0c70faa7e2f674760ac4 + (1 row) + + truncate bytea_test_table; + select md5_agg(v), md5(string_agg(v, '')) from bytea_test_table; + md5_agg | md5 + ---------+----- + | + (1 row) + drop table bytea_test_table; diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out new file mode 100644 index 281c695..1babf96 *** a/src/test/regress/expected/strings.out --- b/src/test/regress/expected/strings.out *************** select md5('') = 'd41d8cd98f00b204e98009 *** 1278,1359 **** --- 1278,1443 ---- t (1 row) + select md5_agg(v) = 'd41d8cd98f00b204e9800998ecf8427e' AS "TRUE" from (values('')) t(v); + TRUE + ------ + t + (1 row) + select md5('a') = '0cc175b9c0f1b6a831c399e269772661' AS "TRUE"; TRUE ------ t (1 row) + select md5_agg(v) = '0cc175b9c0f1b6a831c399e269772661' AS "TRUE" from (values('a')) t(v); + TRUE + ------ + t + (1 row) + select md5('abc') = '900150983cd24fb0d6963f7d28e17f72' AS "TRUE"; TRUE ------ t (1 row) + select md5_agg(v) = '900150983cd24fb0d6963f7d28e17f72' AS "TRUE" from (values('abc')) t(v); + TRUE + ------ + t + (1 row) + select md5('message digest') = 'f96b697d7cb7938d525a2f31aaf161d0' AS "TRUE"; TRUE ------ t (1 row) + select md5_agg(v) = 'f96b697d7cb7938d525a2f31aaf161d0' AS "TRUE" from (values('message digest')) t(v); + TRUE + ------ + t + (1 row) + select md5('abcdefghijklmnopqrstuvwxyz') = 'c3fcd3d76192e4007dfb496cca67e13b' AS "TRUE"; TRUE ------ t (1 row) + select md5_agg(v) = 'c3fcd3d76192e4007dfb496cca67e13b' AS "TRUE" from (values('abcdefghijklmnopqrstuvwxyz')) t(v); + TRUE + ------ + t + (1 row) + select md5('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789') = 'd174ab98d277d9f5a5611c2c9f419d9f' AS "TRUE"; TRUE ------ t (1 row) + select md5_agg(v) = 'd174ab98d277d9f5a5611c2c9f419d9f' AS "TRUE" from (values('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789')) t(v); + TRUE + ------ + t + (1 row) + select md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890') = '57edf4a22be3c955ac49da2e2107b67a' AS "TRUE"; TRUE ------ t (1 row) + select md5_agg(v) = '57edf4a22be3c955ac49da2e2107b67a' AS "TRUE" from (values('12345678901234567890123456789012345678901234567890123456789012345678901234567890')) t(v); + TRUE + ------ + t + (1 row) + select md5(''::bytea) = 'd41d8cd98f00b204e9800998ecf8427e' AS "TRUE"; TRUE ------ t (1 row) + select md5_agg(v) = 'd41d8cd98f00b204e9800998ecf8427e' AS "TRUE" from (values(''::bytea)) t(v); + TRUE + ------ + t + (1 row) + select md5('a'::bytea) = '0cc175b9c0f1b6a831c399e269772661' AS "TRUE"; TRUE ------ t (1 row) + select md5_agg(v) = '0cc175b9c0f1b6a831c399e269772661' AS "TRUE" from (values('a'::bytea)) t(v); + TRUE + ------ + t + (1 row) + select md5('abc'::bytea) = '900150983cd24fb0d6963f7d28e17f72' AS "TRUE"; TRUE ------ t (1 row) + select md5_agg(v) = '900150983cd24fb0d6963f7d28e17f72' AS "TRUE" from (values('abc'::bytea)) t(v); + TRUE + ------ + t + (1 row) + select md5('message digest'::bytea) = 'f96b697d7cb7938d525a2f31aaf161d0' AS "TRUE"; TRUE ------ t (1 row) + select md5_agg(v) = 'f96b697d7cb7938d525a2f31aaf161d0' AS "TRUE" from (values('message digest'::bytea)) t(v); + TRUE + ------ + t + (1 row) + select md5('abcdefghijklmnopqrstuvwxyz'::bytea) = 'c3fcd3d76192e4007dfb496cca67e13b' AS "TRUE"; TRUE ------ t (1 row) + select md5_agg(v) = 'c3fcd3d76192e4007dfb496cca67e13b' AS "TRUE" from (values('abcdefghijklmnopqrstuvwxyz'::bytea)) t(v); + TRUE + ------ + t + (1 row) + select md5('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'::bytea) = 'd174ab98d277d9f5a5611c2c9f419d9f' AS "TRUE"; TRUE ------ t (1 row) + select md5_agg(v) = 'd174ab98d277d9f5a5611c2c9f419d9f' AS "TRUE" from (values('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'::bytea)) t(v); + TRUE + ------ + t + (1 row) + select md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890'::bytea) = '57edf4a22be3c955ac49da2e2107b67a' AS "TRUE"; TRUE ------ t + (1 row) + + select md5_agg(v) = '57edf4a22be3c955ac49da2e2107b67a' AS "TRUE" from (values('12345678901234567890123456789012345678901234567890123456789012345678901234567890'::bytea)) t(v); + TRUE + ------ + t (1 row) -- diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql new file mode 100644 index 38d4757..a89c0ba *** a/src/test/regress/sql/aggregates.sql --- b/src/test/regress/sql/aggregates.sql *************** select string_agg(v, '') from bytea_test *** 441,444 **** --- 441,456 ---- select string_agg(v, NULL) from bytea_test_table; select string_agg(v, decode('ee', 'hex')) from bytea_test_table; + -- md5_agg tests + select md5_agg(a), md5(string_agg(a,'')) from (values('aaaa'),('bbbb'),('cccc')) g(a); + select md5_agg(a), md5(string_agg(a,'')) from (values('aaaa'),(null),('cccc')) g(a); + select md5_agg(a), md5(string_agg(a,'')) from (values(null),(null)) g(a); + + select md5_agg(distinct f1 order by f1), + md5(string_agg(distinct f1, '' order by f1)) from varchar_tbl; + + select md5_agg(v), md5(string_agg(v, '')) from bytea_test_table; + truncate bytea_test_table; + select md5_agg(v), md5(string_agg(v, '')) from bytea_test_table; + drop table bytea_test_table; diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql new file mode 100644 index e7841aa..8385d57 *** a/src/test/regress/sql/strings.sql --- b/src/test/regress/sql/strings.sql *************** select to_hex(256::bigint*256::bigint*25 *** 455,486 **** --- 455,500 ---- -- (see: ftp://ftp.rfc-editor.org/in-notes/rfc1321.txt) -- select md5('') = 'd41d8cd98f00b204e9800998ecf8427e' AS "TRUE"; + select md5_agg(v) = 'd41d8cd98f00b204e9800998ecf8427e' AS "TRUE" from (values('')) t(v); select md5('a') = '0cc175b9c0f1b6a831c399e269772661' AS "TRUE"; + select md5_agg(v) = '0cc175b9c0f1b6a831c399e269772661' AS "TRUE" from (values('a')) t(v); select md5('abc') = '900150983cd24fb0d6963f7d28e17f72' AS "TRUE"; + select md5_agg(v) = '900150983cd24fb0d6963f7d28e17f72' AS "TRUE" from (values('abc')) t(v); select md5('message digest') = 'f96b697d7cb7938d525a2f31aaf161d0' AS "TRUE"; + select md5_agg(v) = 'f96b697d7cb7938d525a2f31aaf161d0' AS "TRUE" from (values('message digest')) t(v); select md5('abcdefghijklmnopqrstuvwxyz') = 'c3fcd3d76192e4007dfb496cca67e13b' AS "TRUE"; + select md5_agg(v) = 'c3fcd3d76192e4007dfb496cca67e13b' AS "TRUE" from (values('abcdefghijklmnopqrstuvwxyz')) t(v); select md5('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789') = 'd174ab98d277d9f5a5611c2c9f419d9f' AS "TRUE"; + select md5_agg(v) = 'd174ab98d277d9f5a5611c2c9f419d9f' AS "TRUE" from (values('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789')) t(v); select md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890') = '57edf4a22be3c955ac49da2e2107b67a' AS "TRUE"; + select md5_agg(v) = '57edf4a22be3c955ac49da2e2107b67a' AS "TRUE" from (values('12345678901234567890123456789012345678901234567890123456789012345678901234567890')) t(v); select md5(''::bytea) = 'd41d8cd98f00b204e9800998ecf8427e' AS "TRUE"; + select md5_agg(v) = 'd41d8cd98f00b204e9800998ecf8427e' AS "TRUE" from (values(''::bytea)) t(v); select md5('a'::bytea) = '0cc175b9c0f1b6a831c399e269772661' AS "TRUE"; + select md5_agg(v) = '0cc175b9c0f1b6a831c399e269772661' AS "TRUE" from (values('a'::bytea)) t(v); select md5('abc'::bytea) = '900150983cd24fb0d6963f7d28e17f72' AS "TRUE"; + select md5_agg(v) = '900150983cd24fb0d6963f7d28e17f72' AS "TRUE" from (values('abc'::bytea)) t(v); select md5('message digest'::bytea) = 'f96b697d7cb7938d525a2f31aaf161d0' AS "TRUE"; + select md5_agg(v) = 'f96b697d7cb7938d525a2f31aaf161d0' AS "TRUE" from (values('message digest'::bytea)) t(v); select md5('abcdefghijklmnopqrstuvwxyz'::bytea) = 'c3fcd3d76192e4007dfb496cca67e13b' AS "TRUE"; + select md5_agg(v) = 'c3fcd3d76192e4007dfb496cca67e13b' AS "TRUE" from (values('abcdefghijklmnopqrstuvwxyz'::bytea)) t(v); select md5('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'::bytea) = 'd174ab98d277d9f5a5611c2c9f419d9f' AS "TRUE"; + select md5_agg(v) = 'd174ab98d277d9f5a5611c2c9f419d9f' AS "TRUE" from (values('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'::bytea)) t(v); select md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890'::bytea) = '57edf4a22be3c955ac49da2e2107b67a' AS "TRUE"; + select md5_agg(v) = '57edf4a22be3c955ac49da2e2107b67a' AS "TRUE" from (values('12345678901234567890123456789012345678901234567890123456789012345678901234567890'::bytea)) t(v); -- -- test behavior of escape_string_warning and standard_conforming_strings options