diff --git a/src/backend/access/brin/brin_minmax_multi.c b/src/backend/access/brin/brin_minmax_multi.c index c62a3c8ba8..95b67d72e5 100644 --- a/src/backend/access/brin/brin_minmax_multi.c +++ b/src/backend/access/brin/brin_minmax_multi.c @@ -2,7 +2,7 @@ * brin_minmax_multi.c * Implementation of Multi Min/Max opclass for BRIN * - * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -21,7 +21,7 @@ * * [1000,2000] and [1000000,1000000] * - * This allow us to still eliminate the page range when the scan keys hit + * This allows us to still eliminate the page range when the scan keys hit * the gap between 2000 and 1000000, making it useful in cases when the * simple minmax opclass gets inefficient. * @@ -145,7 +145,7 @@ typedef struct MinMaxMultiOptions * * The 'values' array stores boundary values for regular ranges first (there * are 2*nranges values to store), and then the nvalues boundary values for - * single-point ranges. That is, we have (2*nranges + nvalues) boundary + * single-point ranges. That is, we have (2 * nranges + nvalues) boundary * values in the array. * * +---------------------------------+-------------------------------+ @@ -180,7 +180,7 @@ typedef struct Ranges /* * We simply add the values into a large buffer, without any expensive * steps (sorting, deduplication, ...). The buffer is a multiple of the - * target number of values, so the compaction happen less often, + * target number of values, so the compaction happens less often, * amortizing the costs. We keep the actual target and compact to the * requested number of values at the very end, before serializing to * on-disk representation. @@ -456,7 +456,7 @@ AssertCheckExpandedRanges(BrinDesc *bdesc, Oid colloid, AttrNumber attno, } /* - * And the ranges should be ordered and must nor overlap, i.e. upper < + * And the ranges should be ordered and must not overlap, i.e. upper < * lower for boundaries of consecutive ranges. */ for (i = 0; i < nranges - 1; i++) @@ -634,7 +634,7 @@ range_serialize(Ranges *range) for (i = 0; i < nvalues; i++) { /* don't forget to include the null terminator ;-) */ - len += strlen(DatumGetPointer(range->values[i])) + 1; + len += strlen(DatumGetCString(range->values[i])) + 1; } } else /* fixed-length types (even by-reference) */ @@ -670,8 +670,8 @@ range_serialize(Ranges *range) /* * For values passed by value, we need to copy just the * significant bytes - we can't use memcpy directly, as that - * assumes little endian behavior. store_att_byval does almost - * what we need, but it requires properly aligned buffer - the + * assumes little-endian behavior. store_att_byval does almost + * what we need, but it requires a properly aligned buffer - the * output buffer does not guarantee that. So we simply use a local * Datum variable (which guarantees proper alignment), and then * copy the value from it. @@ -695,9 +695,9 @@ range_serialize(Ranges *range) } else if (typlen == -2) /* cstring */ { - int tmp = strlen(DatumGetPointer(range->values[i])) + 1; + int tmp = strlen(DatumGetCString(range->values[i])) + 1; - memcpy(ptr, DatumGetPointer(range->values[i]), tmp); + memcpy(ptr, DatumGetCString(range->values[i]), tmp); ptr += tmp; } @@ -757,7 +757,7 @@ range_deserialize(int maxvalues, SerializedRanges *serialized) /* * And now deconstruct the values into Datum array. We have to copy the * data because the serialized representation ignores alignment, and we - * don't want to rely it will be kept around anyway. + * don't want to rely on it being kept around anyway. */ ptr = serialized->data; @@ -780,8 +780,10 @@ range_deserialize(int maxvalues, SerializedRanges *serialized) } else if (typlen == -2) /* cstring */ { - datalen += MAXALIGN(strlen(DatumGetPointer(ptr)) + 1); - ptr += strlen(DatumGetPointer(ptr)) + 1; + Size slen = strlen(DatumGetCString(ptr)) + 1; + + datalen += MAXALIGN(slen); + ptr += slen; } } @@ -830,7 +832,7 @@ range_deserialize(int maxvalues, SerializedRanges *serialized) memcpy(dataptr, ptr, slen); dataptr += MAXALIGN(slen); - ptr += (slen); + ptr += slen; } /* make sure we haven't overflown the buffer end */ @@ -848,9 +850,9 @@ range_deserialize(int maxvalues, SerializedRanges *serialized) * compare_expanded_ranges * Compare the expanded ranges - first by minimum, then by maximum. * - * We do guarantee that ranges in a single Range object do not overlap, - * so it may seem strange that we don't order just by minimum. But when - * merging two Ranges (which happens in the union function), the ranges + * We do guarantee that ranges in a single ExpandedRange object do not + * overlap, so it may seem strange that we don't order just by minimum. But + * when merging two Ranges (which happens in the union function), the ranges * may in fact overlap. So we do compare both. */ static int @@ -1060,9 +1062,9 @@ range_contains_value(BrinDesc *bdesc, Oid colloid, /* * There is no matching range, so let's inspect the sorted values. * - * We do a sequential search for small number of values, and binary search - * once we have more than 16 values. This threshold is somewhat arbitrary, - * as it depends on how expensive the comparison function is. + * We do a sequential search for small numbers of values, and binary + * search once we have more than 16 values. This threshold is somewhat + * arbitrary, as it depends on how expensive the comparison function is. * * XXX If we use the threshold here, maybe we should do the same thing in * has_matching_range? Or maybe we should do the bin search all the time? @@ -1312,10 +1314,10 @@ compare_distances(const void *a, const void *b) } /* - * Given an array of expanded ranges, compute distance of the gaps between - * the ranges - for ncranges there are (ncranges-1) gaps. + * Given an array of expanded ranges, compute the distance of the gaps between + * the ranges - for neranges there are (neranges - 1) gaps. * - * We simply call the "distance" function to compute the (max-min) for pairs + * We simply call the "distance" function to compute the (max - min) for pairs * of consecutive ranges. The function may be fairly expensive, so we do that * just once (and then use it to pick as many ranges to merge as possible). * @@ -1335,8 +1337,8 @@ build_distances(FmgrInfo *distanceFn, Oid colloid, distances = (DistanceValue *) palloc0(sizeof(DistanceValue) * ndistances); /* - * Walk though the ranges once and compute distance between the ranges so - * that we can sort them once. + * Walk through the ranges once and compute the distance between the + * ranges so that we can sort them once. */ for (i = 0; i < ndistances; i++) { @@ -1428,13 +1430,13 @@ count_values(ExpandedRange *cranges, int ncranges) * * Combines ranges until the number of boundary values drops below the * threshold specified by max_values. This happens by merging enough - * ranges by distance between them. + * ranges by the distance between them. * * Returns the number of result ranges. * * We simply use the global min/max and then add boundaries for enough - * largest gaps. Each gap adds 2 values, so we simply use (target/2-1) - * distances. Then we simply sort all the values - each two values are + * largest gaps. Each gap adds 2 values, so we simply use (target / 2 - 1) + * distances. Then we simply sort all the values - every two values are * a boundary of a range (possibly collapsed). * * XXX Some of the ranges may be collapsed (i.e. the min/max values are @@ -1446,7 +1448,7 @@ count_values(ExpandedRange *cranges, int ncranges) * are of equal (or very similar) length. * * Consider for example points 1, 2, 3, .., 64, which have gaps of the - * same length 1 of course. In that case we tend to pick the first + * same length 1 of course. In that case, we tend to pick the first * gap of that length, which leads to this: * * step 1: [1, 2], 3, 4, 5, .., 64 @@ -1482,7 +1484,7 @@ reduce_expanded_ranges(ExpandedRange *eranges, int neranges, int keep = (max_values / 2 - 1); /* - * Maybe we have sufficiently low number of ranges already? + * Maybe we have a sufficiently low number of ranges already? * * XXX This should happen before we actually do the expensive stuff like * sorting, so maybe this should be just an assert. @@ -1505,7 +1507,7 @@ reduce_expanded_ranges(ExpandedRange *eranges, int neranges, /* add boundary values for enough gaps */ for (i = 0; i < keep; i++) { - /* index of the gap between (index) and (index+1) ranges */ + /* index of the gap between (index) and (index + 1) ranges */ int index = distances[i].index; Assert((index >= 0) && ((index + 1) < neranges)); @@ -1527,7 +1529,7 @@ reduce_expanded_ranges(ExpandedRange *eranges, int neranges, qsort_arg(values, nvalues, sizeof(Datum), compare_values, (void *) &cxt); - /* We have nvalues boundary values, which means nvalues/2 ranges. */ + /* We have nvalues boundary values, which means nvalues / 2 ranges. */ for (i = 0; i < (nvalues / 2); i++) { eranges[i].minval = values[2 * i]; @@ -1624,8 +1626,8 @@ ensure_free_space_in_buffer(BrinDesc *bdesc, Oid colloid, * * We don't simply check against range->maxvalues again. The deduplication * might have freed very little space (e.g. just one value), forcing us to - * do deduplication very often. In that case it's better to do compaction - * and reduce more space. + * do deduplication very often. In that case, it's better to do the + * compaction and reduce more space. */ if (2 * range->nranges + range->nvalues <= range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR) return true; @@ -1711,14 +1713,14 @@ range_add_value(BrinDesc *bdesc, Oid colloid, * rule that we never have duplicates with the ranges or sorted values. * * We might also deduplicate and recheck if the value is contained, but - * that seems like an overkill. We'd need to deduplicate anyway, so why - * not do it now. + * that seems like overkill. We'd need to deduplicate anyway, so why not + * do it now. */ modified = ensure_free_space_in_buffer(bdesc, colloid, attno, attr, ranges); /* - * Bail out if the value already is covered by the range. + * Bail if the value already is covered by the range. * * We could also add values until we hit values_per_range, and then do the * deduplication in a batch, hoping for better efficiency. But that would @@ -1803,10 +1805,10 @@ compactify_ranges(BrinDesc *bdesc, Ranges *ranges, int max_values) /* * The distanceFn calls (which may internally call e.g. numeric_le) may - * allocate quite a bit of memory, and we must not leak it. Otherwise we'd - * have problems e.g. when building indexes. So we create a local memory - * context and make sure we free the memory before leaving this function - * (not after every call). + * allocate quite a bit of memory, and we must not leak it. Otherwise, + * we'd have problems e.g. when building indexes. So we create a local + * memory context and make sure we free the memory before leaving this + * function (not after every call). */ ctx = AllocSetContextCreate(CurrentMemoryContext, "minmax-multi context", @@ -1863,7 +1865,7 @@ brin_minmax_multi_opcinfo(PG_FUNCTION_ARGS) } /* - * Compute distance between two float4 values (plain subtraction). + * Compute the distance between two float4 values (plain subtraction). */ Datum brin_minmax_multi_distance_float4(PG_FUNCTION_ARGS) @@ -1881,7 +1883,7 @@ brin_minmax_multi_distance_float4(PG_FUNCTION_ARGS) } /* - * Compute distance between two float8 values (plain subtraction). + * Compute the distance between two float8 values (plain subtraction). */ Datum brin_minmax_multi_distance_float8(PG_FUNCTION_ARGS) @@ -1899,7 +1901,7 @@ brin_minmax_multi_distance_float8(PG_FUNCTION_ARGS) } /* - * Compute distance between two int2 values (plain subtraction). + * Compute the distance between two int2 values (plain subtraction). */ Datum brin_minmax_multi_distance_int2(PG_FUNCTION_ARGS) @@ -1917,7 +1919,7 @@ brin_minmax_multi_distance_int2(PG_FUNCTION_ARGS) } /* - * Compute distance between two int4 values (plain subtraction). + * Compute the distance between two int4 values (plain subtraction). */ Datum brin_minmax_multi_distance_int4(PG_FUNCTION_ARGS) @@ -1935,7 +1937,7 @@ brin_minmax_multi_distance_int4(PG_FUNCTION_ARGS) } /* - * Compute distance between two int8 values (plain subtraction). + * Compute the distance between two int8 values (plain subtraction). */ Datum brin_minmax_multi_distance_int8(PG_FUNCTION_ARGS) @@ -1953,8 +1955,8 @@ brin_minmax_multi_distance_int8(PG_FUNCTION_ARGS) } /* - * Compute distance between two tid values (by mapping them to float8 - * and then subtracting them). + * Compute the distance between two tid values (by mapping them to float8 and + * then subtracting them). */ Datum brin_minmax_multi_distance_tid(PG_FUNCTION_ARGS) @@ -1985,7 +1987,7 @@ brin_minmax_multi_distance_tid(PG_FUNCTION_ARGS) } /* - * Computes distance between two numeric values (plain subtraction). + * Compute the distance between two numeric values (plain subtraction). */ Datum brin_minmax_multi_distance_numeric(PG_FUNCTION_ARGS) @@ -2006,7 +2008,7 @@ brin_minmax_multi_distance_numeric(PG_FUNCTION_ARGS) } /* - * Computes approximate distance between two UUID values. + * Compute the approximate distance between two UUID values. * * XXX We do not need a perfectly accurate value, so we approximate the * deltas (which would have to be 128-bit integers) with a 64-bit float. @@ -2044,7 +2046,7 @@ brin_minmax_multi_distance_uuid(PG_FUNCTION_ARGS) } /* - * Compute approximate distance between two dates. + * Compute the approximate distance between two dates. */ Datum brin_minmax_multi_distance_date(PG_FUNCTION_ARGS) @@ -2059,7 +2061,7 @@ brin_minmax_multi_distance_date(PG_FUNCTION_ARGS) } /* - * Computes approximate distance between two time (without tz) values. + * Compute the approximate distance between two time (without tz) values. * * TimeADT is just an int64, so we simply subtract the values directly. */ @@ -2079,7 +2081,7 @@ brin_minmax_multi_distance_time(PG_FUNCTION_ARGS) } /* - * Computes approximate distance between two timetz values. + * Compute the approximate distance between two timetz values. * * Simply subtracts the TimeADT (int64) values embedded in TimeTzADT. */ @@ -2098,6 +2100,9 @@ brin_minmax_multi_distance_timetz(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8(delta); } +/* + * Compute the distance between two timestamp values. + */ Datum brin_minmax_multi_distance_timestamp(PG_FUNCTION_ARGS) { @@ -2117,7 +2122,7 @@ brin_minmax_multi_distance_timestamp(PG_FUNCTION_ARGS) } /* - * Computes distance between two interval values. + * Compute the distance between two interval values. */ Datum brin_minmax_multi_distance_interval(PG_FUNCTION_ARGS) @@ -2175,7 +2180,7 @@ brin_minmax_multi_distance_interval(PG_FUNCTION_ARGS) } /* - * Compute distance between two pg_lsn values. + * Compute the distance between two pg_lsn values. * * LSN is just an int64 encoding position in the stream, so just subtract * those int64 values directly. @@ -2196,7 +2201,7 @@ brin_minmax_multi_distance_pg_lsn(PG_FUNCTION_ARGS) } /* - * Compute distance between two macaddr values. + * Compute the distance between two macaddr values. * * mac addresses are treated as 6 unsigned chars, so do the same thing we * already do for UUID values. @@ -2233,7 +2238,7 @@ brin_minmax_multi_distance_macaddr(PG_FUNCTION_ARGS) } /* - * Compute distance between two macaddr8 values. + * Compute the distance between two macaddr8 values. * * macaddr8 addresses are 8 unsigned chars, so do the same thing we * already do for UUID values. @@ -2276,15 +2281,15 @@ brin_minmax_multi_distance_macaddr8(PG_FUNCTION_ARGS) } /* - * Compute distance between two inet values. + * Compute the distance between two inet values. * - * The distance is defined as difference between 32-bit/128-bit values, + * The distance is defined as the difference between 32-bit/128-bit values, * depending on the IP version. The distance is computed by subtracting * the bytes and normalizing it to [0,1] range for each IP family. * Addresses from different families are considered to be in maximum * distance, which is 1.0. * - * XXX Does this need to consider the mask (bits)? For now it's ignored. + * XXX Does this need to consider the mask (bits)? For now, it's ignored. */ Datum brin_minmax_multi_distance_inet(PG_FUNCTION_ARGS) @@ -2318,7 +2323,8 @@ brin_minmax_multi_distance_inet(PG_FUNCTION_ARGS) * The length is calculated from the mask length, because we sort the * addresses by first address in the range, so A.B.C.D/24 < A.B.C.1 (the * first range starts at A.B.C.0, which is before A.B.C.1). We don't want - * to produce negative delta in this case, so we just cut the extra bytes. + * to produce a negative delta in this case, so we just cut the extra + * bytes. * * XXX Maybe this should be a bit more careful and cut the bits, not just * whole bytes. @@ -2394,11 +2400,11 @@ brin_minmax_multi_get_values(BrinDesc *bdesc, MinMaxMultiOptions *opts) } /* - * Examine the given index tuple (which contains partial status of a certain - * page range) by comparing it to the given value that comes from another heap - * tuple. If the new value is outside the min/max range specified by the - * existing tuple values, update the index tuple and return true. Otherwise, - * return false and do not modify in this case. + * Examine the given index tuple (which contains the partial status of a + * certain page range) by comparing it to the given value that comes from + * another heap tuple. If the new value is outside the min/max range + * specified by the existing tuple values, update the index tuple and return + * true. Otherwise, return false and do not modify in this case. */ Datum brin_minmax_multi_add_value(PG_FUNCTION_ARGS) @@ -2425,13 +2431,13 @@ brin_minmax_multi_add_value(PG_FUNCTION_ARGS) /* * If this is the first non-null value, we need to initialize the range - * list. Otherwise just extract the existing range list from BrinValues. + * list. Otherwise, just extract the existing range list from BrinValues. * * When starting with an empty range, we assume this is a batch mode and * we use a larger buffer. The buffer size is derived from the BRIN range - * size, number of rows per page, with some sensible min/max values. Small - * buffer would be bad for performance, but large buffer might require a - * lot of memory (because of keeping all the values). + * size, number of rows per page, with some sensible min/max values. A + * small buffer would be bad for performance, but a large buffer might + * require a lot of memory (because of keeping all the values). */ if (column->bv_allnulls) { @@ -2767,10 +2773,10 @@ brin_minmax_multi_union(PG_FUNCTION_ARGS) /* * The distanceFn calls (which may internally call e.g. numeric_le) may - * allocate quite a bit of memory, and we must not leak it. Otherwise we'd - * have problems e.g. when building indexes. So we create a local memory - * context and make sure we free the memory before leaving this function - * (not after every call). + * allocate quite a bit of memory, and we must not leak it. Otherwise, + * we'd have problems e.g. when building indexes. So we create a local + * memory context and make sure we free the memory before leaving this + * function (not after every call). */ ctx = AllocSetContextCreate(CurrentMemoryContext, "minmax-multi context", @@ -3032,19 +3038,17 @@ brin_minmax_multi_summary_out(PG_FUNCTION_ARGS) idx = 0; for (i = 0; i < ranges_deserialized->nranges; i++) { - Datum a, - b; + char *a, + *b; text *c; StringInfoData str; initStringInfo(&str); - a = FunctionCall1(&fmgrinfo, ranges_deserialized->values[idx++]); - b = FunctionCall1(&fmgrinfo, ranges_deserialized->values[idx++]); + a = OutputFunctionCall(&fmgrinfo, ranges_deserialized->values[idx++]); + b = OutputFunctionCall(&fmgrinfo, ranges_deserialized->values[idx++]); - appendStringInfo(&str, "%s ... %s", - DatumGetPointer(a), - DatumGetPointer(b)); + appendStringInfo(&str, "%s ... %s", a, b); c = cstring_to_text(str.data); @@ -3084,7 +3088,7 @@ brin_minmax_multi_summary_out(PG_FUNCTION_ARGS) a = FunctionCall1(&fmgrinfo, ranges_deserialized->values[idx++]); - appendStringInfoString(&str, DatumGetPointer(a)); + appendStringInfoString(&str, DatumGetCString(a)); b = cstring_to_text(str.data);