[PATCH] Optimize numeric comparisons and aggregations via packed-datum extraction

Started by ChenhuiMoabout 16 hours ago1 messageshackers
Jump to latest
#1ChenhuiMo
chenhuimo.mch@qq.com

Hi,

While profiling the execution of MAX(), MIN(), SUM(), AVG(), and VARIANCE() on numeric columns, I noticed that a significant amount of CPU time is spent on heap allocations and varlena detoasting. Specifically, PG_GETARG_NUMERIC() unconditionally detoasts short-header datums, which incurs continuous palloc/memcpy overhead in tight aggregation loops.

This patch introduces a fast-path optimization by directly extracting numeric fields from packed datums.

The key modifications include:

1.  cmp_numerics_packed(): A new comparison function that reads the `n_header` and payload directly from `VARDATA_ANY()` using pointer arithmetic. This avoids the standard `NUMERIC_*` macros which assume a 4-byte header structure, allowing us to safely compare 1-byte header numerics in-place.
2.  Zero-copy MIN/MAX: In `numeric_smaller` and `numeric_larger`, instead of unpacking inputs to `Numeric` and repacking them for return, the patch uses `PG_DETOAST_DATUM_PACKED` and directly returns the original `Datum` using `PG_RETURN_DATUM(PG_GETARG_DATUM(X))`.
3.  init_var_from_packed(): Extended the same direct-extraction logic to `do_numeric_accum` and `do_numeric_discard` for SUM, AVG, and VARIANCE.

I ran the built-in `numeric.sql` regression tests, and all passed without issues.

Here are the benchmark results executing on 20M rows using different precisions (NUMERIC(18,2), NUMERIC(38,2), NUMERIC(9,2)):

The DDLs and DMLs are:

drop table if exists t;
create table t(id bigserial primary key, order_date date, amount numeric(18, 2));

insert into t(order_date, amount) select
    DATE '2024-01-01' + (gs % 730),
    gs::numeric(18, 2) * gs % 998244353 * gs % 1000000007 * 133.31
from
    generate_series(1, 20000000) as gs;

vacuum analyze t;

drop table if exists t_big;
create table t_big(id bigserial primary key, order_date date, amount numeric(38, 2));

insert into t_big(order_date, amount) select
    DATE '2024-01-01' + (gs % 730),
    gs::numeric(38, 2) * gs * gs * 133.31
from
    generate_series(1, 20000000) as gs;

vacuum analyze t_big;

drop table if exists t_small;
create table t_small(id bigserial primary key, order_date date, amount numeric(9, 2));

insert into t_small(order_date, amount) select
    DATE '2024-01-01' + (gs % 730),
    (gs % 1331) * (gs % 1331) * 1.31
from
    generate_series(1, 20000000) as gs;

vacuum analyze t_small;

and the TEST CASEs are:

elect max(amount) from t;
select max(amount) from t where order_date > '2025-01-01';

select order_date, max(amount) from t group by order_date;
select order_date, max(amount) from t where order_date > '2025-01-01' group by order_date;

select min(amount) from t;
select min(amount) from t where order_date > '2025-01-01';

select order_date, min(amount) from t group by order_date;
select order_date, min(amount) from t where order_date > '2025-01-01' group by order_date;

select sum(amount) from t;
select sum(amount) from t where order_date > '2025-01-01';

select order_date, sum(amount) from t group by order_date;
select order_date, sum(amount) from t where order_date > '2025-01-01' group by order_date;

select avg(amount) from t;
select avg(amount) from t where order_date > '2025-01-01';

select order_date, avg(amount) from t group by order_date;
select order_date, avg(amount) from t where order_date > '2025-01-01' group by order_date;

select variance(amount) from t;
select variance(amount) from t where order_date > '2025-01-01';

select order_date, variance(amount) from t group by order_date;
select order_date, variance(amount) from t where order_date > '2025-01-01' group by order_date;

select max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t;
select max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t where order_date > '2025-01-01';

select order_date, max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t group by order_date;
select order_date, max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t where order_date > '2025-01-01' group by order_date;

select max(amount) from t_big;
select max(amount) from t_big where order_date > '2025-01-01';

select order_date, max(amount) from t_big group by order_date;
select order_date, max(amount) from t_big where order_date > '2025-01-01' group by order_date;

select min(amount) from t_big;
select min(amount) from t_big where order_date > '2025-01-01';

select order_date, min(amount) from t_big group by order_date;
select order_date, min(amount) from t_big where order_date > '2025-01-01' group by order_date;

select sum(amount) from t_big;
select sum(amount) from t_big where order_date > '2025-01-01';

select order_date, sum(amount) from t_big group by order_date;
select order_date, sum(amount) from t_big where order_date > '2025-01-01' group by order_date;

select avg(amount) from t_big;
select avg(amount) from t_big where order_date > '2025-01-01';

select order_date, avg(amount) from t_big group by order_date;
select order_date, avg(amount) from t_big where order_date > '2025-01-01' group by order_date;

select variance(amount) from t_big;
select variance(amount) from t_big where order_date > '2025-01-01';

select order_date, variance(amount) from t_big group by order_date;
select order_date, variance(amount) from t_big where order_date > '2025-01-01' group by order_date;

select max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t_big;
select max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t_big where order_date > '2025-01-01';

select order_date, max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t_big group by order_date;
select order_date, max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t_big where order_date > '2025-01-01' group by order_date;

select max(amount) from t_small;
select max(amount) from t_small where order_date > '2025-01-01';

select order_date, max(amount) from t_small group by order_date;
select order_date, max(amount) from t_small where order_date > '2025-01-01' group by order_date;

select min(amount) from t_small;
select min(amount) from t_small where order_date > '2025-01-01';

select order_date, min(amount) from t_small group by order_date;
select order_date, min(amount) from t_small where order_date > '2025-01-01' group by order_date;

select sum(amount) from t_small;
select sum(amount) from t_small where order_date > '2025-01-01';

select order_date, sum(amount) from t_small group by order_date;
select order_date, sum(amount) from t_small where order_date > '2025-01-01' group by order_date;

select avg(amount) from t_small;
select avg(amount) from t_small where order_date > '2025-01-01';

select order_date, avg(amount) from t_small group by order_date;
select order_date, avg(amount) from t_small where order_date > '2025-01-01' group by order_date;

select variance(amount) from t_small;
select variance(amount) from t_small where order_date > '2025-01-01';

select order_date, variance(amount) from t_small group by order_date;
select order_date, variance(amount) from t_small where order_date > '2025-01-01' group by order_date;

select max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t_small;
select max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t_small where order_date > '2025-01-01';

select order_date, max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t_small group by order_date;
select order_date, max(amount), min(amount), sum(amount), avg(amount), variance(amount) from t_small where order_date > '2025-01-01' group by order_date;

And I ran the test cases on my machine (12th Gen Intel(R) Core(TM) i9-12900H (2.50 GHz), 64GB RAM), better performance shows below:

# ====== DATASET: t (20M rows, NUMERIC(18,2)) ======
# ---- Baseline (All optimizations reverted) ----
# MAX queries
select max(amount) from t;                                                          Time: 980.844 ms
select max(amount) from t where order_date > '2025-01-01';                           Time: 406.842 ms
select order_date, max(amount) from t group by order_date;                           Time: 952.209 ms
select order_date, max(amount) from t where order_date > '2025-01-01' group by ...;  Time: 571.053 ms

# MIN queries
select min(amount) from t;                                                          Time: 567.840 ms
select min(amount) from t where order_date > '2025-01-01';                           Time: 405.364 ms
select order_date, min(amount) from t group by order_date;                           Time: 935.274 ms
select order_date, min(amount) from t where order_date > '2025-01-01' group by ...;  Time: 546.571 ms

# SUM queries
select sum(amount) from t;                                                           Time: 562.059 ms
select sum(amount) from t where order_date > '2025-01-01';                           Time: 400.519 ms
select order_date, sum(amount) from t group by order_date;                           Time: 920.640 ms
select order_date, sum(amount) from t where order_date > '2025-01-01' group by ...;  Time: 543.760 ms

# AVG queries
select avg(amount) from t;                                                           Time: 566.719 ms
select avg(amount) from t where order_date > '2025-01-01';                           Time: 404.249 ms
select order_date, avg(amount) from t group by order_date;                           Time: 922.547 ms
select order_date, avg(amount) from t where order_date > '2025-01-01' group by ...;  Time: 534.897 ms

# VARIANCE queries
select variance(amount) from t;                                                      Time: 699.547 ms
select variance(amount) from t where order_date > '2025-01-01';                      Time: 479.679 ms
select order_date, variance(amount) from t group by order_date;                      Time: 1081.712 ms
select order_date, variance(amount) from t where order_date > '2025-01-01' group by  Time: 620.472 ms

# COMBINED queries
select max, min, sum, avg, variance from t;                                          Time: 1686.727 ms
select max, min, sum, avg, variance from t where order_date > '2025-01-01';          Time: 971.959 ms
select order_date, max, min, sum, avg, variance from t group by order_date;          Time: 2206.506 ms
select order_date, max, min, sum... from t where order_date > '2025-01-01'...        Time: 1155.119 ms

# ---- Optimized (packed-datum extraction for MAX, MIN, SUM, AVG, VARIANCE) ----
# MAX queries
select max(amount) from t;                                                          Time: 297.890 ms  (-70%)
select max(amount) from t where order_date > '2025-01-01';                           Time: 261.415 ms  (-36%)
select order_date, max(amount) from t group by order_date;                           Time: 702.121 ms  (-26%)
select order_date, max(amount) from t where order_date > '2025-01-01' group by ...;  Time: 407.533 ms  (-29%)

# MIN queries
select min(amount) from t;                                                          Time: 301.846 ms  (-47%)
select min(amount) from t where order_date > '2025-01-01';                           Time: 266.653 ms  (-34%)
select order_date, min(amount) from t group by order_date;                           Time: 681.805 ms  (-27%)
select order_date, min(amount) from t where order_date > '2025-01-01' group by ...;  Time: 413.830 ms  (-24%)

# SUM queries
select sum(amount) from t;                                                           Time: 300.850 ms  (-46%)
select sum(amount) from t where order_date > '2025-01-01';                           Time: 282.615 ms  (-29%)
select order_date, sum(amount) from t group by order_date;                           Time: 765.818 ms  (-17%)
select order_date, sum(amount) from t where order_date > '2025-01-01' group by ...;  Time: 463.414 ms  (-15%)

# AVG queries
select avg(amount) from t;                                                           Time: 327.844 ms  (-42%)
select avg(amount) from t where order_date > '2025-01-01';                           Time: 365.507 ms  (-10%)
select order_date, avg(amount) from t group by order_date;                           Time: 650.977 ms  (-29%)
select order_date, avg(amount) from t where order_date > '2025-01-01' group by ...;  Time: 430.043 ms  (-20%)

# VARIANCE queries
select variance(amount) from t;                                                      Time: 505.815 ms  (-28%)
select variance(amount) from t where order_date > '2025-01-01';                      Time: 403.832 ms  (-16%)
select order_date, variance(amount) from t group by order_date;                      Time: 901.068 ms  (-17%)
select order_date, variance(amount) from t where order_date > '2025-01-01' group by  Time: 561.622 ms  (-9%)

# COMBINED queries
select max, min, sum, avg, variance from t;                                          Time: 804.479 ms  (-52%)
select max, min, sum, avg, variance from t where order_date > '2025-01-01';          Time: 530.819 ms  (-45%)
select order_date, max, min, sum, avg, variance from t group by order_date;          Time: 1241.194 ms (-44%)
select order_date, max, min, sum... from t where order_date > '2025-01-01'...        Time: 741.431 ms  (-36%)

# ====== DATASET: t_big (20M rows, NUMERIC(38,2)) ======
# ---- Baseline (All optimizations reverted) ----
# MAX queries
select max(amount) from t_big;                                                      Time: 1143.060 ms
select max(amount) from t... where order_date > '2025-01-01';                       Time: 462.994 ms
select order_date, max(amount) from t_big group by order_date;                      Time: 1032.899 ms
select order_date, max... from t... where order_date > '2025-01-01' group by ...;   Time: 556.832 ms

# MIN queries
select min(amount) from t_big;                                                      Time: 534.970 ms
select min(amount) from t... where order_date > '2025-01-01';                       Time: 412.798 ms
select order_date, min(amount) from t_big group by order_date;                      Time: 923.593 ms
select order_date, min... from t... where order_date > '2025-01-01' group by ...;   Time: 565.523 ms

# SUM queries
select sum(amount) from t_big;                                                      Time: 611.950 ms
select sum(amount) from t... where order_date > '2025-01-01';                       Time: 476.377 ms
select order_date, sum(amount) from t_big group by order_date;                      Time: 1009.434 ms
select order_date, sum... from t... where order_date > '2025-01-01' group by ...;   Time: 589.432 ms

# AVG queries
select avg(amount) from t_big;                                                      Time: 601.082 ms
select avg(amount) from t... where order_date > '2025-01-01';                       Time: 450.408 ms
select order_date, avg(amount) from t_big group by order_date;                      Time: 1038.965 ms
select order_date, avg... from t... where order_date > '2025-01-01' group by ...;   Time: 670.555 ms

# VARIANCE queries
select variance(amount) from t_big;                                                 Time: 1019.580 ms
select variance(amount) from t... where order_date > '2025-01-01';                  Time: 658.554 ms
select order_date, variance(amount) from t_big group by order_date;                 Time: 1366.489 ms
select order_date, variance... from t... where order_date > '2025-01-01' group by   Time: 766.608 ms

# COMBINED queries
select max, min, sum, avg, variance from t_big;                                     Time: 1797.785 ms
select max, min, sum, avg, variance from t... where order_date > '2025-01-01';      Time: 1064.796 ms
select order_date, max, min, sum, avg, variance from t_big group by order_date;     Time: 2254.985 ms
select order_date, max, min, sum... from t... where order_date > '2025-01-01'...    Time: 1189.765 ms

# ---- Optimized (packed-datum extraction for MAX, MIN, SUM, AVG, VARIANCE) ----
# MAX queries
select max(amount) from t_big;                                                      Time: 457.682 ms  (-60%)
select max(amount) from t... where order_date > '2025-01-01';                       Time: 342.199 ms  (-26%)
select order_date, max(amount) from t_big group by order_date;                      Time: 777.064 ms  (-25%)
select order_date, max... from t... where order_date > '2025-01-01' group by ...;   Time: 463.163 ms  (-17%)

# MIN queries
select min(amount) from t_big;                                                      Time: 328.795 ms  (-39%)
select min(amount) from t... where order_date > '2025-01-01';                       Time: 305.936 ms  (-26%)
select order_date, min(amount) from t_big group by order_date;                      Time: 693.899 ms  (-25%)
select order_date, min... from t... where order_date > '2025-01-01' group by ...;   Time: 456.664 ms  (-19%)

# SUM queries
select sum(amount) from t_big;                                                      Time: 329.647 ms  (-46%)
select sum(amount) from t... where order_date > '2025-01-01';                       Time: 311.748 ms  (-35%)
select order_date, sum(amount) from t_big group by order_date;                      Time: 703.230 ms  (-30%)
select order_date, sum... from t... where order_date > '2025-01-01' group by ...;   Time: 473.354 ms  (-20%)

# AVG queries
select avg(amount) from t_big;                                                      Time: 357.884 ms  (-40%)
select avg(amount) from t... where order_date > '2025-01-01';                       Time: 381.753 ms  (-15%)
select order_date, avg(amount) from t_big group by order_date;                      Time: 707.939 ms  (-32%)
select order_date, avg... from t... where order_date > '2025-01-01' group by ...;   Time: 489.593 ms  (-27%)

# VARIANCE queries
select variance(amount) from t_big;                                                 Time: 723.205 ms  (-29%)
select variance(amount) from t... where order_date > '2025-01-01';                  Time: 481.065 ms  (-27%)
select order_date, variance(amount) from t_big group by order_date;                 Time: 1273.396 ms (-7%)
select order_date, variance... from t... where order_date > '2025-01-01' group by   Time: 692.473 ms  (-10%)

# COMBINED queries
select max, min, sum, avg, variance from t_big;                                     Time: 1144.183 ms (-36%)
select max, min, sum, avg, variance from t... where order_date > '2025-01-01';      Time: 735.677 ms  (-31%)
select order_date, max, min, sum, avg, variance from t_big group by order_date;     Time: 1603.737 ms (-29%)
select order_date, max, min, sum... from t... where order_date > '2025-01-01'...    Time: 869.972 ms  (-27%)

# ====== DATASET: t_small (20M rows, NUMERIC(9,2)) ======
# ---- Baseline (All optimizations reverted) ----
# MAX queries
select max(amount) from t_small;                                                    Time: 919.394 ms
select max(amount) from t... where order_date > '2025-01-01';                       Time: 414.951 ms
select order_date, max(amount) from t_small group by order_date;                    Time: 939.192 ms
select order_date, max... from t... where order_date > '2025-01-01' group by ...;   Time: 548.936 ms

# MIN queries
select min(amount) from t_small;                                                    Time: 566.213 ms
select min(amount) from t... where order_date > '2025-01-01';                       Time: 468.580 ms
select order_date, min(amount) from t_small group by order_date;                    Time: 973.943 ms
select order_date, min... from t... where order_date > '2025-01-01' group by ...;   Time: 536.868 ms

# SUM queries
select sum(amount) from t_small;                                                    Time: 549.539 ms
select sum(amount) from t... where order_date > '2025-01-01';                       Time: 405.480 ms
select order_date, sum(amount) from t_small group by order_date;                    Time: 970.394 ms
select order_date, sum... from t... where order_date > '2025-01-01' group by ...;   Time: 521.120 ms

# AVG queries
select avg(amount) from t_small;                                                    Time: 557.897 ms
select avg(amount) from t... where order_date > '2025-01-01';                       Time: 417.120 ms
select order_date, avg(amount) from t_small group by order_date;                    Time: 937.021 ms
select order_date, avg... from t... where order_date > '2025-01-01' group by ...;   Time: 552.019 ms

# VARIANCE queries
select variance(amount) from t_small;                                               Time: 655.459 ms
select variance(amount) from t... where order_date > '2025-01-01';                  Time: 494.058 ms
select order_date, variance(amount) from t_small group by order_date;               Time: 1051.335 ms
select order_date, variance... from t... where order_date > '2025-01-01' group by   Time: 606.667 ms

# COMBINED queries
select max, min, sum, avg, variance from t_small;                                   Time: 1489.296 ms
select max, min, sum, avg, variance from t... where order_date > '2025-01-01';      Time: 892.727 ms
select order_date, max, min, sum, avg, variance from t_small group by order_date;   Time: 1940.004 ms
select order_date, max, min, sum... from t... where order_date > '2025-01-01'...    Time: 1041.348 ms

# ---- Optimized (packed-datum extraction for MAX, MIN, SUM, AVG, VARIANCE) ----
# MAX queries
select max(amount) from t_small;                                                    Time: 332.218 ms  (-64%)
select max(amount) from t... where order_date > '2025-01-01';                       Time: 293.562 ms  (-29%)
select order_date, max(amount) from t_small group by order_date;                    Time: 670.589 ms  (-29%)
select order_date, max... from t... where order_date > '2025-01-01' group by ...;   Time: 485.264 ms  (-12%)

# MIN queries
select min(amount) from t_small;                                                    Time: 303.559 ms  (-46%)
select min(amount) from t... where order_date > '2025-01-01';                       Time: 330.970 ms  (-29%)
select order_date, min(amount) from t_small group by order_date;                    Time: 726.441 ms  (-25%)
select order_date, min... from t... where order_date > '2025-01-01' group by ...;   Time: 455.427 ms  (-15%)

# SUM queries
select sum(amount) from t_small;                                                    Time: 331.040 ms  (-40%)
select sum(amount) from t... where order_date > '2025-01-01';                       Time: 354.306 ms  (-13%)
select order_date, sum(amount) from t_small group by order_date;                    Time: 731.451 ms  (-25%)
select order_date, sum... from t... where order_date > '2025-01-01' group by ...;   Time: 479.442 ms  (-8%)

# AVG queries
select avg(amount) from t_small;                                                    Time: 397.363 ms  (-29%)
select avg(amount) from t... where order_date > '2025-01-01';                       Time: 325.047 ms  (-22%)
select order_date, avg(amount) from t_small group by order_date;                    Time: 750.833 ms  (-20%)
select order_date, avg... from t... where order_date > '2025-01-01' group by ...;   Time: 547.354 ms  (-1%)

# VARIANCE queries
select variance(amount) from t_small;                                               Time: 504.897 ms  (-23%)
select variance(amount) from t... where order_date > '2025-01-01';                  Time: 405.917 ms  (-18%)
select order_date, variance(amount) from t_small group by order_date;               Time: 871.387 ms  (-17%)
select order_date, variance... from t... where order_date > '2025-01-01' group by   Time: 542.256 ms  (-11%)

# COMBINED queries
select max, min, sum, avg, variance from t_small;                                   Time: 753.569 ms  (-49%)
select max, min, sum, avg, variance from t... where order_date > '2025-01-01';      Time: 524.862 ms  (-41%)
select order_date, max, min, sum, avg, variance from t_small group by order_date;   Time: 1228.248 ms (-37%)
select order_date, max, min, sum... from t... where order_date > '2025-01-01'...    Time: 724.252 ms  (-30%)

Feedback and review welcome.

--
Regards, Chenhui Mo,
pgEdge

Attachments:

0001-numeric_opt.patchapplication/octet-stream; charset=utf-8; name=0001-numeric_opt.patchDownload+222-29