From cc446f4b90c9d17c9652a34ed3f509a8b648675a Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Mon, 12 Jan 2026 15:40:27 +0900 Subject: [PATCH] Row pattern recognition: Enhance grammar and AST structure for pattern definitions --- src/backend/executor/nodeWindowAgg.c | 330 +++++++---- src/backend/optimizer/plan/createplan.c | 16 +- src/backend/parser/Makefile | 1 + src/backend/parser/gram.y | 348 ++++++++++- src/backend/parser/parse_clause.c | 262 +-------- src/backend/parser/parse_rpr.c | 749 ++++++++++++++++++++++++ src/backend/parser/scan.l | 4 +- src/backend/utils/adt/ruleutils.c | 125 +++- src/include/nodes/execnodes.h | 5 +- src/include/nodes/parsenodes.h | 47 +- src/include/nodes/plannodes.h | 10 +- src/include/parser/parse_clause.h | 3 + src/include/parser/parse_rpr.h | 22 + src/test/regress/expected/rpr.out | 654 ++++++++++++++++++++- src/test/regress/sql/rpr.sql | 281 +++++++++ 15 files changed, 2404 insertions(+), 453 deletions(-) create mode 100644 src/backend/parser/parse_rpr.c create mode 100644 src/include/parser/parse_rpr.h diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 88a1f2bd46e..c4509dabf74 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -304,6 +304,12 @@ static int variable_pos_fetch(VariablePos *variable_pos, char initial, int index); static VariablePos *variable_pos_build(WindowAggState *winstate); +static int count_pattern_vars(RPRPatternNode *node); +static List *get_pattern_var_names(RPRPatternNode *node); +static void build_pattern_str(WindowAggState *winstate, RPRPatternNode *node, + StringInfo pattern_str, VariablePos *variable_pos, + int *initial_index); + static void check_rpr_navigation(Node *node, bool is_prev); static bool rpr_navigation_walker(Node *node, void *context); @@ -2986,8 +2992,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) /* Set up SKIP TO type */ winstate->rpSkipTo = node->rpSkipTo; /* Set up row pattern recognition PATTERN clause */ - winstate->patternVariableList = node->patternVariable; - winstate->patternRegexpList = node->patternRegexp; + winstate->rpPattern = node->rpPattern; /* Set up row pattern recognition DEFINE clause */ winstate->defineInitial = node->defineInitial; @@ -4541,7 +4546,7 @@ static bool rpr_is_defined(WindowAggState *winstate) { - return winstate->patternVariableList != NIL; + return winstate->rpPattern != NULL; } /* @@ -4721,15 +4726,14 @@ void update_reduced_frame(WindowObject winobj, int64 pos) { WindowAggState *winstate = winobj->winstate; - ListCell *lc1, - *lc2; + ListCell *lc; + List *pattern_var_names; bool expression_result; int num_matched_rows; int64 original_pos; bool anymatch; StringInfo encoded_str; StringSet *str_set; - bool greedy = false; int64 result_pos, i; int init_size; @@ -4743,6 +4747,9 @@ update_reduced_frame(WindowObject winobj, int64 pos) /* initialize pattern variables set */ str_set = string_set_init(); + /* get pattern variable names from rpPattern */ + pattern_var_names = get_pattern_var_names(winstate->rpPattern); + /* save original pos */ original_pos = pos; @@ -4750,74 +4757,12 @@ update_reduced_frame(WindowObject winobj, int64 pos) * Calculate initial memory allocation size from the number of pattern * variables, */ - init_size = sizeof(char) * list_length(winstate->patternVariableList) + 1; - - /* - * Check if the pattern does not include any greedy quantifier. If it does - * not, we can just apply the pattern to each row. If it succeeds, we are - * done. - */ - foreach(lc1, winstate->patternRegexpList) - { - char *quantifier = strVal(lfirst(lc1)); - - if (*quantifier == '+' || *quantifier == '*' || *quantifier == '?') - { - greedy = true; - break; - } - } + init_size = sizeof(char) * count_pattern_vars(winstate->rpPattern) + 1; /* - * Non greedy case - */ - if (!greedy) - { - num_matched_rows = 0; - encoded_str = makeStringInfoExt(init_size); - - foreach(lc1, winstate->patternVariableList) - { - char *vname = strVal(lfirst(lc1)); -#ifdef RPR_DEBUG - elog(DEBUG1, "pos: " INT64_FORMAT " pattern vname: %s", - pos, vname); -#endif - expression_result = false; - - /* evaluate row pattern against current row */ - result_pos = evaluate_pattern(winobj, pos, vname, - encoded_str, &expression_result); - if (!expression_result || result_pos < 0) - { -#ifdef RPR_DEBUG - elog(DEBUG1, "expression result is false or out of frame"); -#endif - register_reduced_frame_map(winstate, original_pos, - RF_UNMATCHED); - destroyStringInfo(encoded_str); - return; - } - /* move to next row */ - pos++; - num_matched_rows++; - } - destroyStringInfo(encoded_str); -#ifdef RPR_DEBUG - elog(DEBUG1, "pattern matched"); -#endif - register_reduced_frame_map(winstate, original_pos, RF_FRAME_HEAD); - - for (i = original_pos + 1; i < original_pos + num_matched_rows; i++) - { - register_reduced_frame_map(winstate, i, RF_SKIPPED); - } - return; - } - - /* - * Greedy quantifiers included. Loop over until none of pattern matches or - * encounters end of frame. + * Loop over until none of pattern matches or encounters end of frame. + * Build encoded_str representing which variables matched each row, + * then use regex to find the match. */ for (;;) { @@ -4830,15 +4775,12 @@ update_reduced_frame(WindowObject winobj, int64 pos) encoded_str = makeStringInfoExt(init_size); - forboth(lc1, winstate->patternVariableList, lc2, - winstate->patternRegexpList) + foreach(lc, pattern_var_names) { - char *vname = strVal(lfirst(lc1)); + char *vname = strVal(lfirst(lc)); #ifdef RPR_DEBUG - char *quantifier = strVal(lfirst(lc2)); - - elog(DEBUG1, "pos: " INT64_FORMAT " pattern vname: %s quantifier: %s", - pos, vname, quantifier); + elog(DEBUG1, "pos: " INT64_FORMAT " pattern vname: %s", + pos, vname); #endif expression_result = false; @@ -5205,15 +5147,18 @@ add_pattern(WindowAggState *winstate, StringInfo old, int old_info, /* * Adhoc optimization. If the first letter in the input string is in the - * head and second position and there's no associated quatifier '+', then - * we can dicard the input because there's no chance to expand the string + * head and second position and there's no associated quantifier, then + * we can discard the input because there's no chance to expand the string * further. * * For example, pattern "abc" cannot match "aa". + * But "a+" or "a*" or "a{n}" can match "aa". */ if (pattern[1] == new->data[0] && pattern[1] == new->data[1] && pattern[2] != '+' && + pattern[2] != '*' && + pattern[2] != '{' && pattern[1] != pattern[2]) { destroyStringInfo(new); @@ -5915,6 +5860,201 @@ variable_pos_fetch(VariablePos *variable_pos, char initial, int index) return variable_pos[pos].pos[index]; } +/* + * count_pattern_vars + * + * Count the number of VAR nodes in RPRPatternNode tree. + */ +static int +count_pattern_vars(RPRPatternNode *node) +{ + ListCell *lc; + int count = 0; + + if (node == NULL) + return 0; + + switch (node->nodeType) + { + case RPR_PATTERN_VAR: + return 1; + + case RPR_PATTERN_SEQ: + case RPR_PATTERN_ALT: + case RPR_PATTERN_GROUP: + foreach(lc, node->children) + { + count += count_pattern_vars((RPRPatternNode *) lfirst(lc)); + } + return count; + } + return 0; +} + +/* + * get_pattern_var_names + * + * Get list of variable names from RPRPatternNode tree (in order). + * Returns List of String nodes. + */ +static List * +get_pattern_var_names(RPRPatternNode *node) +{ + ListCell *lc; + List *result = NIL; + + if (node == NULL) + return NIL; + + switch (node->nodeType) + { + case RPR_PATTERN_VAR: + return list_make1(makeString(pstrdup(node->varName))); + + case RPR_PATTERN_SEQ: + case RPR_PATTERN_ALT: + case RPR_PATTERN_GROUP: + foreach(lc, node->children) + { + result = list_concat(result, + get_pattern_var_names((RPRPatternNode *) lfirst(lc))); + } + return result; + } + return NIL; +} + +/* + * build_pattern_str + * + * Recursively traverse RPRPatternNode tree and build pattern string. + * Also registers variable positions. + */ +static void +build_pattern_str(WindowAggState *winstate, RPRPatternNode *node, + StringInfo pattern_str, VariablePos *variable_pos, + int *initial_index) +{ + ListCell *lc; + char initial; + + if (node == NULL) + return; + + switch (node->nodeType) + { + case RPR_PATTERN_VAR: + /* Check for unsupported RELUCTANT quantifier */ + if (node->reluctant) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("RELUCTANT quantifier is not yet supported"), + errhint("Use greedy quantifiers (*, +, ?) instead."))); + + /* Get initial character for this variable */ + initial = pattern_initial(winstate, node->varName); + Assert(initial != 0); + + /* Add quantifier if any */ + if (node->min == 0 && node->max == INT_MAX) + { + /* A* */ + appendStringInfoChar(pattern_str, initial); + appendStringInfoChar(pattern_str, '*'); + } + else if (node->min == 1 && node->max == INT_MAX) + { + /* A+ */ + appendStringInfoChar(pattern_str, initial); + appendStringInfoChar(pattern_str, '+'); + } + else if (node->min == 0 && node->max == 1) + { + /* A? */ + appendStringInfoChar(pattern_str, initial); + appendStringInfoChar(pattern_str, '?'); + } + else if (node->min == node->max) + { + /* A{n} */ + appendStringInfoChar(pattern_str, initial); + appendStringInfo(pattern_str, "{%d}", node->min); + } + else if (node->max == INT_MAX) + { + /* A{n,} */ + appendStringInfoChar(pattern_str, initial); + appendStringInfo(pattern_str, "{%d,}", node->min); + } + else + { + /* A{n,m} */ + appendStringInfoChar(pattern_str, initial); + appendStringInfo(pattern_str, "{%d,%d}", node->min, node->max); + } + + /* Register position */ + variable_pos_register(variable_pos, initial, *initial_index); + (*initial_index)++; + break; + + case RPR_PATTERN_SEQ: + /* Process children in sequence */ + foreach(lc, node->children) + { + build_pattern_str(winstate, (RPRPatternNode *) lfirst(lc), + pattern_str, variable_pos, initial_index); + } + break; + + case RPR_PATTERN_ALT: + /* Generate alternation pattern: (A|B|C) */ + appendStringInfoChar(pattern_str, '('); + foreach(lc, node->children) + { + if (lc != list_head(node->children)) + appendStringInfoChar(pattern_str, '|'); + build_pattern_str(winstate, (RPRPatternNode *) lfirst(lc), + pattern_str, variable_pos, initial_index); + } + appendStringInfoChar(pattern_str, ')'); + break; + + case RPR_PATTERN_GROUP: + /* Generate group pattern with quantifier: (...)+ or (...)* etc */ + appendStringInfoChar(pattern_str, '('); + foreach(lc, node->children) + { + build_pattern_str(winstate, (RPRPatternNode *) lfirst(lc), + pattern_str, variable_pos, initial_index); + } + appendStringInfoChar(pattern_str, ')'); + + /* Add quantifier if not {1,1} */ + if (node->min == 0 && node->max == INT_MAX) + appendStringInfoChar(pattern_str, '*'); + else if (node->min == 1 && node->max == INT_MAX) + appendStringInfoChar(pattern_str, '+'); + else if (node->min == 0 && node->max == 1) + appendStringInfoChar(pattern_str, '?'); + else if (node->min == node->max && node->min > 1) + { + /* {n} - fixed repetition */ + appendStringInfo(pattern_str, "{%d}", node->min); + } + else if (node->min != 1 || node->max != 1) + { + /* {n,m} - range repetition */ + if (node->max == INT_MAX) + appendStringInfo(pattern_str, "{%d,}", node->min); + else + appendStringInfo(pattern_str, "{%d,%d}", node->min, node->max); + } + /* else {1,1} - no quantifier needed */ + break; + } +} + /* * variable_pos_build * @@ -5927,36 +6067,14 @@ variable_pos_build(WindowAggState *winstate) VariablePos *variable_pos; StringInfo pattern_str; int initial_index = 0; - ListCell *lc1, - *lc2; variable_pos = winstate->variable_pos = variable_pos_init(); pattern_str = winstate->pattern_str = makeStringInfo(); appendStringInfoChar(pattern_str, '^'); - forboth(lc1, winstate->patternVariableList, - lc2, winstate->patternRegexpList) - { - char *vname = strVal(lfirst(lc1)); - char *quantifier = strVal(lfirst(lc2)); - char initial; - - initial = pattern_initial(winstate, vname); - Assert(initial != 0); - appendStringInfoChar(pattern_str, initial); - if (quantifier[0]) - appendStringInfoChar(pattern_str, quantifier[0]); - - /* - * Register the initial at initial_index. If the initial appears more - * than once, all of it's initial_index will be recorded. This could - * happen if a pattern variable appears in the PATTERN clause more - * than once like "UP DOWN UP" "UP UP UP". - */ - variable_pos_register(variable_pos, initial, initial_index); - - initial_index++; - } + /* Build pattern string from RPRPatternNode tree */ + build_pattern_str(winstate, winstate->rpPattern, + pattern_str, variable_pos, &initial_index); return variable_pos; } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index fb779c84403..5dee13cbdf0 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -289,8 +289,9 @@ static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators, static WindowAgg *make_windowagg(List *tlist, WindowClause *wc, int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, - List *runCondition, RPSkipTo rpSkipTo, List *patternVariable, - List *patternRegexp, List *defineClause, List *defineInitial, + List *runCondition, RPSkipTo rpSkipTo, + RPRPatternNode *rpPattern, + List *defineClause, List *defineInitial, List *qual, bool topWindow, Plan *lefttree); static Group *make_group(List *tlist, List *qual, int numGroupCols, @@ -2545,8 +2546,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) ordCollations, best_path->runCondition, wc->rpSkipTo, - wc->patternVariable, - wc->patternRegexp, + wc->rpPattern, wc->defineClause, wc->defineInitial, best_path->qual, @@ -6611,8 +6611,9 @@ static WindowAgg * make_windowagg(List *tlist, WindowClause *wc, int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, - List *runCondition, RPSkipTo rpSkipTo, List *patternVariable, - List *patternRegexp, List *defineClause, List *defineInitial, + List *runCondition, RPSkipTo rpSkipTo, + RPRPatternNode *rpPattern, + List *defineClause, List *defineInitial, List *qual, bool topWindow, Plan *lefttree) { WindowAgg *node = makeNode(WindowAgg); @@ -6641,8 +6642,7 @@ make_windowagg(List *tlist, WindowClause *wc, node->inRangeNullsFirst = wc->inRangeNullsFirst; node->topWindow = topWindow; node->rpSkipTo = rpSkipTo; - node->patternVariable = patternVariable; - node->patternRegexp = patternRegexp; + node->rpPattern = rpPattern; node->defineClause = defineClause; node->defineInitial = defineInitial; diff --git a/src/backend/parser/Makefile b/src/backend/parser/Makefile index 8c0fe28d63f..f4c7d605fe3 100644 --- a/src/backend/parser/Makefile +++ b/src/backend/parser/Makefile @@ -29,6 +29,7 @@ OBJS = \ parse_oper.o \ parse_param.o \ parse_relation.o \ + parse_rpr.o \ parse_target.o \ parse_type.o \ parse_utilcmd.o \ diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 99c27dc178c..b2f195a6f67 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -684,9 +684,10 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type row_pattern_definition %type opt_row_pattern_common_syntax - row_pattern_term + row_pattern row_pattern_alt row_pattern_seq + row_pattern_term row_pattern_primary + row_pattern_quantifier_opt %type row_pattern_definition_list - row_pattern %type opt_row_pattern_skip_to %type opt_row_pattern_initial_or_seek @@ -900,7 +901,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %nonassoc IDENT PARTITION RANGE ROWS GROUPS PRECEDING FOLLOWING CUBE ROLLUP SET KEYS OBJECT_P SCALAR VALUE_P WITH WITHOUT PATH AFTER INITIAL SEEK PATTERN_P -%left Op OPERATOR /* multi-character ops and user-defined operators */ +%left Op OPERATOR '|' /* multi-character ops and user-defined operators */ %left '+' '-' %left '*' '/' '%' %left '^' @@ -16833,7 +16834,7 @@ opt_row_pattern_skip_to opt_row_pattern_initial_or_seek RPCommonSyntax *n = makeNode(RPCommonSyntax); n->rpSkipTo = $1; n->initial = $2; - n->rpPatterns = $5; + n->rpPattern = (RPRPatternNode *) $5; n->rpDefs = $8; $$ = (Node *) n; } @@ -16869,28 +16870,336 @@ opt_row_pattern_initial_or_seek: ; row_pattern: - row_pattern_term { $$ = list_make1($1); } - | row_pattern row_pattern_term { $$ = lappend($1, $2); } + row_pattern_alt { $$ = $1; } + ; + +row_pattern_alt: + row_pattern_seq { $$ = $1; } + | row_pattern_alt '|' row_pattern_seq + { + RPRPatternNode *n; + /* If left side is already ALT, append to it */ + if (IsA($1, RPRPatternNode) && + ((RPRPatternNode *) $1)->nodeType == RPR_PATTERN_ALT) + { + n = (RPRPatternNode *) $1; + n->children = lappend(n->children, $3); + $$ = (Node *) n; + } + else + { + n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_ALT; + n->children = list_make2($1, $3); + n->min = 1; + n->max = 1; + n->location = @1; + $$ = (Node *) n; + } + } + ; + +row_pattern_seq: + row_pattern_term { $$ = $1; } + | row_pattern_seq row_pattern_term + { + RPRPatternNode *n; + /* If left side is already SEQ, append to it */ + if (IsA($1, RPRPatternNode) && + ((RPRPatternNode *) $1)->nodeType == RPR_PATTERN_SEQ) + { + n = (RPRPatternNode *) $1; + n->children = lappend(n->children, $2); + $$ = (Node *) n; + } + else + { + n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_SEQ; + n->children = list_make2($1, $2); + n->min = 1; + n->max = 1; + n->location = @1; + $$ = (Node *) n; + } + } ; row_pattern_term: - ColId { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "", (Node *)makeString($1), NULL, @1); } - | ColId '*' { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "*", (Node *)makeString($1), NULL, @1); } - | ColId '+' { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", (Node *)makeString($1), NULL, @1); } -/* - * '?' quantifier. We cannot write this directly "ColId '?'" because '?' is - * not a "self" token. So we let '?' matches Op first then check if it's '?' - * or not. - */ - | ColId Op + row_pattern_primary row_pattern_quantifier_opt + { + RPRPatternNode *n = (RPRPatternNode *) $1; + RPRPatternNode *q = (RPRPatternNode *) $2; + + n->min = q->min; + n->max = q->max; + n->reluctant = q->reluctant; + $$ = (Node *) n; + } + ; + +row_pattern_primary: + ColId + { + RPRPatternNode *n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_VAR; + n->varName = $1; + n->min = 1; + n->max = 1; + n->reluctant = false; + n->children = NIL; + n->location = @1; + $$ = (Node *) n; + } + | '(' row_pattern ')' + { + RPRPatternNode *inner = (RPRPatternNode *) $2; + RPRPatternNode *n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_GROUP; + n->children = list_make1(inner); + n->min = 1; + n->max = 1; + n->reluctant = false; + n->location = @1; + $$ = (Node *) n; + } + ; + +row_pattern_quantifier_opt: + /* EMPTY */ + { + RPRPatternNode *n = makeNode(RPRPatternNode); + n->min = 1; + n->max = 1; + n->reluctant = false; + $$ = (Node *) n; + } + | '*' { - if (strcmp("?", $2)) + RPRPatternNode *n = makeNode(RPRPatternNode); + n->min = 0; + n->max = INT_MAX; + n->reluctant = false; + $$ = (Node *) n; + } + | '+' + { + RPRPatternNode *n = makeNode(RPRPatternNode); + n->min = 1; + n->max = INT_MAX; + n->reluctant = false; + $$ = (Node *) n; + } + | Op + { + RPRPatternNode *n = makeNode(RPRPatternNode); + /* Handle single Op: ? or reluctant quantifiers *?, +?, ?? */ + if (strcmp($1, "?") == 0) + { + n->min = 0; + n->max = 1; + n->reluctant = false; + } + else if (strcmp($1, "*?") == 0) + { + n->min = 0; + n->max = INT_MAX; + n->reluctant = true; + } + else if (strcmp($1, "+?") == 0) + { + n->min = 1; + n->max = INT_MAX; + n->reluctant = true; + } + else if (strcmp($1, "??") == 0) + { + n->min = 0; + n->max = 1; + n->reluctant = true; + } + else + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier \"%s\"", $1), + parser_errposition(@1)); + $$ = (Node *) n; + } + /* RELUCTANT quantifiers (when lexer separates tokens) */ + | '*' Op + { + RPRPatternNode *n = makeNode(RPRPatternNode); + if (strcmp($2, "?") != 0) ereport(ERROR, errcode(ERRCODE_SYNTAX_ERROR), errmsg("unsupported quantifier"), parser_errposition(@2)); - - $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "?", (Node *)makeString($1), NULL, @1); + n->min = 0; + n->max = INT_MAX; + n->reluctant = true; + $$ = (Node *) n; + } + | '+' Op + { + RPRPatternNode *n = makeNode(RPRPatternNode); + if (strcmp($2, "?") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier"), + parser_errposition(@2)); + n->min = 1; + n->max = INT_MAX; + n->reluctant = true; + $$ = (Node *) n; + } + | Op Op + { + RPRPatternNode *n = makeNode(RPRPatternNode); + if (strcmp($1, "?") != 0 || strcmp($2, "?") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier"), + parser_errposition(@1)); + n->min = 0; + n->max = 1; + n->reluctant = true; + $$ = (Node *) n; + } + /* {n}, {n,}, {,m}, {n,m} quantifiers */ + | '{' Iconst '}' + { + RPRPatternNode *n = makeNode(RPRPatternNode); + if ($2 < 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must not be negative"), + parser_errposition(@2)); + n->min = $2; + n->max = $2; + n->reluctant = false; + $$ = (Node *) n; + } + | '{' Iconst ',' '}' + { + RPRPatternNode *n = makeNode(RPRPatternNode); + if ($2 < 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must not be negative"), + parser_errposition(@2)); + n->min = $2; + n->max = INT_MAX; + n->reluctant = false; + $$ = (Node *) n; + } + | '{' ',' Iconst '}' + { + RPRPatternNode *n = makeNode(RPRPatternNode); + if ($3 < 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must not be negative"), + parser_errposition(@3)); + n->min = 0; + n->max = $3; + n->reluctant = false; + $$ = (Node *) n; + } + | '{' Iconst ',' Iconst '}' + { + RPRPatternNode *n = makeNode(RPRPatternNode); + if ($2 < 0 || $4 < 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must not be negative"), + parser_errposition(@2)); + if ($2 > $4) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier minimum bound must not exceed maximum"), + parser_errposition(@2)); + n->min = $2; + n->max = $4; + n->reluctant = false; + $$ = (Node *) n; + } + /* Reluctant versions: {n}?, {n,}?, {,m}?, {n,m}? */ + | '{' Iconst '}' Op + { + RPRPatternNode *n = makeNode(RPRPatternNode); + if (strcmp($4, "?") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier"), + parser_errposition(@4)); + if ($2 < 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must not be negative"), + parser_errposition(@2)); + n->min = $2; + n->max = $2; + n->reluctant = true; + $$ = (Node *) n; + } + | '{' Iconst ',' '}' Op + { + RPRPatternNode *n = makeNode(RPRPatternNode); + if (strcmp($5, "?") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier"), + parser_errposition(@5)); + if ($2 < 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must not be negative"), + parser_errposition(@2)); + n->min = $2; + n->max = INT_MAX; + n->reluctant = true; + $$ = (Node *) n; + } + | '{' ',' Iconst '}' Op + { + RPRPatternNode *n = makeNode(RPRPatternNode); + if (strcmp($5, "?") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier"), + parser_errposition(@5)); + if ($3 < 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must not be negative"), + parser_errposition(@3)); + n->min = 0; + n->max = $3; + n->reluctant = true; + $$ = (Node *) n; + } + | '{' Iconst ',' Iconst '}' Op + { + RPRPatternNode *n = makeNode(RPRPatternNode); + if (strcmp($6, "?") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier"), + parser_errposition(@6)); + if ($2 < 0 || $4 < 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must not be negative"), + parser_errposition(@2)); + if ($2 > $4) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier minimum bound must not exceed maximum"), + parser_errposition(@2)); + n->min = $2; + n->max = $4; + n->reluctant = true; + $$ = (Node *) n; } ; @@ -16953,12 +17262,15 @@ MathOp: '+' { $$ = "+"; } | LESS_EQUALS { $$ = "<="; } | GREATER_EQUALS { $$ = ">="; } | NOT_EQUALS { $$ = "<>"; } + | '|' { $$ = "|"; } ; qual_Op: Op { $$ = list_make1(makeString($1)); } | OPERATOR '(' any_operator ')' { $$ = $3; } + | '|' + { $$ = list_make1(makeString("|")); } ; qual_all_Op: diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 40c4e93837c..215f672835c 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -37,6 +37,7 @@ #include "parser/parse_func.h" #include "parser/parse_oper.h" #include "parser/parse_relation.h" +#include "parser/parse_rpr.h" #include "parser/parse_target.h" #include "parser/parse_type.h" #include "parser/parser.h" @@ -84,8 +85,6 @@ static void checkExprIsVarFree(ParseState *pstate, Node *n, const char *constructName); static TargetEntry *findTargetlistEntrySQL92(ParseState *pstate, Node *node, List **tlist, ParseExprKind exprKind); -static TargetEntry *findTargetlistEntrySQL99(ParseState *pstate, Node *node, - List **tlist, ParseExprKind exprKind); static int get_matching_location(int sortgroupref, List *sortgrouprefs, List *exprs); static List *resolve_unique_index_expr(ParseState *pstate, InferClause *infer, @@ -96,12 +95,6 @@ static WindowClause *findWindowClause(List *wclist, const char *name); static Node *transformFrameOffset(ParseState *pstate, int frameOptions, Oid rangeopfamily, Oid rangeopcintype, Oid *inRangeFunc, Node *clause); -static void transformRPR(ParseState *pstate, WindowClause *wc, WindowDef *windef, - List **targetlist); -static List *transformDefineClause(ParseState *pstate, WindowClause *wc, WindowDef *windef, - List **targetlist); -static void transformPatternClause(ParseState *pstate, WindowClause *wc, - WindowDef *windef); /* * transformFromClause - @@ -2173,7 +2166,7 @@ findTargetlistEntrySQL92(ParseState *pstate, Node *node, List **tlist, * tlist the target list (passed by reference so we can append to it) * exprKind identifies clause type being processed */ -static TargetEntry * +TargetEntry * findTargetlistEntrySQL99(ParseState *pstate, Node *node, List **tlist, ParseExprKind exprKind) { @@ -3903,254 +3896,3 @@ transformFrameOffset(ParseState *pstate, int frameOptions, return node; } - -/* - * transformRPR - * Process Row Pattern Recognition related clauses - */ -static void -transformRPR(ParseState *pstate, WindowClause *wc, WindowDef *windef, - List **targetlist) -{ - /* - * Window definition exists? - */ - if (windef == NULL) - return; - - /* - * Row Pattern Common Syntax clause exists? - */ - if (windef->rpCommonSyntax == NULL) - return; - - /* Check Frame option. Frame must start at current row */ - if ((wc->frameOptions & FRAMEOPTION_START_CURRENT_ROW) == 0) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("FRAME must start at current row when row pattern recognition is used"))); - - /* Transform AFTER MACH SKIP TO clause */ - wc->rpSkipTo = windef->rpCommonSyntax->rpSkipTo; - - /* Transform SEEK or INITIAL clause */ - wc->initial = windef->rpCommonSyntax->initial; - - /* Transform DEFINE clause into list of TargetEntry's */ - wc->defineClause = transformDefineClause(pstate, wc, windef, targetlist); - - /* Check PATTERN clause and copy to patternClause */ - transformPatternClause(pstate, wc, windef); -} - -/* - * transformDefineClause - * Process DEFINE clause and transform ResTarget into list of - * TargetEntry. - * - * XXX we only support column reference in row pattern definition search - * condition, e.g. "price". . is not supported, e.g. "A.price". - */ -static List * -transformDefineClause(ParseState *pstate, WindowClause *wc, WindowDef *windef, - List **targetlist) -{ - /* DEFINE variable name initials */ - static const char *defineVariableInitials = "abcdefghijklmnopqrstuvwxyz"; - - ListCell *lc, - *l; - ResTarget *restarget, - *r; - List *restargets; - List *defineClause; - char *name; - int initialLen; - int numinitials; - - /* - * If Row Definition Common Syntax exists, DEFINE clause must exist. (the - * raw parser should have already checked it.) - */ - Assert(windef->rpCommonSyntax->rpDefs != NULL); - - /* - * Check and add "A AS A IS TRUE" if pattern variable is missing in DEFINE - * per the SQL standard. - */ - restargets = NIL; - foreach(lc, windef->rpCommonSyntax->rpPatterns) - { - A_Expr *a; - bool found = false; - - if (!IsA(lfirst(lc), A_Expr)) - ereport(ERROR, - errmsg("node type is not A_Expr")); - - a = (A_Expr *) lfirst(lc); - name = strVal(a->lexpr); - - foreach(l, windef->rpCommonSyntax->rpDefs) - { - restarget = (ResTarget *) lfirst(l); - - if (!strcmp(restarget->name, name)) - { - found = true; - break; - } - } - - if (!found) - { - /* - * "name" is missing. So create "name AS name IS TRUE" ResTarget - * node and add it to the temporary list. - */ - A_Const *n; - - restarget = makeNode(ResTarget); - n = makeNode(A_Const); - n->val.boolval.type = T_Boolean; - n->val.boolval.boolval = true; - n->location = -1; - restarget->name = pstrdup(name); - restarget->indirection = NIL; - restarget->val = (Node *) n; - restarget->location = -1; - restargets = lappend((List *) restargets, restarget); - } - } - - if (list_length(restargets) >= 1) - { - /* add missing DEFINEs */ - windef->rpCommonSyntax->rpDefs = - list_concat(windef->rpCommonSyntax->rpDefs, restargets); - list_free(restargets); - } - - /* - * Check for duplicate row pattern definition variables. The standard - * requires that no two row pattern definition variable names shall be - * equivalent. - */ - restargets = NIL; - numinitials = 0; - initialLen = strlen(defineVariableInitials); - foreach(lc, windef->rpCommonSyntax->rpDefs) - { - char initial[2]; - - restarget = (ResTarget *) lfirst(lc); - name = restarget->name; - - /* - * Add DEFINE expression (Restarget->val) to the targetlist as a - * TargetEntry if it does not exist yet. Planner will add the column - * ref var node to the outer plan's target list later on. This makes - * DEFINE expression could access the outer tuple while evaluating - * PATTERN. - * - * XXX: adding whole expressions of DEFINE to the plan.targetlist is - * not so good, because it's not necessary to evalute the expression - * in the target list while running the plan. We should extract the - * var nodes only then add them to the plan.targetlist. - */ - findTargetlistEntrySQL99(pstate, (Node *) restarget->val, - targetlist, EXPR_KIND_RPR_DEFINE); - - /* - * Make sure that the row pattern definition search condition is a - * boolean expression. - */ - transformWhereClause(pstate, restarget->val, - EXPR_KIND_RPR_DEFINE, "DEFINE"); - - foreach(l, restargets) - { - char *n; - - r = (ResTarget *) lfirst(l); - n = r->name; - - if (!strcmp(n, name)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("row pattern definition variable name \"%s\" appears more than once in DEFINE clause", - name), - parser_errposition(pstate, exprLocation((Node *) r)))); - } - - /* - * Create list of row pattern DEFINE variable name's initial. We - * assign [a-z] to them (up to 26 variable names are allowed). - */ - if (numinitials >= initialLen) - { - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("number of row pattern definition variable names exceeds %d", - initialLen), - parser_errposition(pstate, - exprLocation((Node *) restarget)))); - } - initial[0] = defineVariableInitials[numinitials++]; - initial[1] = '\0'; - wc->defineInitial = lappend(wc->defineInitial, - makeString(pstrdup(initial))); - - restargets = lappend(restargets, restarget); - } - list_free(restargets); - - /* turns a list of ResTarget's into a list of TargetEntry's */ - defineClause = transformTargetList(pstate, windef->rpCommonSyntax->rpDefs, - EXPR_KIND_RPR_DEFINE); - - /* mark column origins */ - markTargetListOrigins(pstate, defineClause); - - /* mark all nodes in the DEFINE clause tree with collation information */ - assign_expr_collations(pstate, (Node *) defineClause); - - return defineClause; -} - -/* - * transformPatternClause - * Process PATTERN clause and return PATTERN clause in the raw parse tree - * - * windef->rpCommonSyntax must exist. - */ -static void -transformPatternClause(ParseState *pstate, WindowClause *wc, - WindowDef *windef) -{ - ListCell *lc; - - Assert(windef->rpCommonSyntax != NULL); - - wc->patternVariable = NIL; - wc->patternRegexp = NIL; - foreach(lc, windef->rpCommonSyntax->rpPatterns) - { - A_Expr *a; - char *name; - char *regexp; - - if (!IsA(lfirst(lc), A_Expr)) - ereport(ERROR, - errmsg("node type is not A_Expr")); - - a = (A_Expr *) lfirst(lc); - name = strVal(a->lexpr); - - wc->patternVariable = lappend(wc->patternVariable, makeString(pstrdup(name))); - regexp = strVal(lfirst(list_head(a->name))); - - wc->patternRegexp = lappend(wc->patternRegexp, makeString(pstrdup(regexp))); - } -} diff --git a/src/backend/parser/parse_rpr.c b/src/backend/parser/parse_rpr.c new file mode 100644 index 00000000000..3a779dc5ed4 --- /dev/null +++ b/src/backend/parser/parse_rpr.c @@ -0,0 +1,749 @@ +/*------------------------------------------------------------------------- + * + * parse_rpr.c + * handle Row Pattern Recognition in parser + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/parser/parse_rpr.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_clause.h" +#include "parser/parse_collate.h" +#include "parser/parse_expr.h" +#include "parser/parse_rpr.h" +#include "parser/parse_target.h" + +/* DEFINE variable name initials */ +static const char *defineVariableInitials = "abcdefghijklmnopqrstuvwxyz"; + +/* Forward declarations */ +static void collectRPRPatternVarNames(RPRPatternNode *node, List **varNames); +static List *transformDefineClause(ParseState *pstate, WindowClause *wc, + WindowDef *windef, List **targetlist); +static void transformPatternClause(ParseState *pstate, WindowClause *wc, + WindowDef *windef); +static bool rprPatternEqual(RPRPatternNode *a, RPRPatternNode *b); +static RPRPatternNode *removeDuplicateAlternatives(RPRPatternNode *pattern); +static RPRPatternNode *mergeConsecutiveVars(RPRPatternNode *pattern); +static RPRPatternNode *optimizeRPRPattern(RPRPatternNode *pattern); + +/* + * transformRPR + * Process Row Pattern Recognition related clauses + */ +void +transformRPR(ParseState *pstate, WindowClause *wc, WindowDef *windef, + List **targetlist) +{ + /* + * Window definition exists? + */ + if (windef == NULL) + return; + + /* + * Row Pattern Common Syntax clause exists? + */ + if (windef->rpCommonSyntax == NULL) + return; + + /* Check Frame option. Frame must start at current row */ + if ((wc->frameOptions & FRAMEOPTION_START_CURRENT_ROW) == 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("FRAME must start at current row when row pattern recognition is used"))); + + /* Transform AFTER MACH SKIP TO clause */ + wc->rpSkipTo = windef->rpCommonSyntax->rpSkipTo; + + /* Transform SEEK or INITIAL clause */ + wc->initial = windef->rpCommonSyntax->initial; + + /* Transform DEFINE clause into list of TargetEntry's */ + wc->defineClause = transformDefineClause(pstate, wc, windef, targetlist); + + /* Check PATTERN clause and copy to patternClause */ + transformPatternClause(pstate, wc, windef); +} + +/* + * collectRPRPatternVarNames + * Collect all variable names from RPRPatternNode tree. + */ +static void +collectRPRPatternVarNames(RPRPatternNode *node, List **varNames) +{ + ListCell *lc; + + if (node == NULL) + return; + + switch (node->nodeType) + { + case RPR_PATTERN_VAR: + /* Add variable name if not already in list */ + { + bool found = false; + + foreach(lc, *varNames) + { + if (strcmp(strVal(lfirst(lc)), node->varName) == 0) + { + found = true; + break; + } + } + if (!found) + *varNames = lappend(*varNames, makeString(pstrdup(node->varName))); + } + break; + + case RPR_PATTERN_SEQ: + case RPR_PATTERN_ALT: + case RPR_PATTERN_GROUP: + /* Recurse into children */ + foreach(lc, node->children) + { + collectRPRPatternVarNames((RPRPatternNode *) lfirst(lc), varNames); + } + break; + } +} + +/* + * transformDefineClause + * Process DEFINE clause and transform ResTarget into list of + * TargetEntry. + * + * XXX we only support column reference in row pattern definition search + * condition, e.g. "price". . is not supported, e.g. "A.price". + */ +static List * +transformDefineClause(ParseState *pstate, WindowClause *wc, WindowDef *windef, + List **targetlist) +{ + ListCell *lc, + *l; + ResTarget *restarget, + *r; + List *restargets; + List *defineClause; + char *name; + int initialLen; + int numinitials; + List *patternVarNames = NIL; + + /* + * If Row Definition Common Syntax exists, DEFINE clause must exist. (the + * raw parser should have already checked it.) + */ + Assert(windef->rpCommonSyntax->rpDefs != NULL); + + /* + * Collect all pattern variable names from the pattern tree. + */ + collectRPRPatternVarNames(windef->rpCommonSyntax->rpPattern, &patternVarNames); + + /* + * Check and add "A AS A IS TRUE" if pattern variable is missing in DEFINE + * per the SQL standard. + */ + restargets = NIL; + foreach(lc, patternVarNames) + { + bool found = false; + + name = strVal(lfirst(lc)); + + foreach(l, windef->rpCommonSyntax->rpDefs) + { + restarget = (ResTarget *) lfirst(l); + + if (!strcmp(restarget->name, name)) + { + found = true; + break; + } + } + + if (!found) + { + /* + * "name" is missing. So create "name AS name IS TRUE" ResTarget + * node and add it to the temporary list. + */ + A_Const *n; + + restarget = makeNode(ResTarget); + n = makeNode(A_Const); + n->val.boolval.type = T_Boolean; + n->val.boolval.boolval = true; + n->location = -1; + restarget->name = pstrdup(name); + restarget->indirection = NIL; + restarget->val = (Node *) n; + restarget->location = -1; + restargets = lappend((List *) restargets, restarget); + } + } + + if (list_length(restargets) >= 1) + { + /* add missing DEFINEs */ + windef->rpCommonSyntax->rpDefs = + list_concat(windef->rpCommonSyntax->rpDefs, restargets); + list_free(restargets); + } + + /* + * Check for duplicate row pattern definition variables. The standard + * requires that no two row pattern definition variable names shall be + * equivalent. + */ + restargets = NIL; + numinitials = 0; + initialLen = strlen(defineVariableInitials); + foreach(lc, windef->rpCommonSyntax->rpDefs) + { + char initial[2]; + + restarget = (ResTarget *) lfirst(lc); + name = restarget->name; + + /* + * Add DEFINE expression (Restarget->val) to the targetlist as a + * TargetEntry if it does not exist yet. Planner will add the column + * ref var node to the outer plan's target list later on. This makes + * DEFINE expression could access the outer tuple while evaluating + * PATTERN. + * + * XXX: adding whole expressions of DEFINE to the plan.targetlist is + * not so good, because it's not necessary to evalute the expression + * in the target list while running the plan. We should extract the + * var nodes only then add them to the plan.targetlist. + */ + findTargetlistEntrySQL99(pstate, (Node *) restarget->val, + targetlist, EXPR_KIND_RPR_DEFINE); + + /* + * Make sure that the row pattern definition search condition is a + * boolean expression. + */ + transformWhereClause(pstate, restarget->val, + EXPR_KIND_RPR_DEFINE, "DEFINE"); + + foreach(l, restargets) + { + char *n; + + r = (ResTarget *) lfirst(l); + n = r->name; + + if (!strcmp(n, name)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("row pattern definition variable name \"%s\" appears more than once in DEFINE clause", + name), + parser_errposition(pstate, exprLocation((Node *) r)))); + } + + /* + * Create list of row pattern DEFINE variable name's initial. We + * assign [a-z] to them (up to 26 variable names are allowed). + */ + if (numinitials >= initialLen) + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("number of row pattern definition variable names exceeds %d", + initialLen), + parser_errposition(pstate, + exprLocation((Node *) restarget)))); + } + initial[0] = defineVariableInitials[numinitials++]; + initial[1] = '\0'; + wc->defineInitial = lappend(wc->defineInitial, + makeString(pstrdup(initial))); + + restargets = lappend(restargets, restarget); + } + list_free(restargets); + + /* turns a list of ResTarget's into a list of TargetEntry's */ + defineClause = transformTargetList(pstate, windef->rpCommonSyntax->rpDefs, + EXPR_KIND_RPR_DEFINE); + + /* mark column origins */ + markTargetListOrigins(pstate, defineClause); + + /* mark all nodes in the DEFINE clause tree with collation information */ + assign_expr_collations(pstate, (Node *) defineClause); + + return defineClause; +} + +/* + * transformPatternClause + * Process PATTERN clause and return PATTERN clause in the raw parse tree + * + * windef->rpCommonSyntax must exist. + */ +static void +transformPatternClause(ParseState *pstate, WindowClause *wc, + WindowDef *windef) +{ + RPRPatternNode *pattern; + + Assert(windef->rpCommonSyntax != NULL); + + /* + * Optimize the pattern tree in multiple passes: + * 1. optimizeRPRPattern: flatten SEQ/ALT, unwrap GROUP{1,1} + * 2. removeDuplicateAlternatives: (A | B | A) -> (A | B) + * 3. mergeConsecutiveVars: A A A -> A{3,3} + */ + pattern = windef->rpCommonSyntax->rpPattern; + pattern = optimizeRPRPattern(pattern); + pattern = removeDuplicateAlternatives(pattern); + pattern = mergeConsecutiveVars(pattern); + + wc->rpPattern = pattern; +} + +/* + * rprPatternEqual + * Compare two RPRPatternNode trees for equality. + * + * Returns true if the trees are structurally identical. + */ +static bool +rprPatternEqual(RPRPatternNode *a, RPRPatternNode *b) +{ + ListCell *lca, + *lcb; + + if (a == NULL && b == NULL) + return true; + if (a == NULL || b == NULL) + return false; + + /* Must have same node type and quantifiers */ + if (a->nodeType != b->nodeType) + return false; + if (a->min != b->min || a->max != b->max) + return false; + if (a->reluctant != b->reluctant) + return false; + + switch (a->nodeType) + { + case RPR_PATTERN_VAR: + return strcmp(a->varName, b->varName) == 0; + + case RPR_PATTERN_SEQ: + case RPR_PATTERN_ALT: + case RPR_PATTERN_GROUP: + /* Must have same number of children */ + if (list_length(a->children) != list_length(b->children)) + return false; + + /* Compare each child */ + forboth(lca, a->children, lcb, b->children) + { + if (!rprPatternEqual((RPRPatternNode *) lfirst(lca), + (RPRPatternNode *) lfirst(lcb))) + return false; + } + return true; + } + + return false; /* keep compiler quiet */ +} + +/* + * removeDuplicateAlternatives + * Remove duplicate alternatives from ALT nodes. + * + * For example: (A | B | A) -> (A | B) + * This optimization is applied recursively to all nodes. + */ +static RPRPatternNode * +removeDuplicateAlternatives(RPRPatternNode *pattern) +{ + ListCell *lc; + + if (pattern == NULL) + return NULL; + + switch (pattern->nodeType) + { + case RPR_PATTERN_VAR: + /* Leaf node - nothing to do */ + return pattern; + + case RPR_PATTERN_SEQ: + case RPR_PATTERN_GROUP: + { + /* Recursively process children */ + List *newChildren = NIL; + + foreach(lc, pattern->children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + + newChildren = lappend(newChildren, + removeDuplicateAlternatives(child)); + } + pattern->children = newChildren; + return pattern; + } + + case RPR_PATTERN_ALT: + { + /* Recursively process children first */ + List *newChildren = NIL; + ListCell *lc2; + + foreach(lc, pattern->children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + RPRPatternNode *processed = removeDuplicateAlternatives(child); + bool isDuplicate = false; + + /* Check if this alternative already exists */ + foreach(lc2, newChildren) + { + if (rprPatternEqual((RPRPatternNode *) lfirst(lc2), processed)) + { + isDuplicate = true; + break; + } + } + + if (!isDuplicate) + newChildren = lappend(newChildren, processed); + } + pattern->children = newChildren; + + /* If only one alternative remains, unwrap */ + if (list_length(pattern->children) == 1) + return (RPRPatternNode *) linitial(pattern->children); + + return pattern; + } + } + + return pattern; /* keep compiler quiet */ +} + +/* + * mergeConsecutiveVars + * Merge consecutive identical VAR nodes in SEQ. + * + * For example: A A A -> A{3,3} + * Only merges vars with {1,1} quantifier. + */ +static RPRPatternNode * +mergeConsecutiveVars(RPRPatternNode *pattern) +{ + ListCell *lc; + + if (pattern == NULL) + return NULL; + + switch (pattern->nodeType) + { + case RPR_PATTERN_VAR: + /* Leaf node - nothing to do */ + return pattern; + + case RPR_PATTERN_ALT: + case RPR_PATTERN_GROUP: + { + /* Recursively process children */ + List *newChildren = NIL; + + foreach(lc, pattern->children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + + newChildren = lappend(newChildren, + mergeConsecutiveVars(child)); + } + pattern->children = newChildren; + return pattern; + } + + case RPR_PATTERN_SEQ: + { + /* Recursively process children first */ + List *tempChildren = NIL; + List *newChildren = NIL; + RPRPatternNode *prev = NULL; + int count = 0; + + foreach(lc, pattern->children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + + tempChildren = lappend(tempChildren, + mergeConsecutiveVars(child)); + } + + /* Now merge consecutive identical VAR{1,1} nodes */ + foreach(lc, tempChildren) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + + if (child->nodeType == RPR_PATTERN_VAR && + child->min == 1 && child->max == 1 && + !child->reluctant) + { + if (prev != NULL && + prev->nodeType == RPR_PATTERN_VAR && + strcmp(prev->varName, child->varName) == 0 && + prev->min == 1 && prev->max == 1 && + !prev->reluctant) + { + /* Same var as previous - increment count */ + count++; + } + else + { + /* Different var - flush previous if any */ + if (prev != NULL) + { + if (count > 1) + { + prev->min = count; + prev->max = count; + } + newChildren = lappend(newChildren, prev); + } + prev = child; + count = 1; + } + } + else + { + /* Non-VAR or VAR with quantifier - flush previous */ + if (prev != NULL) + { + if (count > 1) + { + prev->min = count; + prev->max = count; + } + newChildren = lappend(newChildren, prev); + } + newChildren = lappend(newChildren, child); + prev = NULL; + count = 0; + } + } + + /* Flush any remaining */ + if (prev != NULL) + { + if (count > 1) + { + prev->min = count; + prev->max = count; + } + newChildren = lappend(newChildren, prev); + } + + pattern->children = newChildren; + list_free(tempChildren); + + /* If only one child remains, unwrap */ + if (list_length(pattern->children) == 1) + return (RPRPatternNode *) linitial(pattern->children); + + return pattern; + } + } + + return pattern; /* keep compiler quiet */ +} + +/* + * optimizeRPRPattern + * Optimize RPRPatternNode tree. + * + * Optimizations applied (in order): + * 1. Flatten nested SEQ: (A (B C)) -> (A B C) + * 2. Flatten nested ALT: (A | (B | C)) -> (A | B | C) + * 3. Quantifier multiplication: (A{2}){3} -> A{6} + * 4. Unwrap GROUP{1,1}: ((A)) -> A, (A B){1,1} -> A B + * 5. Remove single-item SEQ/ALT wrappers + * 6. Remove duplicate alternatives: (A | B | A) -> (A | B) + * 7. Merge consecutive vars: A A A -> A{3,3} + */ +static RPRPatternNode * +optimizeRPRPattern(RPRPatternNode *pattern) +{ + ListCell *lc; + List *newChildren; + + if (pattern == NULL) + return NULL; + + switch (pattern->nodeType) + { + case RPR_PATTERN_VAR: + /* Leaf node - nothing to optimize */ + return pattern; + + case RPR_PATTERN_SEQ: + /* Recursively optimize children first */ + newChildren = NIL; + foreach(lc, pattern->children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + RPRPatternNode *optimizedChild = optimizeRPRPattern(child); + + /* + * Flatten: unwrap GROUP{1,1} and flatten nested SEQ + */ + if (optimizedChild->nodeType == RPR_PATTERN_GROUP && + optimizedChild->min == 1 && optimizedChild->max == 1 && + !optimizedChild->reluctant) + { + /* Unwrap GROUP{1,1}: flatten its content into parent SEQ */ + ListCell *lc2; + + foreach(lc2, optimizedChild->children) + { + newChildren = lappend(newChildren, lfirst(lc2)); + } + } + else if (optimizedChild->nodeType == RPR_PATTERN_SEQ) + { + /* Flatten nested SEQ into parent SEQ */ + ListCell *lc2; + + foreach(lc2, optimizedChild->children) + { + newChildren = lappend(newChildren, lfirst(lc2)); + } + } + else + { + newChildren = lappend(newChildren, optimizedChild); + } + } + pattern->children = newChildren; + + /* Remove single-item SEQ wrapper */ + if (list_length(pattern->children) == 1) + return (RPRPatternNode *) linitial(pattern->children); + + return pattern; + + case RPR_PATTERN_ALT: + /* Recursively optimize children first */ + newChildren = NIL; + foreach(lc, pattern->children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + RPRPatternNode *optimizedChild = optimizeRPRPattern(child); + + /* + * Flatten nested ALT into parent ALT + */ + if (optimizedChild->nodeType == RPR_PATTERN_ALT) + { + ListCell *lc2; + + foreach(lc2, optimizedChild->children) + { + newChildren = lappend(newChildren, lfirst(lc2)); + } + } + else + { + newChildren = lappend(newChildren, optimizedChild); + } + } + pattern->children = newChildren; + + /* Remove single-item ALT wrapper */ + if (list_length(pattern->children) == 1) + return (RPRPatternNode *) linitial(pattern->children); + + return pattern; + + case RPR_PATTERN_GROUP: + /* Recursively optimize children */ + newChildren = NIL; + foreach(lc, pattern->children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + RPRPatternNode *optimizedChild = optimizeRPRPattern(child); + + newChildren = lappend(newChildren, optimizedChild); + } + pattern->children = newChildren; + + /* + * Quantifier multiplication: (A{m}){n} -> A{m*n} + * Only applies when GROUP has single VAR child and neither is reluctant + */ + if (list_length(pattern->children) == 1 && !pattern->reluctant) + { + RPRPatternNode *child = (RPRPatternNode *) linitial(pattern->children); + + if (child->nodeType == RPR_PATTERN_VAR && !child->reluctant) + { + /* + * Multiply quantifiers: (A{2}){3} -> A{6} + * Handle INT_MAX carefully to avoid overflow + */ + int new_min = pattern->min * child->min; + int new_max; + + if (pattern->max == INT_MAX || child->max == INT_MAX) + new_max = INT_MAX; + else + new_max = pattern->max * child->max; + + child->min = new_min; + child->max = new_max; + return child; + } + } + + /* + * Unwrap GROUP{1,1}: ((A)) -> A, (A B){1,1} -> A B + * But preserve groups with quantifiers: (A)+ stays as is + */ + if (pattern->min == 1 && pattern->max == 1 && !pattern->reluctant) + { + /* Single child: return it directly */ + if (list_length(pattern->children) == 1) + return (RPRPatternNode *) linitial(pattern->children); + + /* + * Multiple children: convert to SEQ + * (A B){1,1} -> SEQ(A, B) + */ + pattern->nodeType = RPR_PATTERN_SEQ; + } + + return pattern; + } + + return pattern; /* keep compiler quiet */ +} diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index a67815339b7..ccfc12a194a 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -363,7 +363,7 @@ not_equals "!=" * If you change either set, adjust the character lists appearing in the * rule for "operator"! */ -self [,()\[\].;\:\+\-\*\/\%\^\<\>\=] +self [,()\[\].;\:\+\-\*\/\%\^\<\>\=\|] op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=] operator {op_chars}+ @@ -955,7 +955,7 @@ other . * that the "self" rule would have. */ if (nchars == 1 && - strchr(",()[].;:+-*/%^<>=", yytext[0])) + strchr(",()[].;:+-*/%^<>=|", yytext[0])) return yytext[0]; /* * Likewise, if what we have left is two chars, and diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index b643c0c4bf0..01b598e0bdb 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -438,10 +438,12 @@ static void get_rule_groupingset(GroupingSet *gset, List *targetlist, bool omit_parens, deparse_context *context); static void get_rule_orderby(List *orderList, List *targetList, bool force_colno, deparse_context *context); -static void get_rule_pattern(List *patternVariable, List *patternRegexp, - bool force_colno, deparse_context *context); -static void get_rule_define(List *defineClause, List *patternVariables, - bool force_colno, deparse_context *context); +static void append_pattern_quantifier(StringInfo buf, RPRPatternNode *node); +static void get_rule_pattern_node(RPRPatternNode *node, deparse_context *context); +static void get_rule_pattern(RPRPatternNode *rpPattern, bool force_colno, + deparse_context *context); +static void get_rule_define(List *defineClause, bool force_colno, + deparse_context *context); static void get_rule_windowclause(Query *query, deparse_context *context); static void get_rule_windowspec(WindowClause *wc, List *targetList, deparse_context *context); @@ -6749,37 +6751,101 @@ get_rule_orderby(List *orderList, List *targetList, } /* - * Display a PATTERN clause. + * Helper function to append quantifier string for pattern node */ static void -get_rule_pattern(List *patternVariable, List *patternRegexp, - bool force_colno, deparse_context *context) +append_pattern_quantifier(StringInfo buf, RPRPatternNode *node) +{ + bool has_quantifier = true; + + if (node->min == 1 && node->max == 1) + { + /* {1,1} = no quantifier */ + has_quantifier = false; + } + else if (node->min == 0 && node->max == INT_MAX) + appendStringInfoChar(buf, '*'); + else if (node->min == 1 && node->max == INT_MAX) + appendStringInfoChar(buf, '+'); + else if (node->min == 0 && node->max == 1) + appendStringInfoChar(buf, '?'); + else if (node->max == INT_MAX) + appendStringInfo(buf, "{%d,}", node->min); + else if (node->min == node->max) + appendStringInfo(buf, "{%d}", node->min); + else + appendStringInfo(buf, "{%d,%d}", node->min, node->max); + + if (node->reluctant && has_quantifier) + appendStringInfoChar(buf, '?'); +} + +/* + * Recursive helper to display RPRPatternNode tree + */ +static void +get_rule_pattern_node(RPRPatternNode *node, deparse_context *context) { StringInfo buf = context->buf; + ListCell *lc; const char *sep; - ListCell *lc_var, - *lc_reg = list_head(patternRegexp); - sep = ""; - appendStringInfoChar(buf, '('); - foreach(lc_var, patternVariable) - { - char *variable = strVal((String *) lfirst(lc_var)); - char *regexp = NULL; + if (node == NULL) + return; - if (lc_reg != NULL) - { - regexp = strVal((String *) lfirst(lc_reg)); + switch (node->nodeType) + { + case RPR_PATTERN_VAR: + appendStringInfoString(buf, node->varName); + append_pattern_quantifier(buf, node); + break; - lc_reg = lnext(patternRegexp, lc_reg); - } + case RPR_PATTERN_SEQ: + sep = ""; + foreach(lc, node->children) + { + appendStringInfoString(buf, sep); + get_rule_pattern_node((RPRPatternNode *) lfirst(lc), context); + sep = " "; + } + break; - appendStringInfo(buf, "%s%s", sep, variable); - if (regexp !=NULL) - appendStringInfoString(buf, regexp); + case RPR_PATTERN_ALT: + sep = ""; + foreach(lc, node->children) + { + appendStringInfoString(buf, sep); + get_rule_pattern_node((RPRPatternNode *) lfirst(lc), context); + sep = " | "; + } + break; - sep = " "; + case RPR_PATTERN_GROUP: + appendStringInfoChar(buf, '('); + sep = ""; + foreach(lc, node->children) + { + appendStringInfoString(buf, sep); + get_rule_pattern_node((RPRPatternNode *) lfirst(lc), context); + sep = " "; + } + appendStringInfoChar(buf, ')'); + append_pattern_quantifier(buf, node); + break; } +} + +/* + * Display a PATTERN clause. + */ +static void +get_rule_pattern(RPRPatternNode *rpPattern, bool force_colno, + deparse_context *context) +{ + StringInfo buf = context->buf; + + appendStringInfoChar(buf, '('); + get_rule_pattern_node(rpPattern, context); appendStringInfoChar(buf, ')'); } @@ -6787,8 +6853,7 @@ get_rule_pattern(List *patternVariable, List *patternRegexp, * Display a DEFINE clause. */ static void -get_rule_define(List *defineClause, List *patternVariables, - bool force_colno, deparse_context *context) +get_rule_define(List *defineClause, bool force_colno, deparse_context *context) { StringInfo buf = context->buf; const char *sep; @@ -6922,13 +6987,12 @@ get_rule_windowspec(WindowClause *wc, List *targetList, appendStringInfoString(buf, "\n INITIAL"); needspace = true; } - if (wc->patternVariable) + if (wc->rpPattern) { if (needspace) appendStringInfoChar(buf, ' '); appendStringInfoString(buf, "\n PATTERN "); - get_rule_pattern(wc->patternVariable, wc->patternRegexp, - false, context); + get_rule_pattern(wc->rpPattern, false, context); needspace = true; } @@ -6937,8 +7001,7 @@ get_rule_windowspec(WindowClause *wc, List *targetList, if (needspace) appendStringInfoChar(buf, ' '); appendStringInfoString(buf, "\n DEFINE\n"); - get_rule_define(wc->defineClause, wc->patternVariable, - false, context); + get_rule_define(wc->defineClause, false, context); appendStringInfoChar(buf, ' '); } diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 3a798de25b2..2771dfb5485 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -2720,10 +2720,7 @@ typedef struct WindowAggState /* these fields are used in Row pattern recognition: */ RPSkipTo rpSkipTo; /* Row Pattern Skip To type */ - List *patternVariableList; /* list of row pattern variables names - * (list of String) */ - List *patternRegexpList; /* list of row pattern regular expressions - * ('+' or ''. list of String) */ + RPRPatternNode *rpPattern; /* Row Pattern PATTERN clause AST */ List *defineVariableList; /* list of row pattern definition * variables (list of String) */ List *defineClauseList; /* expression for row pattern definition diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 21ba207bb88..29f4a39a428 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -589,8 +589,37 @@ typedef enum RPSkipTo } RPSkipTo; /* - * RowPatternCommonSyntax - raw representation of row pattern common syntax + * RPRPatternNodeType - Row Pattern Recognition pattern node types + */ +typedef enum RPRPatternNodeType +{ + RPR_PATTERN_VAR, /* variable reference */ + RPR_PATTERN_SEQ, /* sequence (concatenation) */ + RPR_PATTERN_ALT, /* alternation (|) */ + RPR_PATTERN_GROUP /* group (parentheses) */ +} RPRPatternNodeType; + +/* + * RPRPatternNode - Row Pattern Recognition pattern AST node * + * Represents a pattern in Row Pattern Recognition PATTERN clause. + * Used in both WINDOW clause and MATCH_RECOGNIZE. + * Forms a tree structure for complex patterns like "(A | B)+ C?". + */ +typedef struct RPRPatternNode +{ + NodeTag type; /* T_RPRPatternNode */ + RPRPatternNodeType nodeType; /* VAR, SEQ, ALT, GROUP */ + char *varName; /* VAR: variable name */ + int min; /* minimum repetitions (0 for *, ?) */ + int max; /* maximum repetitions (INT_MAX for *, +) */ + bool reluctant; /* true for *?, +?, ?? */ + List *children; /* SEQ, ALT, GROUP: child nodes */ + ParseLoc location; /* token location, or -1 */ +} RPRPatternNode; + +/* + * RowPatternCommonSyntax - raw representation of row pattern common syntax */ typedef struct RPCommonSyntax { @@ -598,7 +627,7 @@ typedef struct RPCommonSyntax RPSkipTo rpSkipTo; /* Row Pattern AFTER MATCH SKIP type */ bool initial; /* true if is * initial */ - List *rpPatterns; /* PATTERN variables (list of A_Expr) */ + RPRPatternNode *rpPattern; /* PATTERN clause AST */ List *rpDefs; /* row pattern definitions clause (list of * ResTarget) */ } RPCommonSyntax; @@ -1615,8 +1644,8 @@ typedef struct GroupingSet * * "defineClause" is Row Pattern Recognition DEFINE clause (list of * TargetEntry). TargetEntry.resname represents row pattern definition - * variable name. "patternVariable" and "patternRegexp" represents PATTERN - * clause. + * variable name. "rpPattern" represents PATTERN clause as an AST tree + * (RPRPatternNode). * * The information relevant for the query jumbling is the partition clause * type and its bounds. @@ -1655,14 +1684,8 @@ typedef struct WindowClause List *defineClause; /* Row Pattern DEFINE variable initial names (list of String) */ List *defineInitial; - /* Row Pattern PATTERN variable name (list of String) */ - List *patternVariable; - - /* - * Row Pattern PATTERN regular expression quantifier ('+' or ''. list of - * String) - */ - List *patternRegexp; + /* Row Pattern PATTERN clause AST */ + RPRPatternNode *rpPattern; } WindowClause; /* diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 1759a621f58..6d750784386 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -1297,14 +1297,8 @@ typedef struct WindowAgg /* Row Pattern Recognition AFTER MATCH SKIP clause */ RPSkipTo rpSkipTo; /* Row Pattern Skip To type */ - /* Row Pattern PATTERN variable name (list of String) */ - List *patternVariable; - - /* - * Row Pattern RPATTERN regular expression quantifier ('+' or ''. list of - * String) - */ - List *patternRegexp; + /* Row Pattern PATTERN clause AST */ + RPRPatternNode *rpPattern; /* Row Pattern DEFINE clause (list of TargetEntry) */ List *defineClause; diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h index ede3903d1dd..40dd8b6b9b0 100644 --- a/src/include/parser/parse_clause.h +++ b/src/include/parser/parse_clause.h @@ -52,6 +52,9 @@ extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle, extern Index assignSortGroupRef(TargetEntry *tle, List *tlist); extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList); +extern TargetEntry *findTargetlistEntrySQL99(ParseState *pstate, Node *node, + List **tlist, ParseExprKind exprKind); + /* functions in parse_jsontable.c */ extern ParseNamespaceItem *transformJsonTable(ParseState *pstate, JsonTable *jt); diff --git a/src/include/parser/parse_rpr.h b/src/include/parser/parse_rpr.h new file mode 100644 index 00000000000..40e0258d2e6 --- /dev/null +++ b/src/include/parser/parse_rpr.h @@ -0,0 +1,22 @@ +/*------------------------------------------------------------------------- + * + * parse_rpr.h + * handle Row Pattern Recognition in parser + * + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/parser/parse_rpr.h + * + *------------------------------------------------------------------------- + */ +#ifndef PARSE_RPR_H +#define PARSE_RPR_H + +#include "parser/parse_node.h" + +extern void transformRPR(ParseState *pstate, WindowClause *wc, + WindowDef *windef, List **targetlist); + +#endif /* PARSE_RPR_H */ diff --git a/src/test/regress/expected/rpr.out b/src/test/regress/expected/rpr.out index 312b62fb489..c9db77fa37b 100644 --- a/src/test/regress/expected/rpr.out +++ b/src/test/regress/expected/rpr.out @@ -203,6 +203,229 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER company2 | 07-10-2023 | 1300 | | | (20 rows) +-- test using alternation (|) with sequence +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START (UP | DOWN)) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 200 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | 150 | 140 + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | 150 | 90 + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 110 | 130 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | 120 | 130 + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 2000 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | 1500 | 1400 + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | 1500 | 60 + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 1100 | 1300 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | 1200 | 1300 + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- test using alternation (|) with group quantifier (not yet supported) +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START (UP | DOWN)+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | 150 | 90 + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 110 | 120 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1400 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | 1500 | 60 + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 1100 | 1200 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- test using nested alternation +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START ((UP DOWN) | FLAT)+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price), + FLAT AS price = PREV(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 150 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | 140 | 90 + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 110 | 120 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1500 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | 1400 | 60 + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 1100 | 1200 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- test using group with quantifier +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((UP DOWN)+) + DEFINE + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | | + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | | + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- test using absolute threshold values (not relative PREV) +-- HIGH: price > 150, LOW: price < 100, MID: neutral range +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOW MID* HIGH) + DEFINE + LOW AS price < 100, + MID AS price >= 100 AND price <= 150, + HIGH AS price > 150 +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | | + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 2000 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | 60 | 1100 + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- test threshold-based pattern with alternation +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOW (MID | HIGH)+) + DEFINE + LOW AS price < 100, + MID AS price >= 100 AND price <= 150, + HIGH AS price > 150 +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | | + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | 90 | 130 + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1500 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | 60 | 1300 + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + -- basic test with none-greedy pattern SELECT company, tdate, price, count(*) OVER w FROM stock @@ -238,6 +461,111 @@ SELECT company, tdate, price, count(*) OVER w company2 | 07-10-2023 | 1300 | 0 (20 rows) +-- test using {n} quantifier (A A A should be optimized to A{3}) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{3}) + DEFINE + A AS price >= 140 AND price <= 150 +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 0 + company1 | 07-02-2023 | 200 | 0 + company1 | 07-03-2023 | 150 | 3 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 0 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 0 + company2 | 07-02-2023 | 2000 | 0 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 0 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + +-- test using {n,} quantifier (2 or more) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,}) + DEFINE + A AS price > 100 +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 0 + company1 | 07-02-2023 | 200 | 4 + company1 | 07-03-2023 | 150 | 0 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 4 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 0 + company2 | 07-02-2023 | 2000 | 4 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 4 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + +-- test using {n,m} quantifier (2 to 4) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,4}) + DEFINE + A AS price > 100 +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 0 + company1 | 07-02-2023 | 200 | 4 + company1 | 07-03-2023 | 150 | 0 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 4 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 0 + company2 | 07-02-2023 | 2000 | 4 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 4 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + -- last_value() should remain consistent SELECT company, tdate, price, last_value(price) OVER w FROM stock @@ -912,6 +1240,326 @@ SELECT pg_get_viewdef('v_window'); down AS (price < prev(price)) ); (1 row) +-- Test optimization: duplicate alternatives removal (A | B | A) -> (A | B) +CREATE TEMP VIEW v_opt_dup AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | B | A)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_dup'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a | b)+) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +-- Test optimization: duplicate group removal ((A | B)+ | (A | B)+) -> (A | B)+ +CREATE TEMP VIEW v_opt_dup_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | B)+ | (A | B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_dup_group'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a | b)+) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +-- Test optimization: consecutive vars merge (A A A) -> A{3} +CREATE TEMP VIEW v_opt_merge AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A A) + DEFINE + A AS price >= 140 AND price <= 150 +); +SELECT pg_get_viewdef('v_opt_merge'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{3}) + + DEFINE + + a AS ((price >= 140) AND (price <= 150)) ); +(1 row) + +-- Test {n} quantifier display +CREATE TEMP VIEW v_quantifier_n AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_quantifier_n'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{3}) + + DEFINE + + a AS (price > 100) ); +(1 row) + +-- Test {n,} quantifier display +CREATE TEMP VIEW v_quantifier_n_plus AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_quantifier_n_plus'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{2,}) + + DEFINE + + a AS (price > 100) ); +(1 row) + +-- Test optimization: flatten nested SEQ (A (B C)) -> (A B C) +CREATE TEMP VIEW v_opt_flatten_seq AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A (B C)) + DEFINE + A AS price > 100, + B AS price > 150, + C AS price < 150 +); +SELECT pg_get_viewdef('v_opt_flatten_seq'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a b c) + + DEFINE + + a AS (price > 100), + + b AS (price > 150), + + c AS (price < 150) ); +(1 row) + +-- Test optimization: flatten nested ALT (A | (B | C)) -> (A | B | C) +CREATE TEMP VIEW v_opt_flatten_alt AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | (B | C))+) + DEFINE + A AS price > 200, + B AS price > 100, + C AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_flatten_alt'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a | b | c)+) + + DEFINE + + a AS (price > 200), + + b AS (price > 100), + + c AS (price <= 100) ); +(1 row) + +-- Test optimization: unwrap GROUP{1,1} ((A)) -> A +CREATE TEMP VIEW v_opt_unwrap_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (((A))) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_unwrap_group'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a) + + DEFINE + + a AS (price > 100) ); +(1 row) + +-- Test optimization: quantifier multiplication (A{2}){3} -> A{6} +CREATE TEMP VIEW v_opt_quant_mult AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A{2}){3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_quant_mult'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{6}) + + DEFINE + + a AS (price > 100) ); +(1 row) + +-- Test optimization: quantifier multiplication with range (A{2,4}){3} -> A{6,12} +CREATE TEMP VIEW v_opt_quant_mult_range AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A{2,4}){3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_quant_mult_range'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{6,12}) + + DEFINE + + a AS (price > 100) ); +(1 row) + +-- Test optimization: quantifier multiplication with outer range (A{2}){3,5} -> A{6,10} +CREATE TEMP VIEW v_opt_quant_mult_range2 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A{2}){3,5}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_quant_mult_range2'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{6,10}) + + DEFINE + + a AS (price > 100) ); +(1 row) + -- -- Error cases -- @@ -1030,7 +1678,7 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER UP AS price > PREV(1), DOWN AS price < PREV(1) ); -ERROR: unsupported quantifier +ERROR: unsupported quantifier "~" LINE 9: PATTERN (START UP~ DOWN+) ^ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w @@ -1047,6 +1695,4 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER UP AS price > PREV(1), DOWN AS price < PREV(1) ); -ERROR: unsupported quantifier -LINE 9: PATTERN (START UP+? DOWN+) - ^ +ERROR: row pattern navigation operation's argument must include at least one column reference diff --git a/src/test/regress/sql/rpr.sql b/src/test/regress/sql/rpr.sql index ffcf5476198..5634b22c1ec 100644 --- a/src/test/regress/sql/rpr.sql +++ b/src/test/regress/sql/rpr.sql @@ -90,6 +90,91 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER DOWN AS price < PREV(price) ); +-- test using alternation (|) with sequence +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START (UP | DOWN)) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- test using alternation (|) with group quantifier (not yet supported) +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START (UP | DOWN)+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- test using nested alternation +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START ((UP DOWN) | FLAT)+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price), + FLAT AS price = PREV(price) +); + +-- test using group with quantifier +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((UP DOWN)+) + DEFINE + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- test using absolute threshold values (not relative PREV) +-- HIGH: price > 150, LOW: price < 100, MID: neutral range +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOW MID* HIGH) + DEFINE + LOW AS price < 100, + MID AS price >= 100 AND price <= 150, + HIGH AS price > 150 +); + +-- test threshold-based pattern with alternation +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOW (MID | HIGH)+) + DEFINE + LOW AS price < 100, + MID AS price >= 100 AND price <= 150, + HIGH AS price > 150 +); + -- basic test with none-greedy pattern SELECT company, tdate, price, count(*) OVER w FROM stock @@ -102,6 +187,42 @@ SELECT company, tdate, price, count(*) OVER w A AS price >= 140 AND price <= 150 ); +-- test using {n} quantifier (A A A should be optimized to A{3}) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{3}) + DEFINE + A AS price >= 140 AND price <= 150 +); + +-- test using {n,} quantifier (2 or more) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,}) + DEFINE + A AS price > 100 +); + +-- test using {n,m} quantifier (2 to 4) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,4}) + DEFINE + A AS price > 100 +); + -- last_value() should remain consistent SELECT company, tdate, price, last_value(price) OVER w FROM stock @@ -405,6 +526,166 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER SELECT * FROM v_window; SELECT pg_get_viewdef('v_window'); +-- Test optimization: duplicate alternatives removal (A | B | A) -> (A | B) +CREATE TEMP VIEW v_opt_dup AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | B | A)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_dup'); + +-- Test optimization: duplicate group removal ((A | B)+ | (A | B)+) -> (A | B)+ +CREATE TEMP VIEW v_opt_dup_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | B)+ | (A | B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_dup_group'); + +-- Test optimization: consecutive vars merge (A A A) -> A{3} +CREATE TEMP VIEW v_opt_merge AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A A) + DEFINE + A AS price >= 140 AND price <= 150 +); +SELECT pg_get_viewdef('v_opt_merge'); + +-- Test {n} quantifier display +CREATE TEMP VIEW v_quantifier_n AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_quantifier_n'); + +-- Test {n,} quantifier display +CREATE TEMP VIEW v_quantifier_n_plus AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_quantifier_n_plus'); + +-- Test optimization: flatten nested SEQ (A (B C)) -> (A B C) +CREATE TEMP VIEW v_opt_flatten_seq AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A (B C)) + DEFINE + A AS price > 100, + B AS price > 150, + C AS price < 150 +); +SELECT pg_get_viewdef('v_opt_flatten_seq'); + +-- Test optimization: flatten nested ALT (A | (B | C)) -> (A | B | C) +CREATE TEMP VIEW v_opt_flatten_alt AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | (B | C))+) + DEFINE + A AS price > 200, + B AS price > 100, + C AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_flatten_alt'); + +-- Test optimization: unwrap GROUP{1,1} ((A)) -> A +CREATE TEMP VIEW v_opt_unwrap_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (((A))) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_unwrap_group'); + +-- Test optimization: quantifier multiplication (A{2}){3} -> A{6} +CREATE TEMP VIEW v_opt_quant_mult AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A{2}){3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_quant_mult'); + +-- Test optimization: quantifier multiplication with range (A{2,4}){3} -> A{6,12} +CREATE TEMP VIEW v_opt_quant_mult_range AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A{2,4}){3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_quant_mult_range'); + +-- Test optimization: quantifier multiplication with outer range (A{2}){3,5} -> A{6,10} +CREATE TEMP VIEW v_opt_quant_mult_range2 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A{2}){3,5}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_quant_mult_range2'); + -- -- Error cases -- -- 2.50.1 (Apple Git-155)