diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 7c7fedf..004e896 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -26,11 +26,16 @@ #include "foreign/foreign.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" +#include "optimizer/paths.h" #include "optimizer/planmain.h" #include "optimizer/restrictinfo.h" #include "optimizer/var.h" +#include "parser/parse_oper.h" +#include "parser/parse_relation.h" +#include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -66,6 +71,10 @@ static const struct FileFdwOption valid_options[] = { {"quote", ForeignTableRelationId}, {"escape", ForeignTableRelationId}, {"null", ForeignTableRelationId}, + {"sorted", ForeignTableRelationId}, + {"key", ForeignTableRelationId}, + {"direction", ForeignTableRelationId}, + {"nulls", ForeignTableRelationId}, {"encoding", ForeignTableRelationId}, {"force_not_null", AttributeRelationId}, @@ -83,6 +92,10 @@ static const struct FileFdwOption valid_options[] = { typedef struct FileFdwPlanState { char *filename; /* file to read */ + bool sorted; /* sorted? */ + char *key; /* key column name */ + bool reverse; /* specified desc? */ + bool nulls_first; /* specified nulls first? */ List *options; /* merged COPY options, excluding filename */ BlockNumber pages; /* estimate of file's physical size */ double ntuples; /* estimate of number of rows in file */ @@ -136,11 +149,20 @@ static bool fileAnalyzeForeignTable(Relation relation, */ static bool is_valid_option(const char *option, Oid context); static void fileGetOptions(Oid foreigntableid, - char **filename, List **other_options); + char **filename, + bool *sorted, + char **key, + bool *reverse, + bool *nulls_first, + List **other_options); static List *get_file_fdw_attribute_options(Oid relid); static bool check_selective_binary_conversion(RelOptInfo *baserel, Oid foreigntableid, List **columns); +static List *build_usable_pathkeys(PlannerInfo *root, + RelOptInfo *baserel, + Oid foreigntableid, + FileFdwPlanState *fdw_private); static void estimate_size(PlannerInfo *root, RelOptInfo *baserel, FileFdwPlanState *fdw_private); static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel, @@ -185,6 +207,10 @@ file_fdw_validator(PG_FUNCTION_ARGS) List *options_list = untransformRelOptions(PG_GETARG_DATUM(0)); Oid catalog = PG_GETARG_OID(1); char *filename = NULL; + DefElem *sorted = NULL; + char *key = NULL; + char *dir = NULL; + char *nulls = NULL; DefElem *force_not_null = NULL; List *other_options = NIL; ListCell *cell; @@ -254,6 +280,48 @@ file_fdw_validator(PG_FUNCTION_ARGS) errmsg("conflicting or redundant options"))); filename = defGetString(def); } + else if (strcmp(def->defname, "sorted") == 0) + { + if (sorted) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + sorted = def; + /* Don't care what the value is, as long as it's a legal boolean */ + (void) defGetBoolean(def); + } + else if (strcmp(def->defname, "key") == 0) + { + if (key) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + key = defGetString(def); + } + else if (strcmp(def->defname, "direction") == 0) + { + if (dir) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + dir = defGetString(def); + if (strcmp(dir, "asc") != 0 && strcmp(dir, "desc") != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be \"asc\" or \"desc\"", def->defname))); + } + else if (strcmp(def->defname, "nulls") == 0) + { + if (nulls) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + nulls = defGetString(def); + if (strcmp(nulls, "first") != 0 && strcmp(nulls, "last") != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be \"first\" or \"last\"", def->defname))); + } else if (strcmp(def->defname, "force_not_null") == 0) { if (force_not_null) @@ -281,6 +349,14 @@ file_fdw_validator(PG_FUNCTION_ARGS) (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED), errmsg("filename is required for file_fdw foreign tables"))); + /* + * Key option must be set when sorted option is set to true. + */ + if (sorted != NULL && defGetBoolean(sorted) && key == NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED), + errmsg("key must be set to specify a column sorting this foreign table"))); + PG_RETURN_VOID(); } @@ -309,14 +385,22 @@ is_valid_option(const char *option, Oid context) */ static void fileGetOptions(Oid foreigntableid, - char **filename, List **other_options) + char **filename, + bool *sorted, + char **key, + bool *reverse, + bool *nulls_first, + List **other_options) { ForeignTable *table; ForeignServer *server; ForeignDataWrapper *wrapper; List *options; + char *dir = NULL; + char *nulls = NULL; ListCell *lc, - *prev; + *prev, + *next; /* * Extract options from FDW objects. We ignore user mappings because @@ -337,30 +421,70 @@ fileGetOptions(Oid foreigntableid, options = list_concat(options, get_file_fdw_attribute_options(foreigntableid)); /* - * Separate out the filename. + * Separate out the filename and sortedness options. */ - *filename = NULL; + *filename = *key = NULL; + *sorted = *reverse = false; prev = NULL; - foreach(lc, options) + for (lc = list_head(options); lc; lc = next) { DefElem *def = (DefElem *) lfirst(lc); + next = lnext(lc); if (strcmp(def->defname, "filename") == 0) { *filename = defGetString(def); options = list_delete_cell(options, lc, prev); - break; } - prev = lc; + else if (strcmp(def->defname, "sorted") == 0) + { + *sorted = defGetBoolean(def); + options = list_delete_cell(options, lc, prev); + } + else if (strcmp(def->defname, "key") == 0) + { + *key = defGetString(def); + options = list_delete_cell(options, lc, prev); + } + else if (strcmp(def->defname, "direction") == 0) + { + dir = defGetString(def); + if (strcmp(dir, "desc") == 0) + *reverse = true; + options = list_delete_cell(options, lc, prev); + } + else if (strcmp(def->defname, "nulls") == 0) + { + nulls = defGetString(def); + options = list_delete_cell(options, lc, prev); + } + else + prev = lc; + } + + /* + * Set the default for nulls_first and adjust it if neccesary. + */ + *nulls_first = *reverse; + if (nulls != NULL) + { + if (strcmp(nulls, "fisrt") == 0) + *nulls_first = true; + else + *nulls_first = false; } /* * The validator should have checked that a filename was included in the - * options, but check again, just in case. + * options and that a key was included if neccesary, but check again, + * just in case. */ if (*filename == NULL) elog(ERROR, "filename is required for file_fdw foreign tables"); + if (*sorted && *key == NULL) + elog(ERROR, "key must be set to specify a column sorting this foreign table"); + *other_options = options; } @@ -439,7 +563,12 @@ fileGetForeignRelSize(PlannerInfo *root, */ fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState)); fileGetOptions(foreigntableid, - &fdw_private->filename, &fdw_private->options); + &fdw_private->filename, + &fdw_private->sorted, + &fdw_private->key, + &fdw_private->reverse, + &fdw_private->nulls_first, + &fdw_private->options); baserel->fdw_private = (void *) fdw_private; /* Estimate relation size */ @@ -464,6 +593,7 @@ fileGetForeignPaths(PlannerInfo *root, Cost total_cost; List *columns; List *coptions = NIL; + List *useful_pathkeys = NIL; /* Decide whether to selectively perform binary conversion */ if (check_selective_binary_conversion(baserel, @@ -472,6 +602,21 @@ fileGetForeignPaths(PlannerInfo *root, coptions = list_make1(makeDefElem("convert_selectively", (Node *) columns)); + /* + * Build a single pathkey list describing the sort ordering in the file, + * if any, then see if it is actually useful for this query. + */ + if (fdw_private->sorted && has_useful_pathkeys(root, baserel)) + { + List *usable_pathkeys; + + usable_pathkeys = build_usable_pathkeys(root, baserel, + foreigntableid, + fdw_private); + useful_pathkeys = truncate_useless_pathkeys(root, baserel, + usable_pathkeys); + } + /* Estimate costs */ estimate_costs(root, baserel, fdw_private, &startup_cost, &total_cost); @@ -486,7 +631,7 @@ fileGetForeignPaths(PlannerInfo *root, baserel->rows, startup_cost, total_cost, - NIL, /* no pathkeys */ + useful_pathkeys, NULL, /* no outer rel either */ coptions)); @@ -536,11 +681,20 @@ static void fileExplainForeignScan(ForeignScanState *node, ExplainState *es) { char *filename; + bool sorted; + char *key; + char reverse; + char nulls_first; List *options; /* Fetch options --- we only need filename at this point */ fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), - &filename, &options); + &filename, + &sorted, + &key, + &reverse, + &nulls_first, + &options); ExplainPropertyText("Foreign File", filename, es); @@ -564,6 +718,10 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) { ForeignScan *plan = (ForeignScan *) node->ss.ps.plan; char *filename; + bool sorted; + char *key; + bool reverse; + bool nulls_first; List *options; CopyState cstate; FileFdwExecutionState *festate; @@ -576,7 +734,12 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) /* Fetch options of foreign table */ fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), - &filename, &options); + &filename, + &sorted, + &key, + &reverse, + &nulls_first, + &options); /* Add any options from the plan (currently only convert_selectively) */ options = list_concat(options, plan->fdw_private); @@ -687,11 +850,21 @@ fileAnalyzeForeignTable(Relation relation, BlockNumber *totalpages) { char *filename; + bool sorted; + char *key; + bool reverse; + bool nulls_first; List *options; struct stat stat_buf; /* Fetch options of foreign table */ - fileGetOptions(RelationGetRelid(relation), &filename, &options); + fileGetOptions(RelationGetRelid(relation), + &filename, + &sorted, + &key, + &reverse, + &nulls_first, + &options); /* * Get size of the file. (XXX if we fail here, would it be better to just @@ -836,6 +1009,80 @@ check_selective_binary_conversion(RelOptInfo *baserel, } /* + * Build a single pathkey list describing the sort ordering in the data file. + */ +static List * +build_usable_pathkeys(PlannerInfo *root, RelOptInfo *baserel, + Oid foreigntableid, FileFdwPlanState *fdw_private) +{ + Relation rel; + AttrNumber attnum; + Form_pg_attribute atttup; + Index varno = baserel->relid; + Expr *expr; + Oid restype; + Oid sortop; + Oid opfamily; + Oid opcintype; + int16 strategy; + Oid collation; + PathKey *cpathkey = NULL; + + rel = heap_open(foreigntableid, AccessShareLock); + attnum = attnameAttNum(rel, fdw_private->key, false); + if (attnum == InvalidAttrNumber) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" of relation \"%s\" does not exist", + fdw_private->key, RelationGetRelationName(rel)))); + atttup = rel->rd_att->attrs[attnum - 1]; + expr = (Expr *) makeVar(varno, + attnum, + atttup->atttypid, + atttup->atttypmod, + atttup->attcollation, + 0); + heap_close(rel, AccessShareLock); + + restype = exprType((Node *) expr); + + if (fdw_private->reverse == false) + get_sort_group_operators(restype, + true, false, false, + &sortop, NULL, NULL, + NULL); + else + get_sort_group_operators(restype, + false, false, true, + NULL, NULL, &sortop, + NULL); + + if (!get_ordering_op_properties(sortop, + &opfamily, &opcintype, &strategy)) + elog(ERROR, "operator %u is not a valid ordering operator", sortop); + + collation = exprCollation((Node *) expr); + + /* Try to make a canonical pathkey for this key */ + cpathkey = make_pathkey_from_sortinfo(root, + expr, + opfamily, + opcintype, + collation, + fdw_private->reverse, + fdw_private->nulls_first, + 0, + baserel->relids, + false, + true); + + if (cpathkey != NULL) + return list_make1(cpathkey); + else + return NIL; +} + +/* * Estimate size of a foreign table. * * The main result is returned in baserel->rows. We also set @@ -973,6 +1220,10 @@ file_acquire_sample_rows(Relation onerel, int elevel, bool *nulls; bool found; char *filename; + bool sorted; + char *key; + bool reverse; + bool nulls_first; List *options; CopyState cstate; ErrorContextCallback errcontext; @@ -987,7 +1238,13 @@ file_acquire_sample_rows(Relation onerel, int elevel, nulls = (bool *) palloc(tupDesc->natts * sizeof(bool)); /* Fetch options of foreign table */ - fileGetOptions(RelationGetRelid(onerel), &filename, &options); + fileGetOptions(RelationGetRelid(onerel), + &filename, + &sorted, + &key, + &reverse, + &nulls_first, + &options); /* * Create CopyState from FDW options. diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source index 6f906e1..899fe67 100644 --- a/contrib/file_fdw/output/file_fdw.source +++ b/contrib/file_fdw/output/file_fdw.source @@ -123,7 +123,7 @@ ERROR: invalid option "force_not_null" HINT: There are no valid options in this context. CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- ERROR ERROR: invalid option "force_not_null" -HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding +HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, sorted, key, direction, nulls, encoding -- basic query tests SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a; a | b diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml index 889aa77..f9a832e 100644 --- a/doc/src/sgml/file-fdw.sgml +++ b/doc/src/sgml/file-fdw.sgml @@ -98,6 +98,57 @@ + sorted + + + + This is a Boolean option and + specifies whether the data in the file is sorted by a column. + + + + + + key + + + + Specifies the name of a column by which the data in the file is sorted. + Required when sorted is set to TRUE. + + + + + + direction + + + + Specifies the sort order + when the data in the file is sorted by a column: + asc or desc. + The default is asc. + + + + + + nulls + + + + Specifies that nulls sort before or after non-nulls + when the data in the file is sorted by a column: + first or last. + first is the default when direction is set to + desc. + When direction is not set to desc, + last is the default. + + + + + encoding diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 20a5644..64e4142 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -233,7 +233,7 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys) * canonicalize should always be TRUE after EquivalenceClass merging has * been performed, but FALSE if we haven't done EquivalenceClass merging yet. */ -static PathKey * +PathKey * make_pathkey_from_sortinfo(PlannerInfo *root, Expr *expr, Oid opfamily, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index b6fb8ee..d827333 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -149,6 +149,17 @@ typedef enum } PathKeysComparison; extern List *canonicalize_pathkeys(PlannerInfo *root, List *pathkeys); +extern PathKey *make_pathkey_from_sortinfo(PlannerInfo *root, + Expr *expr, + Oid opfamily, + Oid opcintype, + Oid collation, + bool reverse_sort, + bool nulls_first, + Index sortref, + Relids rel, + bool create_it, + bool canonicalize); extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2); extern bool pathkeys_contained_in(List *keys1, List *keys2); extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,