From 83e2b9c24db94a251990b10d345949487edba194 Mon Sep 17 00:00:00 2001 From: Joel Jacobson Date: Sat, 12 Oct 2024 08:29:51 +0200 Subject: [PATCH 2/2] Reorganize ProcessCopyOptions for clarity and consistent option handling. No changes to the function's signature or behavior; the refactoring solely improves code structure and readability. Changes: * Refactored ProcessCopyOptions to improve readability and maintainability by grouping per-option checks and default assignments into dedicated sections. This enhances the logical flow and makes it easier to understand how each COPY option is processed. * Explicitly set the default format to COPY_FORMAT_TEXT when the FORMAT option is not specified. Previously, the default was implied due to zero-initialization, but making it explicit clarifies the default behavior. * Consistently use boolean specified-variables to determine if an option has been provided, rather than relying on default values from zero-initialization. * Added assertions to ensure necessary options are set before performing dependent checks, explicitly indicating that they have been assigned either specified or default values. * Relocated interdependent option validations to a dedicated section for additional clarity. --- src/backend/commands/copy.c | 452 ++++++++++++++++++++++-------------- 1 file changed, 282 insertions(+), 170 deletions(-) diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 68340e534a..493ca5f487 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -672,195 +672,272 @@ ProcessCopyOptions(ParseState *pstate, } /* - * Check for incompatible options (must do these three before inserting - * defaults) + * Set default format if not specified. + * This isn't strictly necessary since COPY_FORMAT_TEXT is 0 and + * opts_out is palloc0'd, but do it for clarity. */ - if (opts_out->format == COPY_FORMAT_BINARY && opts_out->delim) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ - errmsg("cannot specify %s in BINARY mode", "DELIMITER"))); - - if (opts_out->format == COPY_FORMAT_BINARY && opts_out->null_print) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("cannot specify %s in BINARY mode", "NULL"))); + if (!format_specified) + opts_out->format = COPY_FORMAT_TEXT; - if (opts_out->format == COPY_FORMAT_BINARY && opts_out->default_print) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("cannot specify %s in BINARY mode", "DEFAULT"))); - - /* Set defaults for omitted options */ - if (!opts_out->delim) - opts_out->delim = opts_out->format == COPY_FORMAT_CSV ? "," : "\t"; + /* + * Begin per-option checks and set defaults where necessary + */ - if (!opts_out->null_print) - opts_out->null_print = opts_out->format == COPY_FORMAT_CSV ? "" : "\\N"; - opts_out->null_print_len = strlen(opts_out->null_print); + /* --- FORMAT option is always allowed; no additional checks needed --- */ - if (opts_out->format == COPY_FORMAT_CSV) + /* --- FREEZE option --- */ + if (freeze_specified) + { + if (!is_from) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + /*- translator: first %s is the name of a COPY option, e.g. ON_ERROR, + second %s is a COPY with direction, e.g. COPY TO */ + errmsg("COPY %s cannot be used with %s", "FREEZE", + "COPY TO"))); + } + else { - if (!opts_out->quote) - opts_out->quote = "\""; - if (!opts_out->escape) - opts_out->escape = opts_out->quote; + /* Default is false; no action needed */ } - /* Only single-byte delimiter strings are supported. */ - if (strlen(opts_out->delim) != 1) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("COPY delimiter must be a single one-byte character"))); + /* --- DELIMITER option --- */ + if (opts_out->delim) + { + if (opts_out->format == COPY_FORMAT_BINARY) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ + errmsg("cannot specify %s in BINARY mode", "DELIMITER"))); - /* Disallow end-of-line characters */ - if (strchr(opts_out->delim, '\r') != NULL || - strchr(opts_out->delim, '\n') != NULL) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("COPY delimiter cannot be newline or carriage return"))); + /* Only single-byte delimiter strings are supported. */ + if (strlen(opts_out->delim) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY delimiter must be a single one-byte character"))); - if (strchr(opts_out->null_print, '\r') != NULL || - strchr(opts_out->null_print, '\n') != NULL) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("COPY null representation cannot use newline or carriage return"))); + /* Disallow end-of-line characters */ + if (strchr(opts_out->delim, '\r') != NULL || + strchr(opts_out->delim, '\n') != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY delimiter cannot be newline or carriage return"))); - if (opts_out->default_print) + /* + * Disallow unsafe delimiter characters in non-CSV mode. We can't allow + * backslash because it would be ambiguous. We can't allow the other + * cases because data characters matching the delimiter must be + * backslashed, and certain backslash combinations are interpreted + * non-literally by COPY IN. Disallowing all lower case ASCII letters is + * more than strictly necessary, but seems best for consistency and + * future-proofing. Likewise we disallow all digits though only octal + * digits are actually dangerous. + */ + if (opts_out->format != COPY_FORMAT_CSV && + strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789", + opts_out->delim[0]) != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim))); + } + else if (opts_out->format != COPY_FORMAT_BINARY) { - opts_out->default_print_len = strlen(opts_out->default_print); + /* Set default delimiter */ + opts_out->delim = opts_out->format == COPY_FORMAT_CSV ? "," : "\t"; + } - if (strchr(opts_out->default_print, '\r') != NULL || - strchr(opts_out->default_print, '\n') != NULL) + /* --- NULL option --- */ + if (opts_out->null_print) + { + if (opts_out->format == COPY_FORMAT_BINARY) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify %s in BINARY mode", "NULL"))); + + /* Disallow end-of-line characters */ + if (strchr(opts_out->null_print, '\r') != NULL || + strchr(opts_out->null_print, '\n') != NULL) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("COPY default representation cannot use newline or carriage return"))); + errmsg("COPY null representation cannot use newline or carriage return"))); + } + else if (opts_out->format != COPY_FORMAT_BINARY) + { + /* Set default null_print */ + opts_out->null_print = opts_out->format == COPY_FORMAT_CSV ? "" : "\\N"; } + if (opts_out->null_print) + opts_out->null_print_len = strlen(opts_out->null_print); - /* - * Disallow unsafe delimiter characters in non-CSV mode. We can't allow - * backslash because it would be ambiguous. We can't allow the other - * cases because data characters matching the delimiter must be - * backslashed, and certain backslash combinations are interpreted - * non-literally by COPY IN. Disallowing all lower case ASCII letters is - * more than strictly necessary, but seems best for consistency and - * future-proofing. Likewise we disallow all digits though only octal - * digits are actually dangerous. - */ - if (opts_out->format != COPY_FORMAT_CSV && - strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789", - opts_out->delim[0]) != NULL) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim))); + /* --- HEADER option --- */ + if (header_specified) + { + if (opts_out->format == COPY_FORMAT_BINARY) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ + errmsg("cannot specify %s in BINARY mode", "HEADER"))); + } + else + { + /* Default is false; no action needed */ + } - /* Check header */ - if (opts_out->format == COPY_FORMAT_BINARY && opts_out->header_line) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ - errmsg("cannot specify %s in BINARY mode", "HEADER"))); + /* --- QUOTE option --- */ + if (opts_out->quote) + { + if (opts_out->format != COPY_FORMAT_CSV) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ + errmsg("COPY %s requires CSV mode", "QUOTE"))); - /* Check quote */ - if (opts_out->format != COPY_FORMAT_CSV && opts_out->quote != NULL) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ - errmsg("COPY %s requires CSV mode", "QUOTE"))); + if (strlen(opts_out->quote) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY quote must be a single one-byte character"))); + } + else if (opts_out->format == COPY_FORMAT_CSV) + { + /* Set default quote */ + opts_out->quote = "\""; + } - if (opts_out->format == COPY_FORMAT_CSV && strlen(opts_out->quote) != 1) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("COPY quote must be a single one-byte character"))); + /* --- ESCAPE option --- */ + if (opts_out->escape) + { + if (opts_out->format != COPY_FORMAT_CSV) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ + errmsg("COPY %s requires CSV mode", "ESCAPE"))); - if (opts_out->format == COPY_FORMAT_CSV && opts_out->delim[0] == opts_out->quote[0]) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("COPY delimiter and quote must be different"))); + if (strlen(opts_out->escape) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY escape must be a single one-byte character"))); + } + else if (opts_out->format == COPY_FORMAT_CSV) + { + /* Set default escape to quote character */ + opts_out->escape = opts_out->quote; + } - /* Check escape */ - if (opts_out->format != COPY_FORMAT_CSV && opts_out->escape != NULL) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ - errmsg("COPY %s requires CSV mode", "ESCAPE"))); + /* --- FORCE_QUOTE option --- */ + if (opts_out->force_quote || opts_out->force_quote_all) + { + if (opts_out->format != COPY_FORMAT_CSV) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ + errmsg("COPY %s requires CSV mode", "FORCE_QUOTE"))); - if (opts_out->format == COPY_FORMAT_CSV && strlen(opts_out->escape) != 1) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("COPY escape must be a single one-byte character"))); + if (is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + /*- translator: first %s is the name of a COPY option, e.g. ON_ERROR, + second %s is a COPY with direction, e.g. COPY TO */ + errmsg("COPY %s cannot be used with %s", "FORCE_QUOTE", + "COPY FROM"))); + } + else + { + /* No default action needed */ + } - /* Check force_quote */ - if (opts_out->format != COPY_FORMAT_CSV && (opts_out->force_quote || opts_out->force_quote_all)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ - errmsg("COPY %s requires CSV mode", "FORCE_QUOTE"))); - if ((opts_out->force_quote || opts_out->force_quote_all) && is_from) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - /*- translator: first %s is the name of a COPY option, e.g. ON_ERROR, - second %s is a COPY with direction, e.g. COPY TO */ - errmsg("COPY %s cannot be used with %s", "FORCE_QUOTE", - "COPY FROM"))); + /* --- FORCE_NOT_NULL option --- */ + if (opts_out->force_notnull != NIL || opts_out->force_notnull_all) + { + if (opts_out->format != COPY_FORMAT_CSV) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ + errmsg("COPY %s requires CSV mode", "FORCE_NOT_NULL"))); - /* Check force_notnull */ - if (opts_out->format != COPY_FORMAT_CSV && opts_out->force_notnull != NIL) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ - errmsg("COPY %s requires CSV mode", "FORCE_NOT_NULL"))); - if (opts_out->force_notnull != NIL && !is_from) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - /*- translator: first %s is the name of a COPY option, e.g. ON_ERROR, - second %s is a COPY with direction, e.g. COPY TO */ - errmsg("COPY %s cannot be used with %s", "FORCE_NOT_NULL", - "COPY TO"))); + if (!is_from) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + /*- translator: first %s is the name of a COPY option, e.g. ON_ERROR, + second %s is a COPY with direction, e.g. COPY TO */ + errmsg("COPY %s cannot be used with %s", "FORCE_NOT_NULL", + "COPY TO"))); + } + else + { + /* No default action needed */ + } - /* Check force_null */ - if (opts_out->format != COPY_FORMAT_CSV && opts_out->force_null != NIL) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ - errmsg("COPY %s requires CSV mode", "FORCE_NULL"))); + /* --- FORCE_NULL option --- */ + if (opts_out->force_null != NIL || opts_out->force_null_all) + { + if (opts_out->format != COPY_FORMAT_CSV) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ + errmsg("COPY %s requires CSV mode", "FORCE_NULL"))); - if (opts_out->force_null != NIL && !is_from) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - /*- translator: first %s is the name of a COPY option, e.g. ON_ERROR, - second %s is a COPY with direction, e.g. COPY TO */ - errmsg("COPY %s cannot be used with %s", "FORCE_NULL", - "COPY TO"))); + if (!is_from) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + /*- translator: first %s is the name of a COPY option, e.g. ON_ERROR, + second %s is a COPY with direction, e.g. COPY TO */ + errmsg("COPY %s cannot be used with %s", "FORCE_NULL", + "COPY TO"))); + } + else + { + /* No default action needed */ + } - /* Don't allow the delimiter to appear in the null string. */ - if (strchr(opts_out->null_print, opts_out->delim[0]) != NULL) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - /*- translator: %s is the name of a COPY option, e.g. NULL */ - errmsg("COPY delimiter character must not appear in the %s specification", - "NULL"))); + /* --- ON_ERROR option --- */ + if (on_error_specified) + { + if (opts_out->format == COPY_FORMAT_BINARY && + opts_out->on_error != COPY_ON_ERROR_STOP) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("only ON_ERROR STOP is allowed in BINARY mode"))); - /* Don't allow the CSV quote char to appear in the null string. */ - if (opts_out->format == COPY_FORMAT_CSV && - strchr(opts_out->null_print, opts_out->quote[0]) != NULL) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - /*- translator: %s is the name of a COPY option, e.g. NULL */ - errmsg("CSV quote character must not appear in the %s specification", - "NULL"))); + } + else + { + /* Default is COPY_ON_ERROR_STOP */ + opts_out->on_error = COPY_ON_ERROR_STOP; + } - /* Check freeze */ - if (opts_out->freeze && !is_from) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - /*- translator: first %s is the name of a COPY option, e.g. ON_ERROR, - second %s is a COPY with direction, e.g. COPY TO */ - errmsg("COPY %s cannot be used with %s", "FREEZE", - "COPY TO"))); + /* --- REJECT_LIMIT option --- */ + if (reject_limit_specified) + { + if (opts_out->on_error != COPY_ON_ERROR_IGNORE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + /*- translator: first and second %s are the names of COPY option, e.g. + * ON_ERROR, third is the value of the COPY option, e.g. IGNORE */ + errmsg("COPY %s requires %s to be set to %s", + "REJECT_LIMIT", "ON_ERROR", "IGNORE"))); + } + /* --- DEFAULT option --- */ if (opts_out->default_print) { + if (opts_out->format == COPY_FORMAT_BINARY) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify %s in BINARY mode", "DEFAULT"))); + + /* Assert options have been set (defaults applied if not specified) */ + Assert(opts_out->delim); + Assert(opts_out->quote); + Assert(opts_out->null_print); + + opts_out->default_print_len = strlen(opts_out->default_print); + + if (strchr(opts_out->default_print, '\r') != NULL || + strchr(opts_out->default_print, '\n') != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY default representation cannot use newline or carriage return"))); + if (!is_from) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -894,20 +971,55 @@ ProcessCopyOptions(ParseState *pstate, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("NULL specification and DEFAULT specification cannot be the same"))); } - /* Check on_error */ - if (opts_out->format == COPY_FORMAT_BINARY && - opts_out->on_error != COPY_ON_ERROR_STOP) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("only ON_ERROR STOP is allowed in BINARY mode"))); + else + { + /* No default for default_print; remains NULL */ + } - if (opts_out->reject_limit && !opts_out->on_error) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - /*- translator: first and second %s are the names of COPY option, e.g. - * ON_ERROR, third is the value of the COPY option, e.g. IGNORE */ - errmsg("COPY %s requires %s to be set to %s", - "REJECT_LIMIT", "ON_ERROR", "IGNORE"))); + /* + * Additional checks for interdependent options + */ + + /* Checks specific to the CSV and TEXT formats */ + if (opts_out->format == COPY_FORMAT_TEXT || + opts_out->format == COPY_FORMAT_CSV) + { + /* Assert options have been set (defaults applied if not specified) */ + Assert(opts_out->delim); + Assert(opts_out->null_print); + + /* Don't allow the delimiter to appear in the NULL or DEFAULT strings */ + + if (strchr(opts_out->null_print, opts_out->delim[0]) != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + /*- translator: %s is the name of a COPY option, e.g. NULL */ + errmsg("COPY delimiter character must not appear in the %s specification", + "NULL"))); + } + + /* Checks specific to the CSV format */ + if (opts_out->format == COPY_FORMAT_CSV) + { + /* Assert options have been set (defaults applied if not specified) */ + Assert(opts_out->delim); + Assert(opts_out->quote); + Assert(opts_out->null_print); + + if (opts_out->delim[0] == opts_out->quote[0]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY delimiter and quote must be different"))); + + /* Don't allow the CSV quote character in the NULL or DEFAULT strings */ + + if (strchr(opts_out->null_print, opts_out->quote[0]) != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + /*- translator: %s is the name of a COPY option, e.g. NULL */ + errmsg("CSV quote character must not appear in the %s specification", + "NULL"))); + } } /* -- 2.45.1