proposal: a width specification for s specifier (format function), fix behave when positional and ordered placeholders are used
Hello
I found so we doesn't have functionality for simply text aligning - so
I propose support width for %s like printf's behave. glibc
implementation knows a rule for precision, that I don't would to
implement, because it is oriented to bytes and not to chars - and it
can be confusing. Still I would to have implementation and design of
"format" function maximally simple - and a rule for "s" specifier and
width is clean and simple.
postgres=# select format('||%4s|| ||%-4s||', 'ab', 'ab');
format
-------------------
|| ab|| ||ab ||
I also found so our implementation of positional and ordered
placeholders are not correct.
-- correct
postgres=# select format('%s %2$s %s', 'Hello', 'World');
format
-------------------
Hello World World
-- our current behave
postgres=# select format('%s %2$s %s', 'Hello', 'World');
ERROR: too few arguments for format
postgres=#
Comments, notices?
Regards
Pavel Stehule
Attachments:
format_with_width.diffapplication/octet-stream; name=format_with_width.diffDownload
*** a/src/backend/utils/adt/varlena.c
--- b/src/backend/utils/adt/varlena.c
***************
*** 77,83 **** static bytea *bytea_substring(Datum str,
static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
void text_format_string_conversion(StringInfo buf, char conversion,
! Oid typid, Datum value, bool isNull);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
--- 77,84 ----
static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
void text_format_string_conversion(StringInfo buf, char conversion,
! Oid typid, Datum value, bool isNull,
! bool with_width, int width);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
***************
*** 3947,3952 **** text_reverse(PG_FUNCTION_ARGS)
--- 3948,4028 ----
}
/*
+ * Returns ptr of first next char after digit. Raise error,
+ * when no digit is processed or when parsed values are not valid
+ */
+ static const char *
+ parse_digit_subformat(const char *start_ptr, const char *end_ptr, int *value)
+ {
+ const char *cp = start_ptr;
+ bool minus = false;
+ int inum = 0;
+
+ /* continue, only when start_ptr is less than end_ptr */
+ if (cp < end_ptr)
+ {
+ if (*cp == '-')
+ {
+ minus = true;
+ start_ptr = cp++;
+ }
+
+ while (cp < end_ptr)
+ {
+ if (*cp >= '0' && *cp <= '9')
+ {
+ int newnum = inum * 10 + (*cp - '0');
+
+ if (newnum / 10 != inum) /* overflow? */
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("number is out of range")));
+ inum = newnum;
+ ++cp;
+ }
+ else
+ break;
+ }
+
+ *value = minus ? - inum : inum;
+ }
+
+ /* digit cannot be last char */
+ if (cp >= end_ptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unterminated conversion specifier")));
+
+ /* we call this routine, only when we expected number */
+ if (cp == start_ptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("invalid format, missing number")));
+
+ /* argument number must by greather than zero */
+ if (*cp == '$')
+ {
+ if (minus)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument less 0, but arguments are numbered from 1")));
+
+ if (inum == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
+
+ /* $ must not be last char */
+ if (cp + 1 >= end_ptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unterminated conversion specifier")));
+ }
+
+ return cp;
+ }
+
+ /*
* Returns a formated string
*/
Datum
***************
*** 3958,3964 **** text_format(PG_FUNCTION_ARGS)
const char *start_ptr;
const char *end_ptr;
text *result;
! int arg = 0;
/* When format string is null, returns null */
if (PG_ARGISNULL(0))
--- 4034,4040 ----
const char *start_ptr;
const char *end_ptr;
text *result;
! int args = 0;
/* When format string is null, returns null */
if (PG_ARGISNULL(0))
***************
*** 3976,3981 **** text_format(PG_FUNCTION_ARGS)
--- 4052,4060 ----
Datum value;
bool isNull;
Oid typid;
+ bool with_width;
+ int arg;
+ int width;
/*
* If it's not the start of a conversion specifier, just copy it to
***************
*** 4000,4013 **** text_format(PG_FUNCTION_ARGS)
continue;
}
! /*
! * If the user hasn't specified an argument position, we just advance
! * to the next one. If they have, we must parse it.
! */
! if (*cp < '0' || *cp > '9')
{
! ++arg;
! if (arg <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
--- 4079,4124 ----
continue;
}
! if (cp >= end_ptr)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("unterminated conversion specifier")));
!
! arg = 0;
! with_width = false;
!
! if (*cp == '-' || (*cp >= '0' && *cp <= '9'))
{
! cp = parse_digit_subformat(cp, end_ptr, &arg);
!
! if (*cp == '$')
! {
! ++cp;
! if (*cp == '-' || (*cp >= '0' && *cp <= '9'))
! {
! cp = parse_digit_subformat(cp, end_ptr, &width);
!
! if (*cp == '$')
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("argument number is defined yet")));
!
! with_width = true;
! }
! }
! else
! {
! /* previous digits was a width not argument number */
! width = arg;
! arg = 0;
! with_width = true;
! }
! }
!
! if (arg == 0)
! {
! ++args;
! if (args <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
***************
*** 4017,4064 **** text_format(PG_FUNCTION_ARGS)
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("argument number is out of range")));
}
- }
- else
- {
- bool unterminated = false;
-
- /* Parse digit string. */
- arg = 0;
- do
- {
- int newarg = arg * 10 + (*cp - '0');
-
- if (newarg / 10 != arg) /* overflow? */
- ereport(ERROR,
- (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("argument number is out of range")));
- arg = newarg;
- ++cp;
- } while (cp < end_ptr && *cp >= '0' && *cp <= '9');
-
- /*
- * If we ran off the end, or if there's not a $ next, or if the $
- * is the last character, the conversion specifier is improperly
- * terminated.
- */
- if (cp == end_ptr || *cp != '$')
- unterminated = true;
- else
- {
- ++cp;
- if (cp == end_ptr)
- unterminated = true;
- }
- if (unterminated)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("unterminated conversion specifier")));
! /* There's no argument 0. */
! if (arg == 0)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
}
/* Not enough arguments? Deduct 1 to avoid counting format string. */
--- 4128,4135 ----
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("argument number is out of range")));
}
! arg = args;
}
/* Not enough arguments? Deduct 1 to avoid counting format string. */
***************
*** 4081,4087 **** text_format(PG_FUNCTION_ARGS)
case 's':
case 'I':
case 'L':
! text_format_string_conversion(&str, *cp, typid, value, isNull);
break;
default:
ereport(ERROR,
--- 4152,4159 ----
case 's':
case 'I':
case 'L':
! text_format_string_conversion(&str, *cp, typid, value, isNull,
! with_width, width);
break;
default:
ereport(ERROR,
***************
*** 4101,4112 **** text_format(PG_FUNCTION_ARGS)
/* Format a %s, %I, or %L conversion. */
void
text_format_string_conversion(StringInfo buf, char conversion,
! Oid typid, Datum value, bool isNull)
{
Oid typOutput;
bool typIsVarlena;
char *str;
/* Handle NULL arguments before trying to stringify the value. */
if (isNull)
{
--- 4173,4190 ----
/* Format a %s, %I, or %L conversion. */
void
text_format_string_conversion(StringInfo buf, char conversion,
! Oid typid, Datum value, bool isNull,
! bool with_width, int width)
{
Oid typOutput;
bool typIsVarlena;
char *str;
+ if (conversion != 's' && with_width)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("width's specification is available only for %%s specifier")));
+
/* Handle NULL arguments before trying to stringify the value. */
if (isNull)
{
***************
*** 4138,4144 **** text_format_string_conversion(StringInfo buf, char conversion,
pfree(qstr);
}
else
! appendStringInfoString(buf, str);
/* Cleanup. */
pfree(str);
--- 4216,4245 ----
pfree(qstr);
}
else
! {
! /* fast path */
! if (!with_width)
! appendStringInfoString(buf, str);
! else
! {
! int len = pg_mbstrlen(str);
!
! if (width < 0)
! {
! /* allign to left */
! appendStringInfoString(buf, str);
! if (len < (-width))
! appendStringInfoSpaces(buf, - (len + width));
! }
! else
! {
! /* allign to right */
! if (len < width)
! appendStringInfoSpaces(buf, width - len);
! appendStringInfoString(buf, str);
! }
! }
! }
/* Cleanup. */
pfree(str);
*** a/src/test/regress/expected/text.out
--- b/src/test/regress/expected/text.out
***************
*** 223,233 **** ERROR: too few arguments for format
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
select format('%1s', 1);
! ERROR: unterminated conversion specifier
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
! ERROR: unrecognized conversion specifier "1"
--checkk mix of positional and ordered placeholders
select format('Hello %s %1$s %s', 'World', 'Hello again');
format
--- 223,237 ----
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
select format('%1s', 1);
! format
! --------
! 1
! (1 row)
!
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
! ERROR: unterminated conversion specifier
--checkk mix of positional and ordered placeholders
select format('Hello %s %1$s %s', 'World', 'Hello again');
format
***************
*** 241,243 **** select format('Hello %s %s, %2$s %2$s', 'World', 'Hello again');
--- 245,263 ----
Hello World Hello again, Hello again Hello again
(1 row)
+ select format('%s %2$s %s', 'Hello', 'World');
+ format
+ -------------------
+ Hello World World
+ (1 row)
+
+ --check width
+ select format('||%4s|| ||%-4s||', 'ab', 'ab');
+ format
+ -------------------
+ || ab|| ||ab ||
+ (1 row)
+
+ --should fail
+ select format('INSERT INTO %10I VALUES(%L,%L)', 'mytab', 10, 'Hello');
+ ERROR: width's specification is available only for %s specifier
*** a/src/test/regress/sql/text.sql
--- b/src/test/regress/sql/text.sql
***************
*** 76,78 **** select format('%1$1', 1);
--- 76,86 ----
--checkk mix of positional and ordered placeholders
select format('Hello %s %1$s %s', 'World', 'Hello again');
select format('Hello %s %s, %2$s %2$s', 'World', 'Hello again');
+
+ select format('%s %2$s %s', 'Hello', 'World');
+
+ --check width
+ select format('||%4s|| ||%-4s||', 'ab', 'ab');
+
+ --should fail
+ select format('INSERT INTO %10I VALUES(%L,%L)', 'mytab', 10, 'Hello');
Pavel,
* Pavel Stehule (pavel.stehule@gmail.com) wrote:
I found so we doesn't have functionality for simply text aligning - so
I propose support width for %s like printf's behave. glibc
implementation knows a rule for precision, that I don't would to
implement, because it is oriented to bytes and not to chars - and it
can be confusing. Still I would to have implementation and design of
"format" function maximally simple - and a rule for "s" specifier and
width is clean and simple.
I started looking at this patch to get a head-start on the next
commitfest. There's no documentation, which certainly needs to be
fixed, but worse, this doesn't appear to match glibc printf and it's not
entirely clear to me why it doesn't.
-- our current behave
postgres=# select format('%s %2$s %s', 'Hello', 'World');
ERROR: too few arguments for format
postgres=#
This is correct, if we're matching glibc (and SUS, I believe), isn't it?
You're not allowed to mix '%2$s' type parameters and '%s' in a single
format.
Thanks,
Stephen
Hello
2012/12/29 Stephen Frost <sfrost@snowman.net>:
Pavel,
* Pavel Stehule (pavel.stehule@gmail.com) wrote:
I found so we doesn't have functionality for simply text aligning - so
I propose support width for %s like printf's behave. glibc
implementation knows a rule for precision, that I don't would to
implement, because it is oriented to bytes and not to chars - and it
can be confusing. Still I would to have implementation and design of
"format" function maximally simple - and a rule for "s" specifier and
width is clean and simple.I started looking at this patch to get a head-start on the next
commitfest. There's no documentation, which certainly needs to be
fixed, but worse, this doesn't appear to match glibc printf and it's not
entirely clear to me why it doesn't.-- our current behave
postgres=# select format('%s %2$s %s', 'Hello', 'World');
ERROR: too few arguments for format
postgres=#This is correct, if we're matching glibc (and SUS, I believe), isn't it?
You're not allowed to mix '%2$s' type parameters and '%s' in a single
format.
I am not sure, please recheck
pavel ~ $ cat test.c
#include <stdio.h>
void main()
{
printf("%s %2$s %s\n", "AHOJ", "Svete");
}
pavel ~ $ gcc test.c # no warning here
pavel ~ $ gcc --version
gcc (GCC) 4.7.2 20120921 (Red Hat 4.7.2-2)
Copyright (C) 2012 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
pavel ~ $ ./a.out
AHOJ Svete Svete
pavel ~ $
Regards
Pavel Stehule
Thanks,
Stephen
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Pavel,
* Pavel Stehule (pavel.stehule@gmail.com) wrote:
2012/12/29 Stephen Frost <sfrost@snowman.net>:
This is correct, if we're matching glibc (and SUS, I believe), isn't it?
You're not allowed to mix '%2$s' type parameters and '%s' in a single
format.I am not sure, please recheck
According to the man pages on my Ubuntu system, under 'Format of the
format string':
-------------------
If the style using '$' is used, it must be used throughout for
all conversions taking an argument and all width and precision
arguments, but it may be mixed with "%%" formats which do not consume
an argument.
-------------------
pavel ~ $ cat test.c
#include <stdio.h>void main()
{printf("%s %2$s %s\n", "AHOJ", "Svete");
}pavel ~ $ gcc test.c # no warning here
You didn't turn any on...
sfrost@tamriel:/home/sfrost> gcc -o qq -Wall test.c
test.c: In function ‘main’:
test.c:5:3: warning: $ operand number used after format without operand number [-Wformat]
Thanks,
Stephen
2012/12/29 Stephen Frost <sfrost@snowman.net>:
Pavel,
* Pavel Stehule (pavel.stehule@gmail.com) wrote:
2012/12/29 Stephen Frost <sfrost@snowman.net>:
This is correct, if we're matching glibc (and SUS, I believe), isn't it?
You're not allowed to mix '%2$s' type parameters and '%s' in a single
format.I am not sure, please recheck
According to the man pages on my Ubuntu system, under 'Format of the
format string':-------------------
If the style using '$' is used, it must be used throughout for
all conversions taking an argument and all width and precision
arguments, but it may be mixed with "%%" formats which do not consume
an argument.
-------------------pavel ~ $ cat test.c
#include <stdio.h>void main()
{printf("%s %2$s %s\n", "AHOJ", "Svete");
}pavel ~ $ gcc test.c # no warning here
You didn't turn any on...
sfrost@tamriel:/home/sfrost> gcc -o qq -Wall test.c
test.c: In function ‘main’:
test.c:5:3: warning: $ operand number used after format without operand number [-Wformat]
ok, so what is proposed solution?
I see two possibilities - a) applying my current patch - although it
is not fully correct, b) new patch, that do necessary check and raise
more descriptive error message.
I have not strong preferences in this topic - both variants are
acceptable for me and I invite any community opinion. But current
state is not intuitive and should be fixed.
Regards
Pavel
Thanks,
Stephen
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
* Pavel Stehule (pavel.stehule@gmail.com) wrote:
ok, so what is proposed solution?
My recommendation would be to match what glibc's printf does.
I see two possibilities - a) applying my current patch - although it
is not fully correct, b) new patch, that do necessary check and raise
more descriptive error message.
Right, have a new patch that does error-checking and returns a better
error on that case, update the docs to reflect that restriction, and
then (ideally as an additional and independent patch..) implement the
width capability (and, ideally, the ability to pass the width as an
argument, as glibc supports) which matches the glibc arguments.
Part of the reason that this restriction is in place, I believe, is
because glibc expects the width to come before any explicit argument
being passed and if an explicit argument is used for width then an
explicit argument has to be used for the value also, otherwise it
wouldn't be clear from the format which was the argument number and
which was the explicit width size.
I don't think it's a good idea to come up with our own format
definition, particularly one which looks so similar to the well-known
printf() format.
I have not strong preferences in this topic - both variants are
acceptable for me and I invite any community opinion. But current
state is not intuitive and should be fixed.
Agreed.
Thanks,
Stephen
Hello Stephen
2012/12/29 Stephen Frost <sfrost@snowman.net>:
* Pavel Stehule (pavel.stehule@gmail.com) wrote:
ok, so what is proposed solution?
My recommendation would be to match what glibc's printf does.
I see two possibilities - a) applying my current patch - although it
is not fully correct, b) new patch, that do necessary check and raise
more descriptive error message.Right, have a new patch that does error-checking and returns a better
error on that case, update the docs to reflect that restriction, and
then (ideally as an additional and independent patch..) implement the
width capability (and, ideally, the ability to pass the width as an
argument, as glibc supports) which matches the glibc arguments.Part of the reason that this restriction is in place, I believe, is
because glibc expects the width to come before any explicit argument
being passed and if an explicit argument is used for width then an
explicit argument has to be used for the value also, otherwise it
wouldn't be clear from the format which was the argument number and
which was the explicit width size.
I found one issue - if I disallow mixing positional and ordered style
I break compatibility with previous implementation.
so maybe third way is better - use fix from my patch - a behave is
same like in glibc - and raise warning (instead errors) when mixing
styles is detected - we can replace warnings by errors in future.
What do you think?
Regards
Pavel
I don't think it's a good idea to come up with our own format
definition, particularly one which looks so similar to the well-known
printf() format.I have not strong preferences in this topic - both variants are
acceptable for me and I invite any community opinion. But current
state is not intuitive and should be fixed.Agreed.
Thanks,
Stephen
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
2012/12/30 Pavel Stehule <pavel.stehule@gmail.com>:
Hello Stephen
2012/12/29 Stephen Frost <sfrost@snowman.net>:
* Pavel Stehule (pavel.stehule@gmail.com) wrote:
ok, so what is proposed solution?
My recommendation would be to match what glibc's printf does.
I see two possibilities - a) applying my current patch - although it
is not fully correct, b) new patch, that do necessary check and raise
more descriptive error message.Right, have a new patch that does error-checking and returns a better
error on that case, update the docs to reflect that restriction, and
then (ideally as an additional and independent patch..) implement the
width capability (and, ideally, the ability to pass the width as an
argument, as glibc supports) which matches the glibc arguments.Part of the reason that this restriction is in place, I believe, is
because glibc expects the width to come before any explicit argument
being passed and if an explicit argument is used for width then an
explicit argument has to be used for the value also, otherwise it
wouldn't be clear from the format which was the argument number and
which was the explicit width size.I found one issue - if I disallow mixing positional and ordered style
I break compatibility with previous implementation.so maybe third way is better - use fix from my patch - a behave is
same like in glibc - and raise warning (instead errors) when mixing
styles is detected - we can replace warnings by errors in future.
this is exactly what gcc does - and without breaking applications.
What do you think?
Regards
Pavel
I don't think it's a good idea to come up with our own format
definition, particularly one which looks so similar to the well-known
printf() format.I have not strong preferences in this topic - both variants are
acceptable for me and I invite any community opinion. But current
state is not intuitive and should be fixed.Agreed.
Thanks,
Stephen
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Pavel,
* Pavel Stehule (pavel.stehule@gmail.com) wrote:
I found one issue - if I disallow mixing positional and ordered style
I break compatibility with previous implementation.
Can you elaborate? In the previous example, an error was returned when
mixing (not a terribly good one, but still an error). Returning a
better error won't be a problem.
so maybe third way is better - use fix from my patch - a behave is
same like in glibc - and raise warning (instead errors) when mixing
styles is detected - we can replace warnings by errors in future.What do you think?
If there are cases which work today then I agree that we should issue a
warning to avoid breaking existing applications. We should still use
the glibc format when adding width support, however.
Thanks,
Stephen
Hello
2012/12/31 Stephen Frost <sfrost@snowman.net>:
Pavel,
* Pavel Stehule (pavel.stehule@gmail.com) wrote:
I found one issue - if I disallow mixing positional and ordered style
I break compatibility with previous implementation.Can you elaborate? In the previous example, an error was returned when
mixing (not a terribly good one, but still an error). Returning a
better error won't be a problem.
A result from ours previous talk was a completely disabling mixing
positional and ordered placeholders - like is requested by man and gcc
raises warnings there.
But mixing is not explicitly disallowed in doc, and mixing was tested
in our regress tests. There are tests where placeholders are mixed -
so anybody can use it.
select format('Hello %s %1$s %s', 'World', 'Hello again'); -- is
enabled and supported and result is expected
-- but this raises error - and it is same situation like previous example
select format('%s %2$s %s', 'Hello', 'World');
-- so bot examples should be executed or should be disabled if this
functionality should be consistent. And I can't to break first
example, then I have to repair second example
Regards
so maybe third way is better - use fix from my patch - a behave is
same like in glibc - and raise warning (instead errors) when mixing
styles is detected - we can replace warnings by errors in future.What do you think?
If there are cases which work today then I agree that we should issue a
warning to avoid breaking existing applications. We should still use
the glibc format when adding width support, however.Thanks,
Stephen
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Pavel,
* Pavel Stehule (pavel.stehule@gmail.com) wrote:
A result from ours previous talk was a completely disabling mixing
positional and ordered placeholders - like is requested by man and gcc
raises warnings there.But mixing is not explicitly disallowed in doc, and mixing was tested
in our regress tests. There are tests where placeholders are mixed -
so anybody can use it.
select format('Hello %s %1$s %s', 'World', 'Hello again'); -- is
enabled and supported and result is expected
Alright, then I agree that raising a warning in that case makes sense
and let's update the docs to reflect that it shouldn't be done (like
what glibc/gcc do).
Thanks,
Stephen
2012/12/31 Stephen Frost <sfrost@snowman.net>:
Pavel,
* Pavel Stehule (pavel.stehule@gmail.com) wrote:
A result from ours previous talk was a completely disabling mixing
positional and ordered placeholders - like is requested by man and gcc
raises warnings there.But mixing is not explicitly disallowed in doc, and mixing was tested
in our regress tests. There are tests where placeholders are mixed -
so anybody can use it.
select format('Hello %s %1$s %s', 'World', 'Hello again'); -- is
enabled and supported and result is expectedAlright, then I agree that raising a warning in that case makes sense
and let's update the docs to reflect that it shouldn't be done (like
what glibc/gcc do).
ok, I prepare patch
Regards
Pavel
Thanks,
Stephen
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Hello
2012/12/31 Stephen Frost <sfrost@snowman.net>:
Pavel,
* Pavel Stehule (pavel.stehule@gmail.com) wrote:
A result from ours previous talk was a completely disabling mixing
positional and ordered placeholders - like is requested by man and gcc
raises warnings there.But mixing is not explicitly disallowed in doc, and mixing was tested
in our regress tests. There are tests where placeholders are mixed -
so anybody can use it.
select format('Hello %s %1$s %s', 'World', 'Hello again'); -- is
enabled and supported and result is expectedAlright, then I agree that raising a warning in that case makes sense
and let's update the docs to reflect that it shouldn't be done (like
what glibc/gcc do).
so there are two patches - first is fix in logic when positional and
ordered parameters are mixed + add warning in this situation. Second
patch enables possibility to specify width for %s conversion.
I didn't finalize documentation due my net good English skills -
probably there is necessary new paragraph about function "format"
elsewhere than in table
Regards
Pavel
Show quoted text
Thanks,
Stephen
Attachments:
fix_mixing_positinal_ordered_placeholders.patchapplication/octet-stream; name=fix_mixing_positinal_ordered_placeholders.patchDownload
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
***************
*** 1530,1536 ****
SQL literal; <literal>%%</literal> outputs a literal <literal>%</>.
A conversion can reference an explicit parameter position by preceding
the conversion specifier with <literal><replaceable>n</>$</>, where
! <replaceable>n</replaceable> is the argument position.
See also <xref linkend="plpgsql-quote-literal-example">.
</entry>
<entry><literal>format('Hello %s, %1$s', 'World')</literal></entry>
--- 1530,1537 ----
SQL literal; <literal>%%</literal> outputs a literal <literal>%</>.
A conversion can reference an explicit parameter position by preceding
the conversion specifier with <literal><replaceable>n</>$</>, where
! <replaceable>n</replaceable> is the argument position. A warnig is raised
! when positional and ordered placeholders are used together.
See also <xref linkend="plpgsql-quote-literal-example">.
</entry>
<entry><literal>format('Hello %s, %1$s', 'World')</literal></entry>
*** a/src/backend/utils/adt/varlena.c
--- b/src/backend/utils/adt/varlena.c
***************
*** 3958,3964 **** text_format(PG_FUNCTION_ARGS)
const char *start_ptr;
const char *end_ptr;
text *result;
! int arg = 0;
/* When format string is null, returns null */
if (PG_ARGISNULL(0))
--- 3958,3968 ----
const char *start_ptr;
const char *end_ptr;
text *result;
! int arg;
! int position = 0;
! bool positional = false;
! bool ordered = false;
! bool warning_emmited = false;
/* When format string is null, returns null */
if (PG_ARGISNULL(0))
***************
*** 4006,4013 **** text_format(PG_FUNCTION_ARGS)
*/
if (*cp < '0' || *cp > '9')
{
! ++arg;
! if (arg <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
--- 4010,4025 ----
*/
if (*cp < '0' || *cp > '9')
{
! /* don't allow mix styles - reflects glibc behave */
! if (positional && !warning_emmited)
! {
! elog(WARNING, "ordered and positional placeholders are used together");
! warning_emmited = true;
! }
! ordered = true;
!
! ++position;
! if (position <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
***************
*** 4017,4022 **** text_format(PG_FUNCTION_ARGS)
--- 4029,4035 ----
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("argument number is out of range")));
}
+ arg = position;
}
else
{
***************
*** 4054,4059 **** text_format(PG_FUNCTION_ARGS)
--- 4067,4079 ----
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unterminated conversion specifier")));
+ if (ordered && !warning_emmited)
+ {
+ elog(WARNING, "ordered and positional placeholders are used together");
+ warning_emmited = true;
+ }
+ positional = true;
+
/* There's no argument 0. */
if (arg == 0)
ereport(ERROR,
*** a/src/test/regress/expected/text.out
--- b/src/test/regress/expected/text.out
***************
*** 229,243 **** ERROR: unterminated conversion specifier
--- 229,253 ----
select format('%1$1', 1);
ERROR: unrecognized conversion specifier "1"
--checkk mix of positional and ordered placeholders
+ --should raise warnings
select format('Hello %s %1$s %s', 'World', 'Hello again');
+ WARNING: ordered and positional placeholders are used together
format
-------------------------------
Hello World World Hello again
(1 row)
select format('Hello %s %s, %2$s %2$s', 'World', 'Hello again');
+ WARNING: ordered and positional placeholders are used together
format
--------------------------------------------------
Hello World Hello again, Hello again Hello again
(1 row)
+ select format('Hello %s %2$s %s', 'World', 'Hello again');
+ WARNING: ordered and positional placeholders are used together
+ format
+ -------------------------------------
+ Hello World Hello again Hello again
+ (1 row)
+
*** a/src/test/regress/sql/text.sql
--- b/src/test/regress/sql/text.sql
***************
*** 74,78 **** select format('%1s', 1);
--- 74,80 ----
select format('%1$', 1);
select format('%1$1', 1);
--checkk mix of positional and ordered placeholders
+ --should raise warnings
select format('Hello %s %1$s %s', 'World', 'Hello again');
select format('Hello %s %s, %2$s %2$s', 'World', 'Hello again');
+ select format('Hello %s %2$s %s', 'World', 'Hello again');
format_width.patchapplication/octet-stream; name=format_width.patchDownload
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
***************
*** 1530,1536 ****
SQL literal; <literal>%%</literal> outputs a literal <literal>%</>.
A conversion can reference an explicit parameter position by preceding
the conversion specifier with <literal><replaceable>n</>$</>, where
! <replaceable>n</replaceable> is the argument position. A warnig is raised
when positional and ordered placeholders are used together.
See also <xref linkend="plpgsql-quote-literal-example">.
</entry>
--- 1530,1538 ----
SQL literal; <literal>%%</literal> outputs a literal <literal>%</>.
A conversion can reference an explicit parameter position by preceding
the conversion specifier with <literal><replaceable>n</>$</>, where
! <replaceable>n</replaceable> is the argument position. Similary to C
! function <function>sprintf</>; is possible to specify a width for
! <literal>%s</literal> conversion. A warnig is raised
when positional and ordered placeholders are used together.
See also <xref linkend="plpgsql-quote-literal-example">.
</entry>
*** a/src/backend/utils/adt/varlena.c
--- b/src/backend/utils/adt/varlena.c
***************
*** 77,83 **** static bytea *bytea_substring(Datum str,
static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
void text_format_string_conversion(StringInfo buf, char conversion,
! Oid typid, Datum value, bool isNull);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
--- 77,84 ----
static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
void text_format_string_conversion(StringInfo buf, char conversion,
! Oid typid, Datum value, bool isNull,
! bool with_width, int width);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
***************
*** 3947,3952 **** text_reverse(PG_FUNCTION_ARGS)
--- 3948,4028 ----
}
/*
+ * Returns ptr of first non digit char. Raise error,
+ * when no digit is processed or when result of parsing is not number
+ */
+ static const char *
+ parse_digit_subformat(const char *start_ptr, const char *end_ptr, int *value)
+ {
+ const char *cp = start_ptr;
+ bool minus = false;
+ int inum = 0;
+
+ /* continue, only when start_ptr is less than end_ptr */
+ if (cp < end_ptr)
+ {
+ if (*cp == '-')
+ {
+ minus = true;
+ start_ptr = cp++;
+ }
+
+ while (cp < end_ptr)
+ {
+ if (*cp >= '0' && *cp <= '9')
+ {
+ int newnum = inum * 10 + (*cp - '0');
+
+ if (newnum / 10 != inum) /* overflow? */
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("number is out of range")));
+ inum = newnum;
+ ++cp;
+ }
+ else
+ break;
+ }
+
+ *value = minus ? - inum : inum;
+ }
+
+ /* digit cannot be last char */
+ if (cp >= end_ptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unterminated conversion specifier")));
+
+ /* we call this routine, only when we expected number */
+ if (cp == start_ptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("invalid format, missing expected number")));
+
+ /* argument number must by greather than zero */
+ if (*cp == '$')
+ {
+ if (minus)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
+
+ if (inum == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
+
+ /* $ must not be last char */
+ if (cp + 1 >= end_ptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unterminated conversion specifier")));
+ }
+
+ return cp;
+ }
+
+ /*
* Returns a formated string
*/
Datum
***************
*** 3959,3965 **** text_format(PG_FUNCTION_ARGS)
const char *end_ptr;
text *result;
int arg;
! int position = 0;
bool positional = false;
bool ordered = false;
bool warning_emmited = false;
--- 4035,4041 ----
const char *end_ptr;
text *result;
int arg;
! int ordered_nargs = 0;
bool positional = false;
bool ordered = false;
bool warning_emmited = false;
***************
*** 3980,3985 **** text_format(PG_FUNCTION_ARGS)
--- 4056,4063 ----
Datum value;
bool isNull;
Oid typid;
+ int width = 0;
+ bool with_width = false;
/*
* If it's not the start of a conversion specifier, just copy it to
***************
*** 4004,4025 **** text_format(PG_FUNCTION_ARGS)
continue;
}
/*
! * If the user hasn't specified an argument position, we just advance
! * to the next one. If they have, we must parse it.
*/
! if (*cp < '0' || *cp > '9')
{
! /* don't allow mix styles - reflects glibc behave */
! if (positional && !warning_emmited)
{
! elog(WARNING, "ordered and positional placeholders are used together");
! warning_emmited = true;
}
! ordered = true;
! ++position;
! if (position <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
--- 4082,4125 ----
continue;
}
+ arg = 0;
/*
! * User can specify a width or a position or both. The width should
! * be negative.
*/
! if (*cp == '-' || (*cp >= '0' && *cp <= '9'))
{
! cp = parse_digit_subformat(cp, end_ptr, &arg);
!
! if (*cp == '$')
{
! ++cp;
! if (*cp == '-' || (*cp >= '0' && *cp <= '9'))
! {
! cp = parse_digit_subformat(cp, end_ptr, &width);
!
! if (*cp == '$')
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("argument number is defined yet")));
!
! with_width = true;
! }
}
! else
! {
! /* previous digits was a width not argument number */
! width = arg;
! arg = 0;
! with_width = true;
! }
! }
! /* when argument not specified yet, use a next one */
! if (arg == 0)
! {
! ++ordered_nargs;
! if (ordered_nargs <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
***************
*** 4029,4084 **** text_format(PG_FUNCTION_ARGS)
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("argument number is out of range")));
}
! arg = position;
! }
! else
! {
! bool unterminated = false;
!
! /* Parse digit string. */
! arg = 0;
! do
! {
! int newarg = arg * 10 + (*cp - '0');
! if (newarg / 10 != arg) /* overflow? */
! ereport(ERROR,
! (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
! errmsg("argument number is out of range")));
! arg = newarg;
! ++cp;
! } while (cp < end_ptr && *cp >= '0' && *cp <= '9');
!
! /*
! * If we ran off the end, or if there's not a $ next, or if the $
! * is the last character, the conversion specifier is improperly
! * terminated.
! */
! if (cp == end_ptr || *cp != '$')
! unterminated = true;
! else
{
! ++cp;
! if (cp == end_ptr)
! unterminated = true;
}
- if (unterminated)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("unterminated conversion specifier")));
if (ordered && !warning_emmited)
{
elog(WARNING, "ordered and positional placeholders are used together");
warning_emmited = true;
}
- positional = true;
! /* There's no argument 0. */
! if (arg == 0)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
}
/* Not enough arguments? Deduct 1 to avoid counting format string. */
--- 4129,4154 ----
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("argument number is out of range")));
}
! arg = ordered_nargs;
! /* raise warning when found mixed placeholders, but only once */
! if (positional && !warning_emmited)
{
! elog(WARNING, "ordered and positional placeholders are used together");
! warning_emmited = true;
}
+ ordered = true;
+ }
+ else
+ {
if (ordered && !warning_emmited)
{
elog(WARNING, "ordered and positional placeholders are used together");
warning_emmited = true;
}
! positional = true;
}
/* Not enough arguments? Deduct 1 to avoid counting format string. */
***************
*** 4099,4108 **** text_format(PG_FUNCTION_ARGS)
switch (*cp)
{
case 's':
case 'I':
case 'L':
! text_format_string_conversion(&str, *cp, typid, value, isNull);
break;
default:
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
--- 4169,4186 ----
switch (*cp)
{
case 's':
+ text_format_string_conversion(&str, *cp, typid, value, isNull, with_width, width);
+ break;
+
case 'I':
case 'L':
! if (with_width)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("conversion \"%c\" doesn't suppport the width", *cp)));
! text_format_string_conversion(&str, *cp, typid, value, isNull, false, 0);
break;
+
default:
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
***************
*** 4121,4127 **** text_format(PG_FUNCTION_ARGS)
/* Format a %s, %I, or %L conversion. */
void
text_format_string_conversion(StringInfo buf, char conversion,
! Oid typid, Datum value, bool isNull)
{
Oid typOutput;
bool typIsVarlena;
--- 4199,4206 ----
/* Format a %s, %I, or %L conversion. */
void
text_format_string_conversion(StringInfo buf, char conversion,
! Oid typid, Datum value, bool isNull,
! bool with_width, int width)
{
Oid typOutput;
bool typIsVarlena;
***************
*** 4158,4164 **** text_format_string_conversion(StringInfo buf, char conversion,
pfree(qstr);
}
else
! appendStringInfoString(buf, str);
/* Cleanup. */
pfree(str);
--- 4237,4266 ----
pfree(qstr);
}
else
! {
! /* fast path */
! if (!with_width)
! appendStringInfoString(buf, str);
! else
! {
! int len = pg_mbstrlen(str);
!
! if (width < 0)
! {
! /* allign to left */
! appendStringInfoString(buf, str);
! if (len < (-width))
! appendStringInfoSpaces(buf, - (len + width));
! }
! else
! {
! /* allign to right */
! if (len < width)
! appendStringInfoSpaces(buf, width - len);
! appendStringInfoString(buf, str);
! }
! }
! }
/* Cleanup. */
pfree(str);
*** a/src/test/regress/expected/text.out
--- b/src/test/regress/expected/text.out
***************
*** 222,233 **** select format('%1$s %4$s', 1, 2, 3);
ERROR: too few arguments for format
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
- select format('%1s', 1);
- ERROR: unterminated conversion specifier
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
! ERROR: unrecognized conversion specifier "1"
--checkk mix of positional and ordered placeholders
--should raise warnings
select format('Hello %s %1$s %s', 'World', 'Hello again');
--- 222,231 ----
ERROR: too few arguments for format
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
! ERROR: unterminated conversion specifier
--checkk mix of positional and ordered placeholders
--should raise warnings
select format('Hello %s %1$s %s', 'World', 'Hello again');
***************
*** 251,253 **** WARNING: ordered and positional placeholders are used together
--- 249,283 ----
Hello World Hello again Hello again
(1 row)
+ -- format with a width
+ select format('>>%10s<<', 'Hello');
+ format
+ ----------------
+ >> Hello<<
+ (1 row)
+
+ select format('>>%-10s<<', 'Hello');
+ format
+ ----------------
+ >>Hello <<
+ (1 row)
+
+ select format('>>%1$10s<<', 'Hello');
+ format
+ ----------------
+ >> Hello<<
+ (1 row)
+
+ select format('>>%1$-10s<<', 'Hello');
+ format
+ ----------------
+ >>Hello <<
+ (1 row)
+
+ -- should fail
+ select format('>>%1$1$s<<', 'Hello');
+ ERROR: argument number is defined yet
+ select format('>>%10I<<', 'tablename');
+ ERROR: conversion "I" doesn't suppport the width
+ select format('>>%1$10L<<', 'tablename');
+ ERROR: conversion "L" doesn't suppport the width
*** a/src/test/regress/sql/text.sql
--- b/src/test/regress/sql/text.sql
***************
*** 70,76 **** select format('%1$s %12$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
-- should fail
select format('%1$s %4$s', 1, 2, 3);
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
- select format('%1s', 1);
select format('%1$', 1);
select format('%1$1', 1);
--checkk mix of positional and ordered placeholders
--- 70,75 ----
***************
*** 78,80 **** select format('%1$1', 1);
--- 77,90 ----
select format('Hello %s %1$s %s', 'World', 'Hello again');
select format('Hello %s %s, %2$s %2$s', 'World', 'Hello again');
select format('Hello %s %2$s %s', 'World', 'Hello again');
+
+ -- format with a width
+ select format('>>%10s<<', 'Hello');
+ select format('>>%-10s<<', 'Hello');
+ select format('>>%1$10s<<', 'Hello');
+ select format('>>%1$-10s<<', 'Hello');
+
+ -- should fail
+ select format('>>%1$1$s<<', 'Hello');
+ select format('>>%10I<<', 'tablename');
+ select format('>>%1$10L<<', 'tablename');
Hello
2012/12/31 Pavel Stehule <pavel.stehule@gmail.com>:
Hello
2012/12/31 Stephen Frost <sfrost@snowman.net>:
Pavel,
* Pavel Stehule (pavel.stehule@gmail.com) wrote:
A result from ours previous talk was a completely disabling mixing
positional and ordered placeholders - like is requested by man and gcc
raises warnings there.But mixing is not explicitly disallowed in doc, and mixing was tested
in our regress tests. There are tests where placeholders are mixed -
so anybody can use it.
select format('Hello %s %1$s %s', 'World', 'Hello again'); -- is
enabled and supported and result is expectedAlright, then I agree that raising a warning in that case makes sense
and let's update the docs to reflect that it shouldn't be done (like
what glibc/gcc do).so there are two patches - first is fix in logic when positional and
ordered parameters are mixed + add warning in this situation. Second
patch enables possibility to specify width for %s conversion.I didn't finalize documentation due my net good English skills -
probably there is necessary new paragraph about function "format"
elsewhere than in tableRegards
Pavel
updated patches due changes for better variadic "any" function.
apply fix_mixing_positinal_ordered_placeholders_warnings_20130126.patch first
Regards
Pavel
Attachments:
fix_mixing_positinal_ordered_placeholders_warnings_20130126.patchapplication/octet-stream; name=fix_mixing_positinal_ordered_placeholders_warnings_20130126.patchDownload
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
***************
*** 1532,1538 ****
SQL literal; <literal>%%</literal> outputs a literal <literal>%</>.
A conversion can reference an explicit parameter position by preceding
the conversion specifier with <literal><replaceable>n</>$</>, where
! <replaceable>n</replaceable> is the argument position.
See also <xref linkend="plpgsql-quote-literal-example">.
</entry>
<entry><literal>format('Hello %s, %1$s', 'World')</literal></entry>
--- 1532,1539 ----
SQL literal; <literal>%%</literal> outputs a literal <literal>%</>.
A conversion can reference an explicit parameter position by preceding
the conversion specifier with <literal><replaceable>n</>$</>, where
! <replaceable>n</replaceable> is the argument position. A warnig is raised
! when positional and ordered placeholders are used together.
See also <xref linkend="plpgsql-quote-literal-example">.
</entry>
<entry><literal>format('Hello %s, %1$s', 'World')</literal></entry>
*** a/src/backend/utils/adt/varlena.c
--- b/src/backend/utils/adt/varlena.c
***************
*** 4003,4008 **** text_format(PG_FUNCTION_ARGS)
--- 4003,4012 ----
Oid element_type = InvalidOid;
Oid prev_type = InvalidOid;
FmgrInfo typoutputfinfo;
+ int position = 0;
+ bool positional = false;
+ bool ordered = false;
+ bool warning_emmited = false;
/* When format string is null, returns null */
if (PG_ARGISNULL(0))
***************
*** 4092,4099 **** text_format(PG_FUNCTION_ARGS)
*/
if (*cp < '0' || *cp > '9')
{
! ++arg;
! if (arg <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
--- 4096,4111 ----
*/
if (*cp < '0' || *cp > '9')
{
! /* don't allow mix styles - reflects glibc behave */
! if (positional && !warning_emmited)
! {
! elog(WARNING, "ordered and positional placeholders are used together");
! warning_emmited = true;
! }
! ordered = true;
!
! ++position;
! if (position <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
***************
*** 4103,4108 **** text_format(PG_FUNCTION_ARGS)
--- 4115,4121 ----
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("argument number is out of range")));
}
+ arg = position;
}
else
{
***************
*** 4140,4145 **** text_format(PG_FUNCTION_ARGS)
--- 4153,4165 ----
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unterminated conversion specifier")));
+ if (ordered && !warning_emmited)
+ {
+ elog(WARNING, "ordered and positional placeholders are used together");
+ warning_emmited = true;
+ }
+ positional = true;
+
/* There's no argument 0. */
if (arg == 0)
ereport(ERROR,
*** a/src/test/regress/expected/text.out
--- b/src/test/regress/expected/text.out
***************
*** 265,282 **** ERROR: unterminated conversion specifier
--- 265,292 ----
select format('%1$1', 1);
ERROR: unrecognized conversion specifier "1"
-- check mix of positional and ordered placeholders
+ -- should raise warning
select format('Hello %s %1$s %s', 'World', 'Hello again');
+ WARNING: ordered and positional placeholders are used together
format
-------------------------------
Hello World World Hello again
(1 row)
select format('Hello %s %s, %2$s %2$s', 'World', 'Hello again');
+ WARNING: ordered and positional placeholders are used together
format
--------------------------------------------------
Hello World Hello again, Hello again Hello again
(1 row)
+ select format('Hello %s %2$s %s', 'World', 'Hello again');
+ WARNING: ordered and positional placeholders are used together
+ format
+ -------------------------------------
+ Hello World Hello again Hello again
+ (1 row)
+
-- check variadic labeled arguments
select format('%s, %s', variadic array['Hello','World']);
format
*** a/src/test/regress/sql/text.sql
--- b/src/test/regress/sql/text.sql
***************
*** 82,89 **** select format('%1s', 1);
--- 82,91 ----
select format('%1$', 1);
select format('%1$1', 1);
-- check mix of positional and ordered placeholders
+ -- should raise warning
select format('Hello %s %1$s %s', 'World', 'Hello again');
select format('Hello %s %s, %2$s %2$s', 'World', 'Hello again');
+ select format('Hello %s %2$s %s', 'World', 'Hello again');
-- check variadic labeled arguments
select format('%s, %s', variadic array['Hello','World']);
select format('%s, %s', variadic array[1, 2]);
format_width_20130126.patchapplication/octet-stream; name=format_width_20130126.patchDownload
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
***************
*** 1532,1538 ****
SQL literal; <literal>%%</literal> outputs a literal <literal>%</>.
A conversion can reference an explicit parameter position by preceding
the conversion specifier with <literal><replaceable>n</>$</>, where
! <replaceable>n</replaceable> is the argument position. A warnig is raised
when positional and ordered placeholders are used together.
See also <xref linkend="plpgsql-quote-literal-example">.
</entry>
--- 1532,1540 ----
SQL literal; <literal>%%</literal> outputs a literal <literal>%</>.
A conversion can reference an explicit parameter position by preceding
the conversion specifier with <literal><replaceable>n</>$</>, where
! <replaceable>n</replaceable> is the argument position. Similary to C
! function <function>sprintf</>; is possible to specify a width for
! <literal>%s</literal> conversion. A warnig is raised
when positional and ordered placeholders are used together.
See also <xref linkend="plpgsql-quote-literal-example">.
</entry>
*** a/src/backend/utils/adt/varlena.c
--- b/src/backend/utils/adt/varlena.c
***************
*** 78,84 **** static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
static void text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
const char *fldsep, const char *null_string);
--- 78,85 ----
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
static void text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull,
! bool use_width, int width);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
const char *fldsep, const char *null_string);
***************
*** 3984,3989 **** text_reverse(PG_FUNCTION_ARGS)
--- 3985,4065 ----
}
/*
+ * Returns ptr of first non digit char. Raise error,
+ * when no digit is processed or when result of parsing is not number
+ */
+ static const char *
+ parse_digit_subformat(const char *start_ptr, const char *end_ptr, int *value)
+ {
+ const char *cp = start_ptr;
+ bool minus = false;
+ int inum = 0;
+
+ /* continue, only when start_ptr is less than end_ptr */
+ if (cp < end_ptr)
+ {
+ if (*cp == '-')
+ {
+ minus = true;
+ start_ptr = cp++;
+ }
+
+ while (cp < end_ptr)
+ {
+ if (*cp >= '0' && *cp <= '9')
+ {
+ int newnum = inum * 10 + (*cp - '0');
+
+ if (newnum / 10 != inum) /* overflow? */
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("number is out of range")));
+ inum = newnum;
+ ++cp;
+ }
+ else
+ break;
+ }
+
+ *value = minus ? - inum : inum;
+ }
+
+ /* digit cannot be last char */
+ if (cp >= end_ptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unterminated conversion specifier")));
+
+ /* we call this routine, only when we expected number */
+ if (cp == start_ptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("invalid format, missing expected number")));
+
+ /* argument number must by greather than zero */
+ if (*cp == '$')
+ {
+ if (minus)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
+
+ if (inum == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
+
+ /* $ must not be last char */
+ if (cp + 1 >= end_ptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unterminated conversion specifier")));
+ }
+
+ return cp;
+ }
+
+ /*
* Returns a formated string
*/
Datum
***************
*** 4003,4009 **** text_format(PG_FUNCTION_ARGS)
Oid element_type = InvalidOid;
Oid prev_type = InvalidOid;
FmgrInfo typoutputfinfo;
! int position = 0;
bool positional = false;
bool ordered = false;
bool warning_emmited = false;
--- 4079,4085 ----
Oid element_type = InvalidOid;
Oid prev_type = InvalidOid;
FmgrInfo typoutputfinfo;
! int ordered_nargs = 0;
bool positional = false;
bool ordered = false;
bool warning_emmited = false;
***************
*** 4066,4071 **** text_format(PG_FUNCTION_ARGS)
--- 4142,4149 ----
Datum value;
bool isNull;
Oid typid;
+ int width = 0;
+ bool use_width = false;
/*
* If it's not the start of a conversion specifier, just copy it to
***************
*** 4090,4111 **** text_format(PG_FUNCTION_ARGS)
continue;
}
/*
! * If the user hasn't specified an argument position, we just advance
! * to the next one. If they have, we must parse it.
*/
! if (*cp < '0' || *cp > '9')
{
! /* don't allow mix styles - reflects glibc behave */
! if (positional && !warning_emmited)
{
! elog(WARNING, "ordered and positional placeholders are used together");
! warning_emmited = true;
}
! ordered = true;
! ++position;
! if (position <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
--- 4168,4211 ----
continue;
}
+ arg = 0;
/*
! * User can specify a width or a position or both. The width should
! * be negative.
*/
! if (*cp == '-' || (*cp >= '0' && *cp <= '9'))
{
! cp = parse_digit_subformat(cp, end_ptr, &arg);
!
! if (*cp == '$')
{
! ++cp;
! if (*cp == '-' || (*cp >= '0' && *cp <= '9'))
! {
! cp = parse_digit_subformat(cp, end_ptr, &width);
!
! if (*cp == '$')
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("argument number is defined yet")));
!
! use_width = true;
! }
}
! else
! {
! /* previous digits was a width not argument number */
! width = arg;
! arg = 0;
! use_width = true;
! }
! }
! /* when argument not specified yet, use a next one */
! if (arg == 0)
! {
! ++ordered_nargs;
! if (ordered_nargs <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
***************
*** 4115,4170 **** text_format(PG_FUNCTION_ARGS)
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("argument number is out of range")));
}
! arg = position;
! }
! else
! {
! bool unterminated = false;
!
! /* Parse digit string. */
! arg = 0;
! do
! {
! int newarg = arg * 10 + (*cp - '0');
! if (newarg / 10 != arg) /* overflow? */
! ereport(ERROR,
! (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
! errmsg("argument number is out of range")));
! arg = newarg;
! ++cp;
! } while (cp < end_ptr && *cp >= '0' && *cp <= '9');
!
! /*
! * If we ran off the end, or if there's not a $ next, or if the $
! * is the last character, the conversion specifier is improperly
! * terminated.
! */
! if (cp == end_ptr || *cp != '$')
! unterminated = true;
! else
{
! ++cp;
! if (cp == end_ptr)
! unterminated = true;
}
- if (unterminated)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("unterminated conversion specifier")));
if (ordered && !warning_emmited)
{
elog(WARNING, "ordered and positional placeholders are used together");
warning_emmited = true;
}
- positional = true;
! /* There's no argument 0. */
! if (arg == 0)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
}
/* Not enough arguments? Deduct 1 to avoid counting format string. */
--- 4215,4240 ----
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("argument number is out of range")));
}
! arg = ordered_nargs;
! /* raise warning when found mixed placeholders, but only once */
! if (positional && !warning_emmited)
{
! elog(WARNING, "ordered and positional placeholders are used together");
! warning_emmited = true;
}
+ ordered = true;
+ }
+ else
+ {
if (ordered && !warning_emmited)
{
elog(WARNING, "ordered and positional placeholders are used together");
warning_emmited = true;
}
! positional = true;
}
/* Not enough arguments? Deduct 1 to avoid counting format string. */
***************
*** 4212,4221 **** text_format(PG_FUNCTION_ARGS)
switch (*cp)
{
case 's':
case 'I':
case 'L':
text_format_string_conversion(&str, *cp, &typoutputfinfo,
! value, isNull);
break;
default:
ereport(ERROR,
--- 4282,4301 ----
switch (*cp)
{
case 's':
+ text_format_string_conversion(&str, *cp, &typoutputfinfo,
+ value, isNull,
+ use_width, width);
+ break;
+
case 'I':
case 'L':
+ if (use_width)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion \"%c\" doesn't support the width specification", *cp)));
text_format_string_conversion(&str, *cp, &typoutputfinfo,
! value, isNull,
! false, 0);
break;
default:
ereport(ERROR,
***************
*** 4242,4248 **** text_format(PG_FUNCTION_ARGS)
static void
text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull)
{
char *str;
--- 4322,4329 ----
static void
text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull,
! bool use_width, int width)
{
char *str;
***************
*** 4276,4282 **** text_format_string_conversion(StringInfo buf, char conversion,
pfree(qstr);
}
else
! appendStringInfoString(buf, str);
/* Cleanup. */
pfree(str);
--- 4357,4386 ----
pfree(qstr);
}
else
! {
! /* fast path */
! if (!use_width)
! appendStringInfoString(buf, str);
! else
! {
! int len = pg_mbstrlen(str);
!
! if (width < 0)
! {
! /* allign to left */
! appendStringInfoString(buf, str);
! if (len < (-width))
! appendStringInfoSpaces(buf, - (len + width));
! }
! else
! {
! /* allign to right */
! if (len < width)
! appendStringInfoSpaces(buf, width - len);
! appendStringInfoString(buf, str);
! }
! }
! }
/* Cleanup. */
pfree(str);
*** a/src/test/regress/expected/text.out
--- b/src/test/regress/expected/text.out
***************
*** 258,269 **** select format('%1$s %4$s', 1, 2, 3);
ERROR: too few arguments for format
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
- select format('%1s', 1);
- ERROR: unterminated conversion specifier
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
! ERROR: unrecognized conversion specifier "1"
-- check mix of positional and ordered placeholders
-- should raise warning
select format('Hello %s %1$s %s', 'World', 'Hello again');
--- 258,267 ----
ERROR: too few arguments for format
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
! ERROR: unterminated conversion specifier
-- check mix of positional and ordered placeholders
-- should raise warning
select format('Hello %s %1$s %s', 'World', 'Hello again');
***************
*** 340,342 **** from generate_series(1,200) g(i);
--- 338,372 ----
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200
(1 row)
+ -- format with a width
+ select format('>>%10s<<', 'Hello');
+ format
+ ----------------
+ >> Hello<<
+ (1 row)
+
+ select format('>>%-10s<<', 'Hello');
+ format
+ ----------------
+ >>Hello <<
+ (1 row)
+
+ select format('>>%1$10s<<', 'Hello');
+ format
+ ----------------
+ >> Hello<<
+ (1 row)
+
+ select format('>>%1$-10s<<', 'Hello');
+ format
+ ----------------
+ >>Hello <<
+ (1 row)
+
+ -- should fail
+ select format('>>%1$1$s<<', 'Hello');
+ ERROR: argument number is defined yet
+ select format('>>%10I<<', 'tablename');
+ ERROR: conversion "I" doesn't support the width specification
+ select format('>>%1$10L<<', 'tablename');
+ ERROR: conversion "L" doesn't support the width specification
*** a/src/test/regress/sql/text.sql
--- b/src/test/regress/sql/text.sql
***************
*** 78,84 **** select format('%1$s %12$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
-- should fail
select format('%1$s %4$s', 1, 2, 3);
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
- select format('%1s', 1);
select format('%1$', 1);
select format('%1$1', 1);
-- check mix of positional and ordered placeholders
--- 78,83 ----
***************
*** 99,101 **** select format('Hello', variadic NULL::int[]);
--- 98,112 ----
-- variadic argument allows simulating more than FUNC_MAX_ARGS parameters
select format(string_agg('%s',','), variadic array_agg(i))
from generate_series(1,200) g(i);
+
+
+ -- format with a width
+ select format('>>%10s<<', 'Hello');
+ select format('>>%-10s<<', 'Hello');
+ select format('>>%1$10s<<', 'Hello');
+ select format('>>%1$-10s<<', 'Hello');
+
+ -- should fail
+ select format('>>%1$1$s<<', 'Hello');
+ select format('>>%10I<<', 'tablename');
+ select format('>>%1$10L<<', 'tablename');
On 26 January 2013 10:58, Pavel Stehule <pavel.stehule@gmail.com> wrote:
updated patches due changes for better variadic "any" function.
apply fix_mixing_positinal_ordered_placeholders_warnings_20130126.patch first
Hi,
No one is listed as a reviewer for this patch so I thought I would
take a look at it, since it looks like a useful enhancement to
format().
Starting with the first patch - it issues a new WARNING if the format
string contains a mixture of format specifiers with and without
parameter indexes (e.g., 'Hello %s, %1$s').
Having thought about it a bit, I really don't like this for a number of reasons:
* I actually quite like the current behaviour. Admittedly putting
ordered specifiers (like '%s') after positional ones (like '%3$s') is
probably not so useful, and potentially open to different
interpretations. But putting positional specifiers at the end is
completely unambiguous and can save a lot of typing (e.g.,
'%s,%s,%s,%s,%,s,%s,%s,%1$s').
* On backwards compatibility grounds. The fact that the only example
of format() in the manual is precisely a case of mixed positional and
ordered parameters makes it quite likely that people will have used
this feature already.
* Part of the justification for adding the warning is for
compatibility with glibc/SUS printf(). But if we are aiming for that,
then we should also produce a warning if positional parameters are
used and not all parameters are consumed. That would be a pain to
implement and I don't think it would be particularly helpful in
practice. Here is what the SUS says:
"""
The format can contain either numbered argument specifications (that
is, %n$ and *m$), or unnumbered argument specifications (that is, %
and *), but normally not both. The only exception to this is that %%
can be mixed with the %n$ form. The results of mixing numbered and
unnumbered argument specifications in a format string are undefined.
When numbered argument specifications are used, specifying the Nth
argument requires that all the leading arguments, from the first to
the (N-1)th, are specified in the format string.
"""
I think that if we are going to do anything, we should explicitly
document our current behaviour as a PostgreSQL extension to the SUS
printf(), describing how we handle mixed parameters, rather than
adding this warning.
The current PostgreSQL code isn't inconsistent with the SUS, except in
the error case, and so can reasonably be regarded as an extension.
Regards,
Dean
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Hello
2013/1/28 Dean Rasheed <dean.a.rasheed@gmail.com>:
On 26 January 2013 10:58, Pavel Stehule <pavel.stehule@gmail.com> wrote:
updated patches due changes for better variadic "any" function.
apply fix_mixing_positinal_ordered_placeholders_warnings_20130126.patch first
Hi,
No one is listed as a reviewer for this patch so I thought I would
take a look at it, since it looks like a useful enhancement to
format().Starting with the first patch - it issues a new WARNING if the format
string contains a mixture of format specifiers with and without
parameter indexes (e.g., 'Hello %s, %1$s').Having thought about it a bit, I really don't like this for a number of reasons:
* I actually quite like the current behaviour. Admittedly putting
ordered specifiers (like '%s') after positional ones (like '%3$s') is
probably not so useful, and potentially open to different
interpretations. But putting positional specifiers at the end is
completely unambiguous and can save a lot of typing (e.g.,
'%s,%s,%s,%s,%,s,%s,%s,%1$s').* On backwards compatibility grounds. The fact that the only example
of format() in the manual is precisely a case of mixed positional and
ordered parameters makes it quite likely that people will have used
this feature already.* Part of the justification for adding the warning is for
compatibility with glibc/SUS printf(). But if we are aiming for that,
then we should also produce a warning if positional parameters are
used and not all parameters are consumed. That would be a pain to
implement and I don't think it would be particularly helpful in
practice. Here is what the SUS says:"""
The format can contain either numbered argument specifications (that
is, %n$ and *m$), or unnumbered argument specifications (that is, %
and *), but normally not both. The only exception to this is that %%
can be mixed with the %n$ form. The results of mixing numbered and
unnumbered argument specifications in a format string are undefined.
When numbered argument specifications are used, specifying the Nth
argument requires that all the leading arguments, from the first to
the (N-1)th, are specified in the format string.
"""I think that if we are going to do anything, we should explicitly
document our current behaviour as a PostgreSQL extension to the SUS
printf(), describing how we handle mixed parameters, rather than
adding this warning.The current PostgreSQL code isn't inconsistent with the SUS, except in
the error case, and so can reasonably be regarded as an extension.
I am not sure what you dislike?
warnings or redesign of behave.
I can live without warnings, when this field will be documented - it
is not fully compatible with gcc, but gcc just raises warnings and
does correct implementation. Our warnings are on different level than
gcc warnings.
But I don't think so current implementation is correct
-- our current behave
postgres=# select format('%s %2$s %s', 'Hello', 'World');
ERROR: too few arguments for format
postgres=#
postgres=# select format('%s %1$s %s', 'Hello', 'World'); -- works
ordered parameters should be independent on positional parameters. And
this behave has glibc
Regards
Pavel
Regards,
Dean
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Pavel Stehule <pavel.stehule@gmail.com> writes:
2013/1/28 Dean Rasheed <dean.a.rasheed@gmail.com>:
Starting with the first patch - it issues a new WARNING if the format
string contains a mixture of format specifiers with and without
parameter indexes (e.g., 'Hello %s, %1$s').Having thought about it a bit, I really don't like this for a number of reasons:
I am not sure what you dislike?
warnings or redesign of behave.
Both. If we had done this when we first implemented format(), it'd be
fine, but it's too late to change it now. There very likely are
applications out there that depend on the current behavior. As Dean
says, it's not incompatible with SUS, just a superset, so ISTM this
patch is proposing to remove documented functionality --- for no very
strong reason.
I vote for rejecting this change entirely.
regards, tom lane
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
* Tom Lane (tgl@sss.pgh.pa.us) wrote:
Both. If we had done this when we first implemented format(), it'd be
fine, but it's too late to change it now. There very likely are
applications out there that depend on the current behavior. As Dean
says, it's not incompatible with SUS, just a superset, so ISTM this
patch is proposing to remove documented functionality --- for no very
strong reason.
It's only a "superset" of the very poor subset of printf()-like
functionality that we currently support through the format() function.
If we can actually match glibc/SUS (which I don't believe the initial
patch did..) and support a mix of explicitly specified arguments and
implicit arguments, along with the various width, precision, and other
format specifications, then fine by me.
I'm not convinced that's actually possible due to the ambiguity which
will certainly arise and I'm quite sure the documentation that explains
what we do in each case will deserve it's own chapter.
Thanks,
Stephen
On 28 January 2013 17:32, Stephen Frost <sfrost@snowman.net> wrote:
* Tom Lane (tgl@sss.pgh.pa.us) wrote:
Both. If we had done this when we first implemented format(), it'd be
fine, but it's too late to change it now. There very likely are
applications out there that depend on the current behavior. As Dean
says, it's not incompatible with SUS, just a superset, so ISTM this
patch is proposing to remove documented functionality --- for no very
strong reason.It's only a "superset" of the very poor subset of printf()-like
functionality that we currently support through the format() function.If we can actually match glibc/SUS (which I don't believe the initial
patch did..) and support a mix of explicitly specified arguments and
implicit arguments, along with the various width, precision, and other
format specifications, then fine by me.I'm not convinced that's actually possible due to the ambiguity which
will certainly arise and I'm quite sure the documentation that explains
what we do in each case will deserve it's own chapter.
There are a number of separate issues here, but I don't see this as an
intractable problem. In general a format specifier looks like:
%[parameter][flags][width][.precision][length]type
parameter - an optional n$. This is where we have implemented a
superset of the SUS printf(). But I think it is a useful superset, and
it's too late to remove it now. Any ambiguity lies here, where we go
beyond the SUS - some printf() implementations appear to do something
different (apparently without documenting what they do). I think our
documentation could be clearer here, to explain how mixed parameters
are handled.
flags - not currently implemented. Pavel's second patch adds support
for the '-' flag for left justified string output. However, I think
this should support all datatypes (i.e., %I and %L as well as %s).
width - not currently implemented. Pavel's second patch adds support
for this, but note that for full compatibility with the SUS it needs
to also support widths specified using * and *n$. Also, I think it
should support all supported datatypes, not just strings.
precision - only relevant to numeric datatypes, which we don't support.
length - only relevant to numeric datatypes, which we don't support.
type - this is where we only support a small subset of the SUS (plus a
couple of SQL-specific types). I'm not sure if anyone has any plans to
extend this, but that's certainly not on the cards for 9.3.
So the relevant pieces that Pavel's second patch is attempting to add
support for are the '-' flag and the width field. As noted above,
there are a couple of areas where the current patch falls short of the
SUS:
1). The '-' flag and width field are supposed to apply to all types. I
think that not supporting %I and %L will be somewhat limiting, and
goes against the intent of the SUS, even though those types are
PostgreSQL extensions.
2). The width field is supposed to support * (width specified by the
next function argument) and *n$ (width specified by the nth function
argument). If we supported this, then we could claim full
compatibility with the SUS in all fields except for the type support,
which would seem like a real step forward.
Regards,
Dean
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
2013/1/28 Dean Rasheed <dean.a.rasheed@gmail.com>:
On 28 January 2013 17:32, Stephen Frost <sfrost@snowman.net> wrote:
* Tom Lane (tgl@sss.pgh.pa.us) wrote:
Both. If we had done this when we first implemented format(), it'd be
fine, but it's too late to change it now. There very likely are
applications out there that depend on the current behavior. As Dean
says, it's not incompatible with SUS, just a superset, so ISTM this
patch is proposing to remove documented functionality --- for no very
strong reason.It's only a "superset" of the very poor subset of printf()-like
functionality that we currently support through the format() function.If we can actually match glibc/SUS (which I don't believe the initial
patch did..) and support a mix of explicitly specified arguments and
implicit arguments, along with the various width, precision, and other
format specifications, then fine by me.I'm not convinced that's actually possible due to the ambiguity which
will certainly arise and I'm quite sure the documentation that explains
what we do in each case will deserve it's own chapter.There are a number of separate issues here, but I don't see this as an
intractable problem. In general a format specifier looks like:%[parameter][flags][width][.precision][length]type
parameter - an optional n$. This is where we have implemented a
superset of the SUS printf(). But I think it is a useful superset, and
it's too late to remove it now. Any ambiguity lies here, where we go
beyond the SUS - some printf() implementations appear to do something
different (apparently without documenting what they do). I think our
documentation could be clearer here, to explain how mixed parameters
are handled.flags - not currently implemented. Pavel's second patch adds support
for the '-' flag for left justified string output. However, I think
this should support all datatypes (i.e., %I and %L as well as %s).
no - surely not - I% and L% is PostgreSQL extension and left or right
alignment is has no sense for PostgreSQL identifiers and PostgreSQL
literals.
Regards
Pavel
width - not currently implemented. Pavel's second patch adds support
for this, but note that for full compatibility with the SUS it needs
to also support widths specified using * and *n$. Also, I think it
should support all supported datatypes, not just strings.precision - only relevant to numeric datatypes, which we don't support.
length - only relevant to numeric datatypes, which we don't support.
type - this is where we only support a small subset of the SUS (plus a
couple of SQL-specific types). I'm not sure if anyone has any plans to
extend this, but that's certainly not on the cards for 9.3.So the relevant pieces that Pavel's second patch is attempting to add
support for are the '-' flag and the width field. As noted above,
there are a couple of areas where the current patch falls short of the
SUS:1). The '-' flag and width field are supposed to apply to all types. I
think that not supporting %I and %L will be somewhat limiting, and
goes against the intent of the SUS, even though those types are
PostgreSQL extensions.2). The width field is supposed to support * (width specified by the
next function argument) and *n$ (width specified by the nth function
argument). If we supported this, then we could claim full
compatibility with the SUS in all fields except for the type support,
which would seem like a real step forward.Regards,
Dean
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 28 January 2013 20:40, Pavel Stehule <pavel.stehule@gmail.com> wrote:
2013/1/28 Dean Rasheed <dean.a.rasheed@gmail.com>:
On 28 January 2013 17:32, Stephen Frost <sfrost@snowman.net> wrote:
* Tom Lane (tgl@sss.pgh.pa.us) wrote:
Both. If we had done this when we first implemented format(), it'd be
fine, but it's too late to change it now. There very likely are
applications out there that depend on the current behavior. As Dean
says, it's not incompatible with SUS, just a superset, so ISTM this
patch is proposing to remove documented functionality --- for no very
strong reason.It's only a "superset" of the very poor subset of printf()-like
functionality that we currently support through the format() function.If we can actually match glibc/SUS (which I don't believe the initial
patch did..) and support a mix of explicitly specified arguments and
implicit arguments, along with the various width, precision, and other
format specifications, then fine by me.I'm not convinced that's actually possible due to the ambiguity which
will certainly arise and I'm quite sure the documentation that explains
what we do in each case will deserve it's own chapter.There are a number of separate issues here, but I don't see this as an
intractable problem. In general a format specifier looks like:%[parameter][flags][width][.precision][length]type
parameter - an optional n$. This is where we have implemented a
superset of the SUS printf(). But I think it is a useful superset, and
it's too late to remove it now. Any ambiguity lies here, where we go
beyond the SUS - some printf() implementations appear to do something
different (apparently without documenting what they do). I think our
documentation could be clearer here, to explain how mixed parameters
are handled.flags - not currently implemented. Pavel's second patch adds support
for the '-' flag for left justified string output. However, I think
this should support all datatypes (i.e., %I and %L as well as %s).no - surely not - I% and L% is PostgreSQL extension and left or right
alignment is has no sense for PostgreSQL identifiers and PostgreSQL
literals.
Left/right alignment and padding in printf() apply to all types, after
the data value is converted to a string. Why shouldn't that same
principle apply to %I and %L? The obvious use-case is for producing
tabular output of data with columns neatly aligned. If we don't
support %I and %L then any alignment of columns to the right is lost.
Regards,
Dean
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Dean Rasheed <dean.a.rasheed@gmail.com> writes:
On 28 January 2013 20:40, Pavel Stehule <pavel.stehule@gmail.com> wrote:
2013/1/28 Dean Rasheed <dean.a.rasheed@gmail.com>:
flags - not currently implemented. Pavel's second patch adds support
for the '-' flag for left justified string output. However, I think
this should support all datatypes (i.e., %I and %L as well as %s).
no - surely not - I% and L% is PostgreSQL extension and left or right
alignment is has no sense for PostgreSQL identifiers and PostgreSQL
literals.
Left/right alignment and padding in printf() apply to all types, after
the data value is converted to a string. Why shouldn't that same
principle apply to %I and %L?
I agree with Dean --- it would be very strange for these features not to
apply to all conversion specifiers (excepting %% of course, which isn't
really a conversion specifier but an escaping hack).
regards, tom lane
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 28 January 2013 20:32, Dean Rasheed <dean.a.rasheed@gmail.com> wrote:
In general a format specifier looks like:
%[parameter][flags][width][.precision][length]type
This highlights another problem with the current implementation ---
the '-' flag and the width field need to be parsed separately. So
'%-3s' should be parsed as a '-' flag followed by a width of 3, not as
a width of -3, which is then interpreted as left-aligned. This might
seem like nitpicking, but actually it is important:
* In the future we might support more flags, and they can be specified
in any order, so the '-' flag won't necessarily come immediately
before the width.
* The width field is optional, even if the '-' flag is specified. So
'%-s' is perfectly legal and should be interpreted as '%s'. The
current implementation treats it as a width of 0, which is wrong.
* The width field might not be a number, it might be something like *
or *3$. Note that the SUS allows a negative width to be passed in as a
function argument using this syntax, in which case it should be
treated as if the '-' flag were specified.
Regards,
Dean
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 29 January 2013 08:19, Dean Rasheed <dean.a.rasheed@gmail.com> wrote:
* The width field is optional, even if the '-' flag is specified. So
'%-s' is perfectly legal and should be interpreted as '%s'. The
current implementation treats it as a width of 0, which is wrong.
Oh, but of course a width of 0 is the same as no width at all, so the
current code is correct after all. That's what happens if I try to
write emails before I've had my caffeine :-)
I think my other points remain valid though. It would still be neater
to parse the flags separately from the width field, and then all
literal numbers that appear in the format should be positive.
Regards,
Dean
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
2013/1/28 Tom Lane <tgl@sss.pgh.pa.us>:
Pavel Stehule <pavel.stehule@gmail.com> writes:
2013/1/28 Dean Rasheed <dean.a.rasheed@gmail.com>:
Starting with the first patch - it issues a new WARNING if the format
string contains a mixture of format specifiers with and without
parameter indexes (e.g., 'Hello %s, %1$s').Having thought about it a bit, I really don't like this for a number of reasons:
I am not sure what you dislike?
warnings or redesign of behave.Both. If we had done this when we first implemented format(), it'd be
fine, but it's too late to change it now. There very likely are
applications out there that depend on the current behavior. As Dean
says, it's not incompatible with SUS, just a superset, so ISTM this
patch is proposing to remove documented functionality --- for no very
strong reason.
I disagree - but I have not a arguments. I am thinking so current
implementation is wrong, and now is last time when we can to fix it -
format() function is not too old and there is relative chance to
minimal impact to users.
I didn't propose removing this functionality, but fixing via more
logical independent counter for ordered arguments. Dependency on
previous positional argument is unpractical and unclean. I am not
satisfied so it is undefined and then it is ok.
Regards
Pavel
I vote for rejecting this change entirely.
regards, tom lane
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
2013/1/28 Tom Lane <tgl@sss.pgh.pa.us>:
Dean Rasheed <dean.a.rasheed@gmail.com> writes:
On 28 January 2013 20:40, Pavel Stehule <pavel.stehule@gmail.com> wrote:
2013/1/28 Dean Rasheed <dean.a.rasheed@gmail.com>:
flags - not currently implemented. Pavel's second patch adds support
for the '-' flag for left justified string output. However, I think
this should support all datatypes (i.e., %I and %L as well as %s).no - surely not - I% and L% is PostgreSQL extension and left or right
alignment is has no sense for PostgreSQL identifiers and PostgreSQL
literals.Left/right alignment and padding in printf() apply to all types, after
the data value is converted to a string. Why shouldn't that same
principle apply to %I and %L?I agree with Dean --- it would be very strange for these features not to
apply to all conversion specifiers (excepting %% of course, which isn't
really a conversion specifier but an escaping hack).
ok - I have no problem with it - after some thinking - just remove one check.
Regards
Pavel
regards, tom lane
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
2013/1/29 Dean Rasheed <dean.a.rasheed@gmail.com>:
On 28 January 2013 20:32, Dean Rasheed <dean.a.rasheed@gmail.com> wrote:
In general a format specifier looks like:
%[parameter][flags][width][.precision][length]type
This highlights another problem with the current implementation ---
the '-' flag and the width field need to be parsed separately. So
'%-3s' should be parsed as a '-' flag followed by a width of 3, not as
a width of -3, which is then interpreted as left-aligned. This might
seem like nitpicking, but actually it is important:* In the future we might support more flags, and they can be specified
in any order, so the '-' flag won't necessarily come immediately
before the width.* The width field is optional, even if the '-' flag is specified. So
'%-s' is perfectly legal and should be interpreted as '%s'. The
current implementation treats it as a width of 0, which is wrong.* The width field might not be a number, it might be something like *
or *3$. Note that the SUS allows a negative width to be passed in as a
function argument using this syntax, in which case it should be
treated as if the '-' flag were specified.
A possibility to specify width as * can be implemented in future. The
format() function was not designed to be fully compatible with SUS -
it is simplified subset with pg enhancing.
There was a talks about integration to_char() formats to format() and
we didn't block it - and it was reason why I proposed and pushed name
"format" and not "printf", because there can be little bit different
purposes than generic printf function.
Regards
Pavel
Regards,
Dean
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Hello
2013/1/29 Dean Rasheed <dean.a.rasheed@gmail.com>:
On 29 January 2013 08:19, Dean Rasheed <dean.a.rasheed@gmail.com> wrote:
* The width field is optional, even if the '-' flag is specified. So
'%-s' is perfectly legal and should be interpreted as '%s'. The
current implementation treats it as a width of 0, which is wrong.Oh, but of course a width of 0 is the same as no width at all, so the
current code is correct after all. That's what happens if I try to
write emails before I've had my caffeine :-)I think my other points remain valid though. It would still be neater
to parse the flags separately from the width field, and then all
literal numbers that appear in the format should be positive.
I am sending rewritten code
It indirect width "*" and "*n$" is supported. It needs little bit more code.
There are a new question
what should be result of
format(">>%2$*1$s<<", NULL, "hello")
???
raise exception now, but I am able to modify to some agreement
Regards
Pavel
Show quoted text
Regards,
Dean
Attachments:
format_width_20130131.patchapplication/octet-stream; name=format_width_20130131.patchDownload
*** a/src/backend/utils/adt/varlena.c
--- b/src/backend/utils/adt/varlena.c
***************
*** 78,84 **** static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
static void text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
const char *fldsep, const char *null_string);
--- 78,85 ----
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
static void text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull,
! int flags, bool use_width, int width);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
const char *fldsep, const char *null_string);
***************
*** 3997,4002 **** text_reverse(PG_FUNCTION_ARGS)
--- 3998,4134 ----
}
/*
+ * Returns pointer of first non digit char
+ *
+ * When some digits are processed, then inum returns parsed number
+ * and is_valid is true, otherwise is_valid is false.
+ */
+ static const char *
+ text_format_parse_digits(const char *start_ptr, const char *end_ptr,
+ int *inum, /* OUT param */
+ bool *is_valid) /* OUT param */
+ {
+ const char *cp = start_ptr;
+
+ *is_valid = false;
+ *inum = 0;
+
+ /* continue, only when start_ptr is less than end_ptr */
+ while (cp < end_ptr)
+ {
+ if (*cp >= '0' && *cp <= '9')
+ {
+ int newnum = *inum * 10 + (*cp - '0');
+
+ if (newnum / 10 != *inum) /* overflow? */
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("number is out of range")));
+ *inum = newnum;
+ *is_valid = true;
+ ++cp;
+ }
+ else
+ break;
+ }
+
+ /* should not be after last char of format string */
+ if (cp >= end_ptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unterminated conversion specifier")));
+
+ return cp;
+ }
+
+ #define TEXT_FORMAT_FLAG_MINUS 0x0001 /* is minus in format string? */
+
+ #define TEXT_FORMAT_NEXT_CHAR(ptr, end_ptr) \
+ do { \
+ if (++(ptr) >= (end_ptr)) \
+ ereport(ERROR, \
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
+ errmsg("unterminated conversion specifier"))); \
+ } while (0)
+
+ /*
+ * parse format specification
+ * [parameter][flags][width]type
+ *
+ * -1 in int OUT parameters signalize so this attribut is missing,
+ * all values should be zero or greather than zero. Raise exeption
+ * when some parameter is out of range.
+ */
+ static const char *
+ text_format_parse_format(const char *start_ptr, const char *end_ptr,
+ int *parameter, /* OUT param */
+ int *flags, /* OUT param */
+ int *width, /* OUT param */
+ bool *indirect_width, /* OUT param */
+ int *indirect_width_parameter) /* OUT param */
+ {
+ int inum;
+ bool is_valid;
+
+ /* set defaults to out parameters */
+ *parameter = -1;
+ *flags = 0;
+ *width = -1;
+ *indirect_width = false;
+ *indirect_width_parameter = -1;
+
+ /* try to identify first number */
+ start_ptr = text_format_parse_digits(start_ptr, end_ptr, &inum, &is_valid);
+ if (is_valid)
+ {
+ /* number on this position can be parameter number or the width */
+ if (*start_ptr != '$')
+ {
+ *width = inum;
+ return start_ptr;
+ }
+
+ *parameter = inum;
+ TEXT_FORMAT_NEXT_CHAR(start_ptr, end_ptr);
+ }
+
+ /* try to parse flags, only minus is supported now */
+ if (*start_ptr == '-')
+ {
+ *flags = *flags | TEXT_FORMAT_FLAG_MINUS;
+ TEXT_FORMAT_NEXT_CHAR(start_ptr, end_ptr);
+ }
+
+ /* try to parse indirect width */
+ if (*start_ptr == '*')
+ {
+ start_ptr = text_format_parse_digits(++start_ptr, end_ptr, &inum, &is_valid);
+ if (is_valid)
+ {
+ /* number in this position should be closed by $ */
+ if (*start_ptr != '$')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unexpected char \"%c\"", *start_ptr)));
+ TEXT_FORMAT_NEXT_CHAR(start_ptr, end_ptr);
+
+ *indirect_width_parameter = inum;
+ }
+ else
+ *indirect_width = true;
+
+ return start_ptr;
+ }
+
+ /* last possible nummber - width */
+ start_ptr = text_format_parse_digits(start_ptr, end_ptr, &inum, &is_valid);
+ if (is_valid)
+ *width = inum;
+
+ return start_ptr;
+ }
+
+ /*
* Returns a formated string
*/
Datum
***************
*** 4016,4021 **** text_format(PG_FUNCTION_ARGS)
--- 4148,4155 ----
Oid element_type = InvalidOid;
Oid prev_type = InvalidOid;
FmgrInfo typoutputfinfo;
+ FmgrInfo typoutputinfo_width;
+ Oid prev_type_width = InvalidOid;
/* When format string is null, returns null */
if (PG_ARGISNULL(0))
***************
*** 4088,4093 **** text_format(PG_FUNCTION_ARGS)
--- 4222,4233 ----
Datum value;
bool isNull;
Oid typid;
+ int parameter;
+ int flags;
+ int width;
+ bool indirect_width;
+ int indirect_width_parameter;
+ bool use_width;
/*
* If it's not the start of a conversion specifier, just copy it to
***************
*** 4099,4109 **** text_format(PG_FUNCTION_ARGS)
continue;
}
! /* Did we run off the end of the string? */
! if (++cp >= end_ptr)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("unterminated conversion specifier")));
/* Easy case: %% outputs a single % */
if (*cp == '%')
--- 4239,4246 ----
continue;
}
! /* go to next char */
! TEXT_FORMAT_NEXT_CHAR(cp, end_ptr);
/* Easy case: %% outputs a single % */
if (*cp == '%')
***************
*** 4112,4125 **** text_format(PG_FUNCTION_ARGS)
continue;
}
! /*
! * If the user hasn't specified an argument position, we just advance
! * to the next one. If they have, we must parse it.
! */
! if (*cp < '0' || *cp > '9')
{
! ++arg;
! if (arg <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
--- 4249,4264 ----
continue;
}
! cp = text_format_parse_format(cp, end_ptr,
! ¶meter,
! &flags,
! &width,
! &indirect_width,
! &indirect_width_parameter);
!
! if (indirect_width)
{
! if (++arg <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
***************
*** 4129,4177 **** text_format(PG_FUNCTION_ARGS)
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("argument number is out of range")));
}
}
! else
{
! bool unterminated = false;
!
! /* Parse digit string. */
! arg = 0;
! do
! {
! int newarg = arg * 10 + (*cp - '0');
!
! if (newarg / 10 != arg) /* overflow? */
! ereport(ERROR,
! (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
! errmsg("argument number is out of range")));
! arg = newarg;
! ++cp;
! } while (cp < end_ptr && *cp >= '0' && *cp <= '9');
! /*
! * If we ran off the end, or if there's not a $ next, or if the $
! * is the last character, the conversion specifier is improperly
! * terminated.
! */
! if (cp == end_ptr || *cp != '$')
! unterminated = true;
! else
{
! ++cp;
! if (cp == end_ptr)
! unterminated = true;
! }
! if (unterminated)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("unterminated conversion specifier")));
!
! /* There's no argument 0. */
! if (arg == 0)
ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
}
/* Not enough arguments? Deduct 1 to avoid counting format string. */
if (arg > nargs - 1)
--- 4268,4301 ----
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("argument number is out of range")));
}
+ indirect_width_parameter = arg;
}
! else if (indirect_width_parameter != -1)
{
! /* be consistent, move ordered argument together with positional */
! arg = indirect_width_parameter;
! }
! if (parameter == -1)
! {
! if (++arg <= 0) /* overflow? */
{
! /*
! * Should not happen, as you can't pass billions of arguments
! * to a function, but better safe than sorry.
! */
ereport(ERROR,
! (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
! errmsg("argument number is out of range")));
! }
}
+ else
+ arg = parameter;
+
+ if (arg == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
/* Not enough arguments? Deduct 1 to avoid counting format string. */
if (arg > nargs - 1)
***************
*** 4179,4184 **** text_format(PG_FUNCTION_ARGS)
--- 4303,4380 ----
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("too few arguments for format")));
+ if (indirect_width_parameter != -1)
+ {
+
+ if (indirect_width_parameter == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
+
+ /* Not enough arguments? Deduct 1 to avoid counting format string. */
+ if (indirect_width_parameter > nargs - 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("too few arguments for format")));
+
+ /* get value of related parameter that holds width and coerce to int */
+ if (!funcvariadic)
+ {
+ value = PG_GETARG_DATUM(indirect_width_parameter);
+ isNull = PG_ARGISNULL(indirect_width_parameter);
+ typid = get_fn_expr_argtype(fcinfo->flinfo, indirect_width_parameter);
+ }
+ else
+ {
+ value = elements[indirect_width_parameter - 1];
+ isNull = nulls[indirect_width_parameter - 1];
+ typid = element_type;
+ }
+ if (!OidIsValid(typid))
+ elog(ERROR, "could not determine data type of format() input");
+
+ /* QUESTION: is it correct? */
+ if (isNull)
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("width should not be NULL")));
+
+ /* simple IO cast to int */
+ if (typid != INT4OID)
+ {
+ char *str;
+
+ if (typid != prev_type_width)
+ {
+ Oid typoutputfunc;
+ bool typIsVarlena;
+
+ getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
+ fmgr_info(typoutputfunc, &typoutputinfo_width);
+ prev_type_width = typid;
+ }
+
+ /* Stringify. */
+ str = OutputFunctionCall(&typoutputinfo_width, value);
+
+ /* get int value */
+ width = pg_atoi(str, sizeof(int32), '\0');
+ pfree(str);
+ }
+ else
+ width = DatumGetInt32(value);
+
+ use_width = true;
+ }
+ else if (width != -1)
+ {
+ use_width = true;
+ }
+ else
+ {
+ use_width = false;
+ }
+
/* Get the value and type of the selected argument */
if (!funcvariadic)
{
***************
*** 4221,4227 **** text_format(PG_FUNCTION_ARGS)
case 'I':
case 'L':
text_format_string_conversion(&str, *cp, &typoutputfinfo,
! value, isNull);
break;
default:
ereport(ERROR,
--- 4417,4424 ----
case 'I':
case 'L':
text_format_string_conversion(&str, *cp, &typoutputfinfo,
! value, isNull,
! flags, use_width, width);
break;
default:
ereport(ERROR,
***************
*** 4244,4254 **** text_format(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(result);
}
/* Format a %s, %I, or %L conversion. */
static void
text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull)
{
char *str;
--- 4441,4497 ----
PG_RETURN_TEXT_P(result);
}
+ /*
+ * Add spaces on begin or on end when it is necessary
+ */
+ static void
+ text_format_append_string(StringInfo buf, const char *str,
+ int flags, bool use_width, int width)
+ {
+ bool align_to_left;
+ int len;
+
+ /* fast path */
+ if (!use_width)
+ {
+ appendStringInfoString(buf, str);
+ return;
+ }
+
+ if (width < 0)
+ {
+ width = -width;
+ align_to_left = true;
+ }
+ else if (flags & TEXT_FORMAT_FLAG_MINUS)
+ {
+ align_to_left = true;
+ }
+ else
+ align_to_left = false;
+
+ len = pg_mbstrlen(str);
+ if (align_to_left)
+ {
+ appendStringInfoString(buf, str);
+ if (len < width)
+ appendStringInfoSpaces(buf, width - len);
+ }
+ else
+ {
+ /* align_to_right */
+ if (len < width)
+ appendStringInfoSpaces(buf, width - len);
+ appendStringInfoString(buf, str);
+ }
+ }
+
/* Format a %s, %I, or %L conversion. */
static void
text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull,
! int flags, bool use_width, int width)
{
char *str;
***************
*** 4271,4288 **** text_format_string_conversion(StringInfo buf, char conversion,
if (conversion == 'I')
{
/* quote_identifier may or may not allocate a new string. */
! appendStringInfoString(buf, quote_identifier(str));
}
else if (conversion == 'L')
! {
! char *qstr = quote_literal_cstr(str);
! appendStringInfoString(buf, qstr);
/* quote_literal_cstr() always allocates a new string */
pfree(qstr);
}
else
! appendStringInfoString(buf, str);
/* Cleanup. */
pfree(str);
--- 4514,4533 ----
if (conversion == 'I')
{
/* quote_identifier may or may not allocate a new string. */
! text_format_append_string(buf, quote_identifier(str),
! flags, use_width, width);
}
else if (conversion == 'L')
! { char *qstr = quote_literal_cstr(str);
! text_format_append_string(buf, qstr,
! flags, use_width, width);
/* quote_literal_cstr() always allocates a new string */
pfree(qstr);
}
else
! text_format_append_string(buf, str,
! flags, use_width, width);
/* Cleanup. */
pfree(str);
*** a/src/test/regress/expected/text.out
--- b/src/test/regress/expected/text.out
***************
*** 257,267 **** ERROR: too few arguments for format
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
select format('%1s', 1);
! ERROR: unterminated conversion specifier
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
! ERROR: unrecognized conversion specifier "1"
-- check mix of positional and ordered placeholders
select format('Hello %s %1$s %s', 'World', 'Hello again');
format
--- 257,271 ----
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
select format('%1s', 1);
! format
! --------
! 1
! (1 row)
!
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
! ERROR: unterminated conversion specifier
-- check mix of positional and ordered placeholders
select format('Hello %s %1$s %s', 'World', 'Hello again');
format
***************
*** 328,330 **** from generate_series(1,200) g(i);
--- 332,371 ----
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200
(1 row)
+ -- left, right align
+ select format('>>%10s<<', 'Hello')
+ union all
+ select format('>>%-10s<<', 'Hello')
+ union all
+ select format('>>%1$10s<<', 'Hello')
+ union all
+ select format('>>%1$-10I<<', 'Hello')
+ union all
+ select format('>>%2$*1$L<<', 10, 'Hello')
+ union all
+ select format('>>%*s<<', 10, 'Hello');
+ format
+ ----------------
+ >> Hello<<
+ >>Hello <<
+ >> Hello<<
+ >>"Hello" <<
+ >> 'Hello'<<
+ >> Hello<<
+ (6 rows)
+
+ select format('>>%*1$s<<', 10, 'Hello');
+ format
+ ----------------
+ >> Hello<<
+ (1 row)
+
+ select format('>>%-s<<', 'Hello');
+ format
+ -----------
+ >>Hello<<
+ (1 row)
+
+ -- should fail
+ select format('>>%2$*1$L<<', NULL, 'Hello');
+ ERROR: width should not be NULL
*** a/src/test/regress/sql/text.sql
--- b/src/test/regress/sql/text.sql
***************
*** 97,99 **** select format('Hello', variadic NULL);
--- 97,120 ----
-- variadic argument allows simulating more than FUNC_MAX_ARGS parameters
select format(string_agg('%s',','), variadic array_agg(i))
from generate_series(1,200) g(i);
+
+ -- left, right align
+ select format('>>%10s<<', 'Hello')
+ union all
+ select format('>>%-10s<<', 'Hello')
+ union all
+ select format('>>%1$10s<<', 'Hello')
+ union all
+ select format('>>%1$-10I<<', 'Hello')
+ union all
+ select format('>>%2$*1$L<<', 10, 'Hello')
+ union all
+ select format('>>%*s<<', 10, 'Hello');
+
+ select format('>>%*1$s<<', 10, 'Hello');
+ select format('>>%-s<<', 'Hello');
+
+ -- should fail
+ select format('>>%2$*1$L<<', NULL, 'Hello');
+
+
Hello
minor update - fix align NULL for %L
Regards
Pavel
2013/1/31 Pavel Stehule <pavel.stehule@gmail.com>:
Show quoted text
Hello
2013/1/29 Dean Rasheed <dean.a.rasheed@gmail.com>:
On 29 January 2013 08:19, Dean Rasheed <dean.a.rasheed@gmail.com> wrote:
* The width field is optional, even if the '-' flag is specified. So
'%-s' is perfectly legal and should be interpreted as '%s'. The
current implementation treats it as a width of 0, which is wrong.Oh, but of course a width of 0 is the same as no width at all, so the
current code is correct after all. That's what happens if I try to
write emails before I've had my caffeine :-)I think my other points remain valid though. It would still be neater
to parse the flags separately from the width field, and then all
literal numbers that appear in the format should be positive.I am sending rewritten code
It indirect width "*" and "*n$" is supported. It needs little bit more code.
There are a new question
what should be result of
format(">>%2$*1$s<<", NULL, "hello")
???
raise exception now, but I am able to modify to some agreement
Regards
Pavel
Regards,
Dean
Attachments:
format_width_20130201.patchapplication/octet-stream; name=format_width_20130201.patchDownload
*** a/src/backend/utils/adt/varlena.c
--- b/src/backend/utils/adt/varlena.c
***************
*** 78,84 **** static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
static void text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
const char *fldsep, const char *null_string);
--- 78,85 ----
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
static void text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull,
! int flags, bool use_width, int width);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
const char *fldsep, const char *null_string);
***************
*** 3997,4002 **** text_reverse(PG_FUNCTION_ARGS)
--- 3998,4134 ----
}
/*
+ * Returns pointer of first non digit char
+ *
+ * When some digits are processed, then inum returns parsed number
+ * and is_valid is true, otherwise is_valid is false.
+ */
+ static const char *
+ text_format_parse_digits(const char *start_ptr, const char *end_ptr,
+ int *inum, /* OUT param */
+ bool *is_valid) /* OUT param */
+ {
+ const char *cp = start_ptr;
+
+ *is_valid = false;
+ *inum = 0;
+
+ /* continue, only when start_ptr is less than end_ptr */
+ while (cp < end_ptr)
+ {
+ if (*cp >= '0' && *cp <= '9')
+ {
+ int newnum = *inum * 10 + (*cp - '0');
+
+ if (newnum / 10 != *inum) /* overflow? */
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("number is out of range")));
+ *inum = newnum;
+ *is_valid = true;
+ ++cp;
+ }
+ else
+ break;
+ }
+
+ /* should not be after last char of format string */
+ if (cp >= end_ptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unterminated conversion specifier")));
+
+ return cp;
+ }
+
+ #define TEXT_FORMAT_FLAG_MINUS 0x0001 /* is minus in format string? */
+
+ #define TEXT_FORMAT_NEXT_CHAR(ptr, end_ptr) \
+ do { \
+ if (++(ptr) >= (end_ptr)) \
+ ereport(ERROR, \
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
+ errmsg("unterminated conversion specifier"))); \
+ } while (0)
+
+ /*
+ * parse format specification
+ * [parameter][flags][width]type
+ *
+ * -1 in int OUT parameters signalize so this attribut is missing,
+ * all values should be zero or greather than zero. Raise exeption
+ * when some parameter is out of range.
+ */
+ static const char *
+ text_format_parse_format(const char *start_ptr, const char *end_ptr,
+ int *parameter, /* OUT param */
+ int *flags, /* OUT param */
+ int *width, /* OUT param */
+ bool *indirect_width, /* OUT param */
+ int *indirect_width_parameter) /* OUT param */
+ {
+ int inum;
+ bool is_valid;
+
+ /* set defaults to out parameters */
+ *parameter = -1;
+ *flags = 0;
+ *width = -1;
+ *indirect_width = false;
+ *indirect_width_parameter = -1;
+
+ /* try to identify first number */
+ start_ptr = text_format_parse_digits(start_ptr, end_ptr, &inum, &is_valid);
+ if (is_valid)
+ {
+ /* number on this position can be parameter number or the width */
+ if (*start_ptr != '$')
+ {
+ *width = inum;
+ return start_ptr;
+ }
+
+ *parameter = inum;
+ TEXT_FORMAT_NEXT_CHAR(start_ptr, end_ptr);
+ }
+
+ /* try to parse flags, only minus is supported now */
+ if (*start_ptr == '-')
+ {
+ *flags = *flags | TEXT_FORMAT_FLAG_MINUS;
+ TEXT_FORMAT_NEXT_CHAR(start_ptr, end_ptr);
+ }
+
+ /* try to parse indirect width */
+ if (*start_ptr == '*')
+ {
+ start_ptr = text_format_parse_digits(++start_ptr, end_ptr, &inum, &is_valid);
+ if (is_valid)
+ {
+ /* number in this position should be closed by $ */
+ if (*start_ptr != '$')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unexpected char \"%c\"", *start_ptr)));
+ TEXT_FORMAT_NEXT_CHAR(start_ptr, end_ptr);
+
+ *indirect_width_parameter = inum;
+ }
+ else
+ *indirect_width = true;
+
+ return start_ptr;
+ }
+
+ /* last possible nummber - width */
+ start_ptr = text_format_parse_digits(start_ptr, end_ptr, &inum, &is_valid);
+ if (is_valid)
+ *width = inum;
+
+ return start_ptr;
+ }
+
+ /*
* Returns a formated string
*/
Datum
***************
*** 4016,4021 **** text_format(PG_FUNCTION_ARGS)
--- 4148,4155 ----
Oid element_type = InvalidOid;
Oid prev_type = InvalidOid;
FmgrInfo typoutputfinfo;
+ FmgrInfo typoutputinfo_width;
+ Oid prev_type_width = InvalidOid;
/* When format string is null, returns null */
if (PG_ARGISNULL(0))
***************
*** 4088,4093 **** text_format(PG_FUNCTION_ARGS)
--- 4222,4233 ----
Datum value;
bool isNull;
Oid typid;
+ int parameter;
+ int flags;
+ int width;
+ bool indirect_width;
+ int indirect_width_parameter;
+ bool use_width;
/*
* If it's not the start of a conversion specifier, just copy it to
***************
*** 4099,4109 **** text_format(PG_FUNCTION_ARGS)
continue;
}
! /* Did we run off the end of the string? */
! if (++cp >= end_ptr)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("unterminated conversion specifier")));
/* Easy case: %% outputs a single % */
if (*cp == '%')
--- 4239,4246 ----
continue;
}
! /* go to next char */
! TEXT_FORMAT_NEXT_CHAR(cp, end_ptr);
/* Easy case: %% outputs a single % */
if (*cp == '%')
***************
*** 4112,4125 **** text_format(PG_FUNCTION_ARGS)
continue;
}
! /*
! * If the user hasn't specified an argument position, we just advance
! * to the next one. If they have, we must parse it.
! */
! if (*cp < '0' || *cp > '9')
{
! ++arg;
! if (arg <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
--- 4249,4264 ----
continue;
}
! cp = text_format_parse_format(cp, end_ptr,
! ¶meter,
! &flags,
! &width,
! &indirect_width,
! &indirect_width_parameter);
!
! if (indirect_width)
{
! if (++arg <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
***************
*** 4129,4177 **** text_format(PG_FUNCTION_ARGS)
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("argument number is out of range")));
}
}
! else
{
! bool unterminated = false;
!
! /* Parse digit string. */
! arg = 0;
! do
! {
! int newarg = arg * 10 + (*cp - '0');
!
! if (newarg / 10 != arg) /* overflow? */
! ereport(ERROR,
! (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
! errmsg("argument number is out of range")));
! arg = newarg;
! ++cp;
! } while (cp < end_ptr && *cp >= '0' && *cp <= '9');
! /*
! * If we ran off the end, or if there's not a $ next, or if the $
! * is the last character, the conversion specifier is improperly
! * terminated.
! */
! if (cp == end_ptr || *cp != '$')
! unterminated = true;
! else
{
! ++cp;
! if (cp == end_ptr)
! unterminated = true;
! }
! if (unterminated)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("unterminated conversion specifier")));
!
! /* There's no argument 0. */
! if (arg == 0)
ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
}
/* Not enough arguments? Deduct 1 to avoid counting format string. */
if (arg > nargs - 1)
--- 4268,4301 ----
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("argument number is out of range")));
}
+ indirect_width_parameter = arg;
}
! else if (indirect_width_parameter != -1)
{
! /* be consistent, move ordered argument together with positional */
! arg = indirect_width_parameter;
! }
! if (parameter == -1)
! {
! if (++arg <= 0) /* overflow? */
{
! /*
! * Should not happen, as you can't pass billions of arguments
! * to a function, but better safe than sorry.
! */
ereport(ERROR,
! (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
! errmsg("argument number is out of range")));
! }
}
+ else
+ arg = parameter;
+
+ if (arg == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
/* Not enough arguments? Deduct 1 to avoid counting format string. */
if (arg > nargs - 1)
***************
*** 4179,4184 **** text_format(PG_FUNCTION_ARGS)
--- 4303,4376 ----
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("too few arguments for format")));
+ if (indirect_width_parameter != -1)
+ {
+
+ if (indirect_width_parameter == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
+
+ /* Not enough arguments? Deduct 1 to avoid counting format string. */
+ if (indirect_width_parameter > nargs - 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("too few arguments for format")));
+
+ /* get value of related parameter that holds width and coerce to int */
+ if (!funcvariadic)
+ {
+ value = PG_GETARG_DATUM(indirect_width_parameter);
+ isNull = PG_ARGISNULL(indirect_width_parameter);
+ typid = get_fn_expr_argtype(fcinfo->flinfo, indirect_width_parameter);
+ }
+ else
+ {
+ value = elements[indirect_width_parameter - 1];
+ isNull = nulls[indirect_width_parameter - 1];
+ typid = element_type;
+ }
+ if (!OidIsValid(typid))
+ elog(ERROR, "could not determine data type of format() input");
+
+ /* QUESTION: is it correct? */
+ if (isNull)
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("width should not be NULL")));
+
+ /* simple IO cast to int */
+ if (typid != INT4OID)
+ {
+ char *str;
+
+ if (typid != prev_type_width)
+ {
+ Oid typoutputfunc;
+ bool typIsVarlena;
+
+ getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
+ fmgr_info(typoutputfunc, &typoutputinfo_width);
+ prev_type_width = typid;
+ }
+
+ /* Stringify. */
+ str = OutputFunctionCall(&typoutputinfo_width, value);
+
+ /* get int value */
+ width = pg_atoi(str, sizeof(int32), '\0');
+ pfree(str);
+ }
+ else
+ width = DatumGetInt32(value);
+
+ use_width = true;
+ }
+ else if (width != -1)
+ use_width = true;
+ else
+ use_width = false;
+
/* Get the value and type of the selected argument */
if (!funcvariadic)
{
***************
*** 4221,4227 **** text_format(PG_FUNCTION_ARGS)
case 'I':
case 'L':
text_format_string_conversion(&str, *cp, &typoutputfinfo,
! value, isNull);
break;
default:
ereport(ERROR,
--- 4413,4420 ----
case 'I':
case 'L':
text_format_string_conversion(&str, *cp, &typoutputfinfo,
! value, isNull,
! flags, use_width, width);
break;
default:
ereport(ERROR,
***************
*** 4244,4254 **** text_format(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(result);
}
/* Format a %s, %I, or %L conversion. */
static void
text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull)
{
char *str;
--- 4437,4493 ----
PG_RETURN_TEXT_P(result);
}
+ /*
+ * Add spaces on begin or on end when it is necessary
+ */
+ static void
+ text_format_append_string(StringInfo buf, const char *str,
+ int flags, bool use_width, int width)
+ {
+ bool align_to_left;
+ int len;
+
+ /* fast path */
+ if (!use_width)
+ {
+ appendStringInfoString(buf, str);
+ return;
+ }
+
+ if (width < 0)
+ {
+ width = -width;
+ align_to_left = true;
+ }
+ else if (flags & TEXT_FORMAT_FLAG_MINUS)
+ {
+ align_to_left = true;
+ }
+ else
+ align_to_left = false;
+
+ len = pg_mbstrlen(str);
+ if (align_to_left)
+ {
+ appendStringInfoString(buf, str);
+ if (len < width)
+ appendStringInfoSpaces(buf, width - len);
+ }
+ else
+ {
+ /* align_to_right */
+ if (len < width)
+ appendStringInfoSpaces(buf, width - len);
+ appendStringInfoString(buf, str);
+ }
+ }
+
/* Format a %s, %I, or %L conversion. */
static void
text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull,
! int flags, bool use_width, int width)
{
char *str;
***************
*** 4256,4262 **** text_format_string_conversion(StringInfo buf, char conversion,
if (isNull)
{
if (conversion == 'L')
! appendStringInfoString(buf, "NULL");
else if (conversion == 'I')
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
--- 4495,4502 ----
if (isNull)
{
if (conversion == 'L')
! text_format_append_string(buf, "NULL",
! flags, use_width, width);
else if (conversion == 'I')
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
***************
*** 4271,4288 **** text_format_string_conversion(StringInfo buf, char conversion,
if (conversion == 'I')
{
/* quote_identifier may or may not allocate a new string. */
! appendStringInfoString(buf, quote_identifier(str));
}
else if (conversion == 'L')
! {
! char *qstr = quote_literal_cstr(str);
! appendStringInfoString(buf, qstr);
/* quote_literal_cstr() always allocates a new string */
pfree(qstr);
}
else
! appendStringInfoString(buf, str);
/* Cleanup. */
pfree(str);
--- 4511,4530 ----
if (conversion == 'I')
{
/* quote_identifier may or may not allocate a new string. */
! text_format_append_string(buf, quote_identifier(str),
! flags, use_width, width);
}
else if (conversion == 'L')
! { char *qstr = quote_literal_cstr(str);
! text_format_append_string(buf, qstr,
! flags, use_width, width);
/* quote_literal_cstr() always allocates a new string */
pfree(qstr);
}
else
! text_format_append_string(buf, str,
! flags, use_width, width);
/* Cleanup. */
pfree(str);
*** a/src/test/regress/expected/text.out
--- b/src/test/regress/expected/text.out
***************
*** 257,267 **** ERROR: too few arguments for format
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
select format('%1s', 1);
! ERROR: unterminated conversion specifier
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
! ERROR: unrecognized conversion specifier "1"
-- check mix of positional and ordered placeholders
select format('Hello %s %1$s %s', 'World', 'Hello again');
format
--- 257,271 ----
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
select format('%1s', 1);
! format
! --------
! 1
! (1 row)
!
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
! ERROR: unterminated conversion specifier
-- check mix of positional and ordered placeholders
select format('Hello %s %1$s %s', 'World', 'Hello again');
format
***************
*** 328,330 **** from generate_series(1,200) g(i);
--- 332,377 ----
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200
(1 row)
+ -- left, right align
+ select format('>>%10s<<', 'Hello')
+ union all
+ select format('>>%-10s<<', 'Hello')
+ union all
+ select format('>>%1$10s<<', 'Hello')
+ union all
+ select format('>>%1$-10I<<', 'Hello')
+ union all
+ select format('>>%2$*1$L<<', 10, 'Hello')
+ union all
+ select format('>>%*s<<', 10, 'Hello');
+ format
+ ----------------
+ >> Hello<<
+ >>Hello <<
+ >> Hello<<
+ >>"Hello" <<
+ >> 'Hello'<<
+ >> Hello<<
+ (6 rows)
+
+ select format('>>%*1$s<<', 10, 'Hello');
+ format
+ ----------------
+ >> Hello<<
+ (1 row)
+
+ select format('>>%-s<<', 'Hello');
+ format
+ -----------
+ >>Hello<<
+ (1 row)
+
+ select format('>>%10L<<', NULL);
+ format
+ ----------------
+ >> NULL<<
+ (1 row)
+
+ -- should fail
+ select format('>>%2$*1$L<<', NULL, 'Hello');
+ ERROR: width should not be NULL
*** a/src/test/regress/sql/text.sql
--- b/src/test/regress/sql/text.sql
***************
*** 97,99 **** select format('Hello', variadic NULL);
--- 97,119 ----
-- variadic argument allows simulating more than FUNC_MAX_ARGS parameters
select format(string_agg('%s',','), variadic array_agg(i))
from generate_series(1,200) g(i);
+
+ -- left, right align
+ select format('>>%10s<<', 'Hello')
+ union all
+ select format('>>%-10s<<', 'Hello')
+ union all
+ select format('>>%1$10s<<', 'Hello')
+ union all
+ select format('>>%1$-10I<<', 'Hello')
+ union all
+ select format('>>%2$*1$L<<', 10, 'Hello')
+ union all
+ select format('>>%*s<<', 10, 'Hello');
+
+ select format('>>%*1$s<<', 10, 'Hello');
+ select format('>>%-s<<', 'Hello');
+ select format('>>%10L<<', NULL);
+
+ -- should fail
+ select format('>>%2$*1$L<<', NULL, 'Hello');
2013/1/31 Pavel Stehule <pavel.stehule@gmail.com>:
I am sending rewritten code
Nice. I think this will be very useful, and it looks like it now
supports everything that printf() does for %s format specifiers, and
it's good that %I and %L behave the same. Also the code is looking
cleaner.
It indirect width "*" and "*n$" is supported. It needs little bit more code.
There are a new question
what should be result of
format(">>%2$*1$s<<", NULL, "hello")
???
My first thought is that a NULL width should be treated the same as no
width at all (equivalent to width=0), rather than raising an
exception.
minor update - fix align NULL for %L
You need to do the same for a NULL value with %s, which currently
produces an empty string regardless of the width.
The documentation also needs to be updated. I'm thinking perhaps
format() should now have its own separate sub-section in the manual,
rather than trying to cram it's docs into a single table row. I can
help with the docs if you like.
Regards,
Dean
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
2013/2/9 Dean Rasheed <dean.a.rasheed@gmail.com>:
2013/1/31 Pavel Stehule <pavel.stehule@gmail.com>:
I am sending rewritten code
Nice. I think this will be very useful, and it looks like it now
supports everything that printf() does for %s format specifiers, and
it's good that %I and %L behave the same. Also the code is looking
cleaner.It indirect width "*" and "*n$" is supported. It needs little bit more code.
There are a new question
what should be result of
format(">>%2$*1$s<<", NULL, "hello")
???
My first thought is that a NULL width should be treated the same as no
width at all (equivalent to width=0), rather than raising an
exception.minor update - fix align NULL for %L
You need to do the same for a NULL value with %s, which currently
produces an empty string regardless of the width.
have others same opinion? Usually I don't like hide NULLs, but this is
corner case (and specific function) and I have not strong opinion on
this issue.
The documentation also needs to be updated. I'm thinking perhaps
format() should now have its own separate sub-section in the manual,
rather than trying to cram it's docs into a single table row. I can
help with the docs if you like.
please, if you can, write it. I am sure, so you do it significantly
better than me.
Thank you
Pavel
Regards,
Dean
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 9 February 2013 18:30, Pavel Stehule <pavel.stehule@gmail.com> wrote:
There are a new question
what should be result of
format(">>%2$*1$s<<", NULL, "hello")
???
My first thought is that a NULL width should be treated the same as no
width at all (equivalent to width=0), rather than raising an
exception.minor update - fix align NULL for %L
You need to do the same for a NULL value with %s, which currently
produces an empty string regardless of the width.have others same opinion? Usually I don't like hide NULLs, but this is
corner case (and specific function) and I have not strong opinion on
this issue.
One use case for this might be something like
SELECT format('%*s', minimum_width, value) FROM some_table;
Throwing an exception when minimum_width is NULL doesn't seem
particularly useful. Intuitively, it just means that row has no
minimum width, so I think we should allow it.
I think the case where the value is NULL is much more clear-cut.
format('%s') produces '' (empty string). So format('%3s') should
produce ' '.
The documentation also needs to be updated. I'm thinking perhaps
format() should now have its own separate sub-section in the manual,
rather than trying to cram it's docs into a single table row. I can
help with the docs if you like.please, if you can, write it. I am sure, so you do it significantly
better than me.
Here is my first draft. I've also attached the generated HTML page,
because it's not so easy to read an SGML patch.
Regards,
Dean
Attachments:
format-width.doc.patchapplication/octet-stream; name=format-width.doc.patchDownload
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
new file mode 100644
index 3879186..737d677
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
***************
*** 1524,1539 ****
<entry><type>text</type></entry>
<entry>
Format arguments according to a format string.
! This function is similar to the C function
! <function>sprintf</>, but only the following conversion specifications
! are recognized: <literal>%s</literal> interpolates the corresponding
! argument as a string; <literal>%I</literal> escapes its argument as
! an SQL identifier; <literal>%L</literal> escapes its argument as an
! SQL literal; <literal>%%</literal> outputs a literal <literal>%</>.
! A conversion can reference an explicit parameter position by preceding
! the conversion specifier with <literal><replaceable>n</>$</>, where
! <replaceable>n</replaceable> is the argument position.
! See also <xref linkend="plpgsql-quote-literal-example">.
</entry>
<entry><literal>format('Hello %s, %1$s', 'World')</literal></entry>
<entry><literal>Hello World, World</literal></entry>
--- 1524,1531 ----
<entry><type>text</type></entry>
<entry>
Format arguments according to a format string.
! This function is similar to the C function <function>sprintf</>.
! See <xref linkend="functions-string-format">.
</entry>
<entry><literal>format('Hello %s, %1$s', 'World')</literal></entry>
<entry><literal>Hello World, World</literal></entry>
***************
*** 2847,2852 ****
--- 2839,3024 ----
</tgroup>
</table>
+ <sect2 id="functions-string-format">
+ <title><function>format</function></title>
+
+ <indexterm>
+ <primary>format</primary>
+ </indexterm>
+
+ <para>
+ The function <function>format</> produces formatted output according to
+ a format string in a similar way to the C function <function>sprintf</>.
+ </para>
+
+ <para>
+ <synopsis>
+ format(<parameter>formatstr</> <type>text</> [, <parameter>str</> <type>"any"</> [, ...] ])
+ </synopsis>
+ <replaceable>formatstr</> is a format string that specifies how the
+ result should be formatted. Text in the format string is copied directly
+ to the result, except where <firstterm>format specifiers</> are used.
+ Format specifiers act as placeholders in the string, allowing subsequent
+ function arguments to be formatted and inserted into the result.
+ </para>
+
+ <para>
+ Format specifiers are introduced by a <literal>%</> character and take
+ the form
+ <synopsis>
+ %[<replaceable>parameter</>][<replaceable>flags</>][<replaceable>width</>]<replaceable>type</>
+ </synopsis>
+ <variablelist>
+ <varlistentry>
+ <term><replaceable>parameter</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ An expression of the form <literal><replaceable>n</>$</> where
+ <replaceable>n</> is the index of the argument to use for the format
+ specifier's value. An index of 1 means the first argument after
+ <replaceable>formatstr</>. If the <replaceable>parameter</> field is
+ omitted, the default is to use the next argument.
+ </para>
+ <screen>
+ SELECT format('Testing %s, %s, %s', 'one', 'two', 'three');
+ <lineannotation>Result: </><computeroutput>Testing one, two, three</>
+
+ SELECT format('Testing %3$s, %2$s, %1$s', 'one', 'two', 'three');
+ <lineannotation>Result: </><computeroutput>Testing three, two, one</>
+ </screen>
+
+ <para>
+ Note that unlike the C function <function>sprintf</> defined in the
+ Single UNIX Specification, the <function>format</> function in
+ <productname>PostgreSQL</> allows format specifiers with and without
+ explicit <replaceable>parameter</> fields to be mixed in the same
+ format string. A format specifier without a
+ <replaceable>parameter</> field always uses the next argument after
+ the last argument consumed. In addition, the
+ <productname>PostgreSQL</> <function>format</> function does not
+ require all function arguments to be referred to in the format
+ string.
+ </para>
+ <screen>
+ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
+ <lineannotation>Result: </><computeroutput>Testing three, two, three</>
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>flags</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ Additional options controlling how the format specifier's output is
+ formatted. Currently the only supported flag is an minus sign
+ (<literal>-</>) which will cause the format specifier's output to be
+ left-aligned. This has no effect unless the <replaceable>width</>
+ field is also specified.
+ </para>
+ <screen>
+ SELECT format('|%10s|%-10s|', 'foo', 'bar');
+ <lineannotation>Result: </><computeroutput>| foo|bar |</>
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>width</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ Specifies the <emphasis>minimum</> number of characters to use to
+ display the format specifier's output. The width may be specified
+ using any of the following: a positive integer; an asterisk
+ (<literal>*</>) to use the next function argument as the width; or an
+ expression of the form <literal>*<replaceable>n</>$</> to use the
+ <replaceable>n</>th function argument as the width.
+ </para>
+
+ <para>
+ If the width comes from a function argument, that argument is
+ consumed <emphasis>before</> the argument that is used for the format
+ specifier's value. If the width argument is negative, the result is
+ left aligned, as if the <literal>-</> flag had been specified.
+ </para>
+ <screen>
+ SELECT format('|%10s|', 'foo');
+ <lineannotation>Result: </><computeroutput>| foo|</>
+
+ SELECT format('|%*s|', 10, 'foo');
+ <lineannotation>Result: </><computeroutput>| foo|</>
+
+ SELECT format('|%*s|', -10, 'foo');
+ <lineannotation>Result: </><computeroutput>|foo |</>
+
+ SELECT format('|%-*s|', 10, 'foo');
+ <lineannotation>Result: </><computeroutput>|foo |</>
+
+ SELECT format('|%-*s|', -10, 'foo');
+ <lineannotation>Result: </><computeroutput>|foo |</>
+
+ SELECT format('|%*2$s|', 'foo', 10, 'bar');
+ <lineannotation>Result: </><computeroutput>| bar|</>
+
+ SELECT format('|%3$*2$s|', 'foo', 10, 'bar');
+ <lineannotation>Result: </><computeroutput>| bar|</>
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>type</replaceable> (required)</term>
+ <listitem>
+ <para>
+ The type of format conversion to use to produce the format
+ specifier's output. The following types are supported:
+ <itemizedlist>
+ <listitem>
+ <para>
+ <literal>s</literal> formats the argument value as a simple
+ string. A null value is treated as an empty string.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>I</literal> escapes the value as an SQL identifier. It
+ is an error for the value to be null.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>L</literal> escapes the value as an SQL literal. A null
+ value is displayed as the literal value <literal>NULL</>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ <screen>
+ SELECT format('Hello %s', 'World');
+ <lineannotation>Result: </lineannotation><computeroutput>Hello World</computeroutput>
+
+ SELECT format('DROP TABLE %I', 'Foo bar');
+ <lineannotation>Result: </lineannotation><computeroutput>DROP TABLE "Foo bar"</computeroutput>
+
+ SELECT format('SELECT %L', E'O\'Reilly');
+ <lineannotation>Result: </lineannotation><computeroutput>SELECT 'O''Reilly'</computeroutput>
+ </screen>
+
+ <para>
+ The <literal>%I</> and <literal>%L</> format specifiers may be used
+ to safely construct dynamic SQL statements. See
+ <xref linkend="plpgsql-quote-literal-example">.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </para>
+
+ <para>
+ In addition to the format specifiers above, the special escape sequence
+ <literal>%%</> may be used to output a literal <literal>%</> character.
+ </para>
+ </sect2>
</sect1>
2013/2/10 Dean Rasheed <dean.a.rasheed@gmail.com>:
On 9 February 2013 18:30, Pavel Stehule <pavel.stehule@gmail.com> wrote:
There are a new question
what should be result of
format(">>%2$*1$s<<", NULL, "hello")
???
My first thought is that a NULL width should be treated the same as no
width at all (equivalent to width=0), rather than raising an
exception.minor update - fix align NULL for %L
You need to do the same for a NULL value with %s, which currently
produces an empty string regardless of the width.have others same opinion? Usually I don't like hide NULLs, but this is
corner case (and specific function) and I have not strong opinion on
this issue.One use case for this might be something like
SELECT format('%*s', minimum_width, value) FROM some_table;
Throwing an exception when minimum_width is NULL doesn't seem
particularly useful. Intuitively, it just means that row has no
minimum width, so I think we should allow it.I think the case where the value is NULL is much more clear-cut.
format('%s') produces '' (empty string). So format('%3s') should
produce ' '.
ok - in this case I can accept NULL as "ignore width"
The documentation also needs to be updated. I'm thinking perhaps
format() should now have its own separate sub-section in the manual,
rather than trying to cram it's docs into a single table row. I can
help with the docs if you like.please, if you can, write it. I am sure, so you do it significantly
better than me.Here is my first draft. I've also attached the generated HTML page,
because it's not so easy to read an SGML patch.
nice
I have only one point - I am think, so format function should be in
table 9-6 - some small text with reference to special section.
Regards
Pavel
Regards,
Dean
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 10 February 2013 12:37, Pavel Stehule <pavel.stehule@gmail.com> >>
Here is my first draft. I've also attached the generated HTML page,
because it's not so easy to read an SGML patch.
nice
I have only one point - I am think, so format function should be in
table 9-6 - some small text with reference to special section.
It is already there in table 9-6, referring to the new section.
Here is a minor update though -- I changed the name of the first
optional argument from "str" to "formatarg", since they are no longer
necessarily strings.
Regards,
Dean
Attachments:
format-width.doc.v2.patchapplication/octet-stream; name=format-width.doc.v2.patchDownload
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
new file mode 100644
index 3879186..240a6e7
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
***************
*** 1519,1539 ****
<primary>format</primary>
</indexterm>
<literal><function>format</function>(<parameter>formatstr</parameter> <type>text</type>
! [, <parameter>str</parameter> <type>"any"</type> [, ...] ])</literal>
</entry>
<entry><type>text</type></entry>
<entry>
Format arguments according to a format string.
! This function is similar to the C function
! <function>sprintf</>, but only the following conversion specifications
! are recognized: <literal>%s</literal> interpolates the corresponding
! argument as a string; <literal>%I</literal> escapes its argument as
! an SQL identifier; <literal>%L</literal> escapes its argument as an
! SQL literal; <literal>%%</literal> outputs a literal <literal>%</>.
! A conversion can reference an explicit parameter position by preceding
! the conversion specifier with <literal><replaceable>n</>$</>, where
! <replaceable>n</replaceable> is the argument position.
! See also <xref linkend="plpgsql-quote-literal-example">.
</entry>
<entry><literal>format('Hello %s, %1$s', 'World')</literal></entry>
<entry><literal>Hello World, World</literal></entry>
--- 1519,1531 ----
<primary>format</primary>
</indexterm>
<literal><function>format</function>(<parameter>formatstr</parameter> <type>text</type>
! [, <parameter>formatarg</parameter> <type>"any"</type> [, ...] ])</literal>
</entry>
<entry><type>text</type></entry>
<entry>
Format arguments according to a format string.
! This function is similar to the C function <function>sprintf</>.
! See <xref linkend="functions-string-format">.
</entry>
<entry><literal>format('Hello %s, %1$s', 'World')</literal></entry>
<entry><literal>Hello World, World</literal></entry>
***************
*** 2847,2852 ****
--- 2839,3024 ----
</tgroup>
</table>
+ <sect2 id="functions-string-format">
+ <title><function>format</function></title>
+
+ <indexterm>
+ <primary>format</primary>
+ </indexterm>
+
+ <para>
+ The function <function>format</> produces formatted output according to
+ a format string in a similar way to the C function <function>sprintf</>.
+ </para>
+
+ <para>
+ <synopsis>
+ format(<parameter>formatstr</> <type>text</> [, <parameter>formatarg</> <type>"any"</> [, ...] ])
+ </synopsis>
+ <replaceable>formatstr</> is a format string that specifies how the
+ result should be formatted. Text in the format string is copied directly
+ to the result, except where <firstterm>format specifiers</> are used.
+ Format specifiers act as placeholders in the string, allowing subsequent
+ function arguments to be formatted and inserted into the result.
+ </para>
+
+ <para>
+ Format specifiers are introduced by a <literal>%</> character and take
+ the form
+ <synopsis>
+ %[<replaceable>parameter</>][<replaceable>flags</>][<replaceable>width</>]<replaceable>type</>
+ </synopsis>
+ <variablelist>
+ <varlistentry>
+ <term><replaceable>parameter</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ An expression of the form <literal><replaceable>n</>$</> where
+ <replaceable>n</> is the index of the argument to use for the format
+ specifier's value. An index of 1 means the first argument after
+ <replaceable>formatstr</>. If the <replaceable>parameter</> field is
+ omitted, the default is to use the next argument.
+ </para>
+ <screen>
+ SELECT format('Testing %s, %s, %s', 'one', 'two', 'three');
+ <lineannotation>Result: </><computeroutput>Testing one, two, three</>
+
+ SELECT format('Testing %3$s, %2$s, %1$s', 'one', 'two', 'three');
+ <lineannotation>Result: </><computeroutput>Testing three, two, one</>
+ </screen>
+
+ <para>
+ Note that unlike the C function <function>sprintf</> defined in the
+ Single UNIX Specification, the <function>format</> function in
+ <productname>PostgreSQL</> allows format specifiers with and without
+ explicit <replaceable>parameter</> fields to be mixed in the same
+ format string. A format specifier without a
+ <replaceable>parameter</> field always uses the next argument after
+ the last argument consumed. In addition, the
+ <productname>PostgreSQL</> <function>format</> function does not
+ require all function arguments to be referred to in the format
+ string.
+ </para>
+ <screen>
+ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
+ <lineannotation>Result: </><computeroutput>Testing three, two, three</>
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>flags</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ Additional options controlling how the format specifier's output is
+ formatted. Currently the only supported flag is an minus sign
+ (<literal>-</>) which will cause the format specifier's output to be
+ left-aligned. This has no effect unless the <replaceable>width</>
+ field is also specified.
+ </para>
+ <screen>
+ SELECT format('|%10s|%-10s|', 'foo', 'bar');
+ <lineannotation>Result: </><computeroutput>| foo|bar |</>
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>width</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ Specifies the <emphasis>minimum</> number of characters to use to
+ display the format specifier's output. The width may be specified
+ using any of the following: a positive integer; an asterisk
+ (<literal>*</>) to use the next function argument as the width; or an
+ expression of the form <literal>*<replaceable>n</>$</> to use the
+ <replaceable>n</>th function argument as the width.
+ </para>
+
+ <para>
+ If the width comes from a function argument, that argument is
+ consumed <emphasis>before</> the argument that is used for the format
+ specifier's value. If the width argument is negative, the result is
+ left aligned, as if the <literal>-</> flag had been specified.
+ </para>
+ <screen>
+ SELECT format('|%10s|', 'foo');
+ <lineannotation>Result: </><computeroutput>| foo|</>
+
+ SELECT format('|%*s|', 10, 'foo');
+ <lineannotation>Result: </><computeroutput>| foo|</>
+
+ SELECT format('|%*s|', -10, 'foo');
+ <lineannotation>Result: </><computeroutput>|foo |</>
+
+ SELECT format('|%-*s|', 10, 'foo');
+ <lineannotation>Result: </><computeroutput>|foo |</>
+
+ SELECT format('|%-*s|', -10, 'foo');
+ <lineannotation>Result: </><computeroutput>|foo |</>
+
+ SELECT format('|%*2$s|', 'foo', 10, 'bar');
+ <lineannotation>Result: </><computeroutput>| bar|</>
+
+ SELECT format('|%3$*2$s|', 'foo', 10, 'bar');
+ <lineannotation>Result: </><computeroutput>| bar|</>
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>type</replaceable> (required)</term>
+ <listitem>
+ <para>
+ The type of format conversion to use to produce the format
+ specifier's output. The following types are supported:
+ <itemizedlist>
+ <listitem>
+ <para>
+ <literal>s</literal> formats the argument value as a simple
+ string. A null value is treated as an empty string.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>I</literal> escapes the value as an SQL identifier. It
+ is an error for the value to be null.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>L</literal> escapes the value as an SQL literal. A null
+ value is displayed as the literal value <literal>NULL</>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ <screen>
+ SELECT format('Hello %s', 'World');
+ <lineannotation>Result: </lineannotation><computeroutput>Hello World</computeroutput>
+
+ SELECT format('DROP TABLE %I', 'Foo bar');
+ <lineannotation>Result: </lineannotation><computeroutput>DROP TABLE "Foo bar"</computeroutput>
+
+ SELECT format('SELECT %L', E'O\'Reilly');
+ <lineannotation>Result: </lineannotation><computeroutput>SELECT 'O''Reilly'</computeroutput>
+ </screen>
+
+ <para>
+ The <literal>%I</> and <literal>%L</> format specifiers may be used
+ to safely construct dynamic SQL statements. See
+ <xref linkend="plpgsql-quote-literal-example">.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </para>
+
+ <para>
+ In addition to the format specifiers above, the special escape sequence
+ <literal>%%</> may be used to output a literal <literal>%</> character.
+ </para>
+ </sect2>
</sect1>
Hello
updated patch
* merged Dean's doc
* allow NULL as width
Regards
Pavel
2013/2/11 Dean Rasheed <dean.a.rasheed@gmail.com>:
Show quoted text
On 10 February 2013 12:37, Pavel Stehule <pavel.stehule@gmail.com> >>
Here is my first draft. I've also attached the generated HTML page,because it's not so easy to read an SGML patch.
nice
I have only one point - I am think, so format function should be in
table 9-6 - some small text with reference to special section.It is already there in table 9-6, referring to the new section.
Here is a minor update though -- I changed the name of the first
optional argument from "str" to "formatarg", since they are no longer
necessarily strings.Regards,
Dean
Attachments:
format-width-20130211.patchapplication/octet-stream; name=format-width-20130211.patchDownload
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
***************
*** 1519,1539 ****
<primary>format</primary>
</indexterm>
<literal><function>format</function>(<parameter>formatstr</parameter> <type>text</type>
! [, <parameter>str</parameter> <type>"any"</type> [, ...] ])</literal>
</entry>
<entry><type>text</type></entry>
<entry>
Format arguments according to a format string.
! This function is similar to the C function
! <function>sprintf</>, but only the following conversion specifications
! are recognized: <literal>%s</literal> interpolates the corresponding
! argument as a string; <literal>%I</literal> escapes its argument as
! an SQL identifier; <literal>%L</literal> escapes its argument as an
! SQL literal; <literal>%%</literal> outputs a literal <literal>%</>.
! A conversion can reference an explicit parameter position by preceding
! the conversion specifier with <literal><replaceable>n</>$</>, where
! <replaceable>n</replaceable> is the argument position.
! See also <xref linkend="plpgsql-quote-literal-example">.
</entry>
<entry><literal>format('Hello %s, %1$s', 'World')</literal></entry>
<entry><literal>Hello World, World</literal></entry>
--- 1519,1531 ----
<primary>format</primary>
</indexterm>
<literal><function>format</function>(<parameter>formatstr</parameter> <type>text</type>
! [, <parameter>formatarg</parameter> <type>"any"</type> [, ...] ])</literal>
</entry>
<entry><type>text</type></entry>
<entry>
Format arguments according to a format string.
! This function is similar to the C function <function>sprintf</>.
! See <xref linkend="functions-string-format">.
</entry>
<entry><literal>format('Hello %s, %1$s', 'World')</literal></entry>
<entry><literal>Hello World, World</literal></entry>
***************
*** 2847,2852 ****
--- 2839,3024 ----
</tgroup>
</table>
+ <sect2 id="functions-string-format">
+ <title><function>format</function></title>
+
+ <indexterm>
+ <primary>format</primary>
+ </indexterm>
+
+ <para>
+ The function <function>format</> produces formatted output according to
+ a format string in a similar way to the C function <function>sprintf</>.
+ </para>
+
+ <para>
+ <synopsis>
+ format(<parameter>formatstr</> <type>text</> [, <parameter>formatarg</> <type>"any"</> [, ...] ])
+ </synopsis>
+ <replaceable>formatstr</> is a format string that specifies how the
+ result should be formatted. Text in the format string is copied directly
+ to the result, except where <firstterm>format specifiers</> are used.
+ Format specifiers act as placeholders in the string, allowing subsequent
+ function arguments to be formatted and inserted into the result.
+ </para>
+
+ <para>
+ Format specifiers are introduced by a <literal>%</> character and take
+ the form
+ <synopsis>
+ %[<replaceable>parameter</>][<replaceable>flags</>][<replaceable>width</>]<replaceable>type</>
+ </synopsis>
+ <variablelist>
+ <varlistentry>
+ <term><replaceable>parameter</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ An expression of the form <literal><replaceable>n</>$</> where
+ <replaceable>n</> is the index of the argument to use for the format
+ specifier's value. An index of 1 means the first argument after
+ <replaceable>formatstr</>. If the <replaceable>parameter</> field is
+ omitted, the default is to use the next argument.
+ </para>
+ <screen>
+ SELECT format('Testing %s, %s, %s', 'one', 'two', 'three');
+ <lineannotation>Result: </><computeroutput>Testing one, two, three</>
+
+ SELECT format('Testing %3$s, %2$s, %1$s', 'one', 'two', 'three');
+ <lineannotation>Result: </><computeroutput>Testing three, two, one</>
+ </screen>
+
+ <para>
+ Note that unlike the C function <function>sprintf</> defined in the
+ Single UNIX Specification, the <function>format</> function in
+ <productname>PostgreSQL</> allows format specifiers with and without
+ explicit <replaceable>parameter</> fields to be mixed in the same
+ format string. A format specifier without a
+ <replaceable>parameter</> field always uses the next argument after
+ the last argument consumed. In addition, the
+ <productname>PostgreSQL</> <function>format</> function does not
+ require all function arguments to be referred to in the format
+ string.
+ </para>
+ <screen>
+ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
+ <lineannotation>Result: </><computeroutput>Testing three, two, three</>
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>flags</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ Additional options controlling how the format specifier's output is
+ formatted. Currently the only supported flag is an minus sign
+ (<literal>-</>) which will cause the format specifier's output to be
+ left-aligned. This has no effect unless the <replaceable>width</>
+ field is also specified.
+ </para>
+ <screen>
+ SELECT format('|%10s|%-10s|', 'foo', 'bar');
+ <lineannotation>Result: </><computeroutput>| foo|bar |</>
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>width</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ Specifies the <emphasis>minimum</> number of characters to use to
+ display the format specifier's output. The width may be specified
+ using any of the following: a positive integer; an asterisk
+ (<literal>*</>) to use the next function argument as the width; or an
+ expression of the form <literal>*<replaceable>n</>$</> to use the
+ <replaceable>n</>th function argument as the width.
+ </para>
+
+ <para>
+ If the width comes from a function argument, that argument is
+ consumed <emphasis>before</> the argument that is used for the format
+ specifier's value. If the width argument is negative, the result is
+ left aligned, as if the <literal>-</> flag had been specified.
+ </para>
+ <screen>
+ SELECT format('|%10s|', 'foo');
+ <lineannotation>Result: </><computeroutput>| foo|</>
+
+ SELECT format('|%*s|', 10, 'foo');
+ <lineannotation>Result: </><computeroutput>| foo|</>
+
+ SELECT format('|%*s|', -10, 'foo');
+ <lineannotation>Result: </><computeroutput>|foo |</>
+
+ SELECT format('|%-*s|', 10, 'foo');
+ <lineannotation>Result: </><computeroutput>|foo |</>
+
+ SELECT format('|%-*s|', -10, 'foo');
+ <lineannotation>Result: </><computeroutput>|foo |</>
+
+ SELECT format('|%*2$s|', 'foo', 10, 'bar');
+ <lineannotation>Result: </><computeroutput>| bar|</>
+
+ SELECT format('|%3$*2$s|', 'foo', 10, 'bar');
+ <lineannotation>Result: </><computeroutput>| bar|</>
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>type</replaceable> (required)</term>
+ <listitem>
+ <para>
+ The type of format conversion to use to produce the format
+ specifier's output. The following types are supported:
+ <itemizedlist>
+ <listitem>
+ <para>
+ <literal>s</literal> formats the argument value as a simple
+ string. A null value is treated as an empty string.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>I</literal> escapes the value as an SQL identifier. It
+ is an error for the value to be null.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>L</literal> escapes the value as an SQL literal. A null
+ value is displayed as the literal value <literal>NULL</>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ <screen>
+ SELECT format('Hello %s', 'World');
+ <lineannotation>Result: </lineannotation><computeroutput>Hello World</computeroutput>
+
+ SELECT format('DROP TABLE %I', 'Foo bar');
+ <lineannotation>Result: </lineannotation><computeroutput>DROP TABLE "Foo bar"</computeroutput>
+
+ SELECT format('SELECT %L', E'O\'Reilly');
+ <lineannotation>Result: </lineannotation><computeroutput>SELECT 'O''Reilly'</computeroutput>
+ </screen>
+
+ <para>
+ The <literal>%I</> and <literal>%L</> format specifiers may be used
+ to safely construct dynamic SQL statements. See
+ <xref linkend="plpgsql-quote-literal-example">.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </para>
+
+ <para>
+ In addition to the format specifiers above, the special escape sequence
+ <literal>%%</> may be used to output a literal <literal>%</> character.
+ </para>
+ </sect2>
</sect1>
*** a/src/backend/utils/adt/varlena.c
--- b/src/backend/utils/adt/varlena.c
***************
*** 78,84 **** static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
static void text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
const char *fldsep, const char *null_string);
--- 78,85 ----
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
static void text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull,
! int flags, bool use_width, int width);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
const char *fldsep, const char *null_string);
***************
*** 3997,4002 **** text_reverse(PG_FUNCTION_ARGS)
--- 3998,4134 ----
}
/*
+ * Returns pointer of first non digit char
+ *
+ * When some digits are processed, then inum returns parsed number
+ * and is_valid is true, otherwise is_valid is false.
+ */
+ static const char *
+ text_format_parse_digits(const char *start_ptr, const char *end_ptr,
+ int *inum, /* OUT param */
+ bool *is_valid) /* OUT param */
+ {
+ const char *cp = start_ptr;
+
+ *is_valid = false;
+ *inum = 0;
+
+ /* continue, only when start_ptr is less than end_ptr */
+ while (cp < end_ptr)
+ {
+ if (*cp >= '0' && *cp <= '9')
+ {
+ int newnum = *inum * 10 + (*cp - '0');
+
+ if (newnum / 10 != *inum) /* overflow? */
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("number is out of range")));
+ *inum = newnum;
+ *is_valid = true;
+ ++cp;
+ }
+ else
+ break;
+ }
+
+ /* should not be after last char of format string */
+ if (cp >= end_ptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unterminated conversion specifier")));
+
+ return cp;
+ }
+
+ #define TEXT_FORMAT_FLAG_MINUS 0x0001 /* is minus in format string? */
+
+ #define TEXT_FORMAT_NEXT_CHAR(ptr, end_ptr) \
+ do { \
+ if (++(ptr) >= (end_ptr)) \
+ ereport(ERROR, \
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
+ errmsg("unterminated conversion specifier"))); \
+ } while (0)
+
+ /*
+ * parse format specification
+ * [parameter][flags][width]type
+ *
+ * -1 in int OUT parameters signalize so this attribut is missing,
+ * all values should be zero or greather than zero. Raise exeption
+ * when some parameter is out of range.
+ */
+ static const char *
+ text_format_parse_format(const char *start_ptr, const char *end_ptr,
+ int *parameter, /* OUT param */
+ int *flags, /* OUT param */
+ int *width, /* OUT param */
+ bool *indirect_width, /* OUT param */
+ int *indirect_width_parameter) /* OUT param */
+ {
+ int inum;
+ bool is_valid;
+
+ /* set defaults to out parameters */
+ *parameter = -1;
+ *flags = 0;
+ *width = -1;
+ *indirect_width = false;
+ *indirect_width_parameter = -1;
+
+ /* try to identify first number */
+ start_ptr = text_format_parse_digits(start_ptr, end_ptr, &inum, &is_valid);
+ if (is_valid)
+ {
+ /* number on this position can be parameter number or the width */
+ if (*start_ptr != '$')
+ {
+ *width = inum;
+ return start_ptr;
+ }
+
+ *parameter = inum;
+ TEXT_FORMAT_NEXT_CHAR(start_ptr, end_ptr);
+ }
+
+ /* try to parse flags, only minus is supported now */
+ if (*start_ptr == '-')
+ {
+ *flags = *flags | TEXT_FORMAT_FLAG_MINUS;
+ TEXT_FORMAT_NEXT_CHAR(start_ptr, end_ptr);
+ }
+
+ /* try to parse indirect width */
+ if (*start_ptr == '*')
+ {
+ start_ptr = text_format_parse_digits(++start_ptr, end_ptr, &inum, &is_valid);
+ if (is_valid)
+ {
+ /* number in this position should be closed by $ */
+ if (*start_ptr != '$')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unexpected char \"%c\"", *start_ptr)));
+ TEXT_FORMAT_NEXT_CHAR(start_ptr, end_ptr);
+
+ *indirect_width_parameter = inum;
+ }
+ else
+ *indirect_width = true;
+
+ return start_ptr;
+ }
+
+ /* last possible nummber - width */
+ start_ptr = text_format_parse_digits(start_ptr, end_ptr, &inum, &is_valid);
+ if (is_valid)
+ *width = inum;
+
+ return start_ptr;
+ }
+
+ /*
* Returns a formated string
*/
Datum
***************
*** 4016,4021 **** text_format(PG_FUNCTION_ARGS)
--- 4148,4155 ----
Oid element_type = InvalidOid;
Oid prev_type = InvalidOid;
FmgrInfo typoutputfinfo;
+ FmgrInfo typoutputinfo_width;
+ Oid prev_type_width = InvalidOid;
/* When format string is null, returns null */
if (PG_ARGISNULL(0))
***************
*** 4088,4093 **** text_format(PG_FUNCTION_ARGS)
--- 4222,4233 ----
Datum value;
bool isNull;
Oid typid;
+ int parameter;
+ int flags;
+ int width;
+ bool indirect_width;
+ int indirect_width_parameter;
+ bool use_width;
/*
* If it's not the start of a conversion specifier, just copy it to
***************
*** 4099,4109 **** text_format(PG_FUNCTION_ARGS)
continue;
}
! /* Did we run off the end of the string? */
! if (++cp >= end_ptr)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("unterminated conversion specifier")));
/* Easy case: %% outputs a single % */
if (*cp == '%')
--- 4239,4246 ----
continue;
}
! /* go to next char */
! TEXT_FORMAT_NEXT_CHAR(cp, end_ptr);
/* Easy case: %% outputs a single % */
if (*cp == '%')
***************
*** 4112,4125 **** text_format(PG_FUNCTION_ARGS)
continue;
}
! /*
! * If the user hasn't specified an argument position, we just advance
! * to the next one. If they have, we must parse it.
! */
! if (*cp < '0' || *cp > '9')
{
! ++arg;
! if (arg <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
--- 4249,4264 ----
continue;
}
! cp = text_format_parse_format(cp, end_ptr,
! ¶meter,
! &flags,
! &width,
! &indirect_width,
! &indirect_width_parameter);
!
! if (indirect_width)
{
! if (++arg <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
***************
*** 4129,4177 **** text_format(PG_FUNCTION_ARGS)
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("argument number is out of range")));
}
}
! else
{
! bool unterminated = false;
!
! /* Parse digit string. */
! arg = 0;
! do
! {
! int newarg = arg * 10 + (*cp - '0');
!
! if (newarg / 10 != arg) /* overflow? */
! ereport(ERROR,
! (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
! errmsg("argument number is out of range")));
! arg = newarg;
! ++cp;
! } while (cp < end_ptr && *cp >= '0' && *cp <= '9');
! /*
! * If we ran off the end, or if there's not a $ next, or if the $
! * is the last character, the conversion specifier is improperly
! * terminated.
! */
! if (cp == end_ptr || *cp != '$')
! unterminated = true;
! else
{
! ++cp;
! if (cp == end_ptr)
! unterminated = true;
! }
! if (unterminated)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("unterminated conversion specifier")));
!
! /* There's no argument 0. */
! if (arg == 0)
ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
}
/* Not enough arguments? Deduct 1 to avoid counting format string. */
if (arg > nargs - 1)
--- 4268,4301 ----
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("argument number is out of range")));
}
+ indirect_width_parameter = arg;
}
! else if (indirect_width_parameter != -1)
{
! /* be consistent, move ordered argument together with positional */
! arg = indirect_width_parameter;
! }
! if (parameter == -1)
! {
! if (++arg <= 0) /* overflow? */
{
! /*
! * Should not happen, as you can't pass billions of arguments
! * to a function, but better safe than sorry.
! */
ereport(ERROR,
! (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
! errmsg("argument number is out of range")));
! }
}
+ else
+ arg = parameter;
+
+ if (arg == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
/* Not enough arguments? Deduct 1 to avoid counting format string. */
if (arg > nargs - 1)
***************
*** 4179,4184 **** text_format(PG_FUNCTION_ARGS)
--- 4303,4380 ----
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("too few arguments for format")));
+ if (indirect_width_parameter != -1)
+ {
+
+ if (indirect_width_parameter == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
+
+ /* Not enough arguments? Deduct 1 to avoid counting format string. */
+ if (indirect_width_parameter > nargs - 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("too few arguments for format")));
+
+ /* get value of related parameter that holds width and coerce to int */
+ if (!funcvariadic)
+ {
+ value = PG_GETARG_DATUM(indirect_width_parameter);
+ isNull = PG_ARGISNULL(indirect_width_parameter);
+ typid = get_fn_expr_argtype(fcinfo->flinfo, indirect_width_parameter);
+ }
+ else
+ {
+ value = elements[indirect_width_parameter - 1];
+ isNull = nulls[indirect_width_parameter - 1];
+ typid = element_type;
+ }
+ if (!OidIsValid(typid))
+ elog(ERROR, "could not determine data type of format() input");
+
+ /*
+ * we don't need to different between NULL and zero in this moment,
+ * NULL means ignore this width - same as zero.
+ */
+ if (isNull)
+ {
+ value = 0;
+ isNull = false;
+ typid = INT4OID;
+ }
+ else if (typid != INT4OID)
+ {
+ char *str;
+
+ /* simple IO cast to int */
+ if (typid != prev_type_width)
+ {
+ Oid typoutputfunc;
+ bool typIsVarlena;
+
+ getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
+ fmgr_info(typoutputfunc, &typoutputinfo_width);
+ prev_type_width = typid;
+ }
+
+ /* Stringify. */
+ str = OutputFunctionCall(&typoutputinfo_width, value);
+
+ /* get int value */
+ width = pg_atoi(str, sizeof(int32), '\0');
+ pfree(str);
+ }
+ else
+ width = DatumGetInt32(value);
+
+ use_width = true;
+ }
+ else if (width != -1)
+ use_width = true;
+ else
+ use_width = false;
+
/* Get the value and type of the selected argument */
if (!funcvariadic)
{
***************
*** 4221,4227 **** text_format(PG_FUNCTION_ARGS)
case 'I':
case 'L':
text_format_string_conversion(&str, *cp, &typoutputfinfo,
! value, isNull);
break;
default:
ereport(ERROR,
--- 4417,4424 ----
case 'I':
case 'L':
text_format_string_conversion(&str, *cp, &typoutputfinfo,
! value, isNull,
! flags, use_width, width);
break;
default:
ereport(ERROR,
***************
*** 4244,4254 **** text_format(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(result);
}
/* Format a %s, %I, or %L conversion. */
static void
text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull)
{
char *str;
--- 4441,4497 ----
PG_RETURN_TEXT_P(result);
}
+ /*
+ * Add spaces on begin or on end when it is necessary
+ */
+ static void
+ text_format_append_string(StringInfo buf, const char *str,
+ int flags, bool use_width, int width)
+ {
+ bool align_to_left;
+ int len;
+
+ /* fast path */
+ if (!use_width)
+ {
+ appendStringInfoString(buf, str);
+ return;
+ }
+
+ if (width < 0)
+ {
+ width = -width;
+ align_to_left = true;
+ }
+ else if (flags & TEXT_FORMAT_FLAG_MINUS)
+ {
+ align_to_left = true;
+ }
+ else
+ align_to_left = false;
+
+ len = pg_mbstrlen(str);
+ if (align_to_left)
+ {
+ appendStringInfoString(buf, str);
+ if (len < width)
+ appendStringInfoSpaces(buf, width - len);
+ }
+ else
+ {
+ /* align_to_right */
+ if (len < width)
+ appendStringInfoSpaces(buf, width - len);
+ appendStringInfoString(buf, str);
+ }
+ }
+
/* Format a %s, %I, or %L conversion. */
static void
text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull,
! int flags, bool use_width, int width)
{
char *str;
***************
*** 4256,4262 **** text_format_string_conversion(StringInfo buf, char conversion,
if (isNull)
{
if (conversion == 'L')
! appendStringInfoString(buf, "NULL");
else if (conversion == 'I')
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
--- 4499,4506 ----
if (isNull)
{
if (conversion == 'L')
! text_format_append_string(buf, "NULL",
! flags, use_width, width);
else if (conversion == 'I')
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
***************
*** 4271,4288 **** text_format_string_conversion(StringInfo buf, char conversion,
if (conversion == 'I')
{
/* quote_identifier may or may not allocate a new string. */
! appendStringInfoString(buf, quote_identifier(str));
}
else if (conversion == 'L')
! {
! char *qstr = quote_literal_cstr(str);
! appendStringInfoString(buf, qstr);
/* quote_literal_cstr() always allocates a new string */
pfree(qstr);
}
else
! appendStringInfoString(buf, str);
/* Cleanup. */
pfree(str);
--- 4515,4534 ----
if (conversion == 'I')
{
/* quote_identifier may or may not allocate a new string. */
! text_format_append_string(buf, quote_identifier(str),
! flags, use_width, width);
}
else if (conversion == 'L')
! { char *qstr = quote_literal_cstr(str);
! text_format_append_string(buf, qstr,
! flags, use_width, width);
/* quote_literal_cstr() always allocates a new string */
pfree(qstr);
}
else
! text_format_append_string(buf, str,
! flags, use_width, width);
/* Cleanup. */
pfree(str);
*** a/src/test/regress/expected/text.out
--- b/src/test/regress/expected/text.out
***************
*** 257,267 **** ERROR: too few arguments for format
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
select format('%1s', 1);
! ERROR: unterminated conversion specifier
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
! ERROR: unrecognized conversion specifier "1"
-- check mix of positional and ordered placeholders
select format('Hello %s %1$s %s', 'World', 'Hello again');
format
--- 257,271 ----
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
select format('%1s', 1);
! format
! --------
! 1
! (1 row)
!
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
! ERROR: unterminated conversion specifier
-- check mix of positional and ordered placeholders
select format('Hello %s %1$s %s', 'World', 'Hello again');
format
***************
*** 328,330 **** from generate_series(1,200) g(i);
--- 332,387 ----
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200
(1 row)
+ -- left, right align
+ select format('>>%10s<<', 'Hello')
+ union all
+ select format('>>%-10s<<', 'Hello')
+ union all
+ select format('>>%1$10s<<', 'Hello')
+ union all
+ select format('>>%1$-10I<<', 'Hello')
+ union all
+ select format('>>%2$*1$L<<', 10, 'Hello')
+ union all
+ select format('>>%*s<<', 10, 'Hello');
+ format
+ ----------------
+ >> Hello<<
+ >>Hello <<
+ >> Hello<<
+ >>"Hello" <<
+ >> 'Hello'<<
+ >> Hello<<
+ (6 rows)
+
+ select format('>>%*1$s<<', 10, 'Hello');
+ format
+ ----------------
+ >> Hello<<
+ (1 row)
+
+ select format('>>%-s<<', 'Hello');
+ format
+ -----------
+ >>Hello<<
+ (1 row)
+
+ -- NULL is not different to zero here
+ select format('>>%10L<<', NULL);
+ format
+ ----------------
+ >> NULL<<
+ (1 row)
+
+ select format('>>%2$*1$L<<', NULL, 'Hello');
+ format
+ -------------
+ >>'Hello'<<
+ (1 row)
+
+ select format('>>%2$*1$L<<', 0, 'Hello');
+ format
+ -------------
+ >>'Hello'<<
+ (1 row)
+
*** a/src/test/regress/sql/text.sql
--- b/src/test/regress/sql/text.sql
***************
*** 97,99 **** select format('Hello', variadic NULL);
--- 97,120 ----
-- variadic argument allows simulating more than FUNC_MAX_ARGS parameters
select format(string_agg('%s',','), variadic array_agg(i))
from generate_series(1,200) g(i);
+
+ -- left, right align
+ select format('>>%10s<<', 'Hello')
+ union all
+ select format('>>%-10s<<', 'Hello')
+ union all
+ select format('>>%1$10s<<', 'Hello')
+ union all
+ select format('>>%1$-10I<<', 'Hello')
+ union all
+ select format('>>%2$*1$L<<', 10, 'Hello')
+ union all
+ select format('>>%*s<<', 10, 'Hello');
+
+ select format('>>%*1$s<<', 10, 'Hello');
+ select format('>>%-s<<', 'Hello');
+
+ -- NULL is not different to zero here
+ select format('>>%10L<<', NULL);
+ select format('>>%2$*1$L<<', NULL, 'Hello');
+ select format('>>%2$*1$L<<', 0, 'Hello');
On 11 February 2013 14:29, Pavel Stehule <pavel.stehule@gmail.com> wrote:
Hello
updated patch
* merged Dean's doc
* allow NULL as width
Hi,
I have not had time to look at this properly, but it doesn't look as
though you have fixed the other problem I mentioned up-thread, with %s
for NULL values:
SELECT format('|%s|', NULL);
Result: ||
SELECT format('|%5s|', NULL);
Result: ||
In the second case, I think it should produce | |.
Regards,
Dean
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Hello
2013/2/13 Dean Rasheed <dean.a.rasheed@gmail.com>:
On 11 February 2013 14:29, Pavel Stehule <pavel.stehule@gmail.com> wrote:
Hello
updated patch
* merged Dean's doc
* allow NULL as widthHi,
I have not had time to look at this properly, but it doesn't look as
though you have fixed the other problem I mentioned up-thread, with %s
for NULL values:SELECT format('|%s|', NULL);
Result: ||
SELECT format('|%5s|', NULL);
Result: ||In the second case, I think it should produce | |.
fixed
Regards
Pavel Stehule
Show quoted text
Regards,
Dean
Attachments:
format-width-20130213.patchapplication/octet-stream; name=format-width-20130213.patchDownload
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
***************
*** 1519,1539 ****
<primary>format</primary>
</indexterm>
<literal><function>format</function>(<parameter>formatstr</parameter> <type>text</type>
! [, <parameter>str</parameter> <type>"any"</type> [, ...] ])</literal>
</entry>
<entry><type>text</type></entry>
<entry>
Format arguments according to a format string.
! This function is similar to the C function
! <function>sprintf</>, but only the following conversion specifications
! are recognized: <literal>%s</literal> interpolates the corresponding
! argument as a string; <literal>%I</literal> escapes its argument as
! an SQL identifier; <literal>%L</literal> escapes its argument as an
! SQL literal; <literal>%%</literal> outputs a literal <literal>%</>.
! A conversion can reference an explicit parameter position by preceding
! the conversion specifier with <literal><replaceable>n</>$</>, where
! <replaceable>n</replaceable> is the argument position.
! See also <xref linkend="plpgsql-quote-literal-example">.
</entry>
<entry><literal>format('Hello %s, %1$s', 'World')</literal></entry>
<entry><literal>Hello World, World</literal></entry>
--- 1519,1531 ----
<primary>format</primary>
</indexterm>
<literal><function>format</function>(<parameter>formatstr</parameter> <type>text</type>
! [, <parameter>formatarg</parameter> <type>"any"</type> [, ...] ])</literal>
</entry>
<entry><type>text</type></entry>
<entry>
Format arguments according to a format string.
! This function is similar to the C function <function>sprintf</>.
! See <xref linkend="functions-string-format">.
</entry>
<entry><literal>format('Hello %s, %1$s', 'World')</literal></entry>
<entry><literal>Hello World, World</literal></entry>
***************
*** 2847,2852 ****
--- 2839,3024 ----
</tgroup>
</table>
+ <sect2 id="functions-string-format">
+ <title><function>format</function></title>
+
+ <indexterm>
+ <primary>format</primary>
+ </indexterm>
+
+ <para>
+ The function <function>format</> produces formatted output according to
+ a format string in a similar way to the C function <function>sprintf</>.
+ </para>
+
+ <para>
+ <synopsis>
+ format(<parameter>formatstr</> <type>text</> [, <parameter>formatarg</> <type>"any"</> [, ...] ])
+ </synopsis>
+ <replaceable>formatstr</> is a format string that specifies how the
+ result should be formatted. Text in the format string is copied directly
+ to the result, except where <firstterm>format specifiers</> are used.
+ Format specifiers act as placeholders in the string, allowing subsequent
+ function arguments to be formatted and inserted into the result.
+ </para>
+
+ <para>
+ Format specifiers are introduced by a <literal>%</> character and take
+ the form
+ <synopsis>
+ %[<replaceable>parameter</>][<replaceable>flags</>][<replaceable>width</>]<replaceable>type</>
+ </synopsis>
+ <variablelist>
+ <varlistentry>
+ <term><replaceable>parameter</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ An expression of the form <literal><replaceable>n</>$</> where
+ <replaceable>n</> is the index of the argument to use for the format
+ specifier's value. An index of 1 means the first argument after
+ <replaceable>formatstr</>. If the <replaceable>parameter</> field is
+ omitted, the default is to use the next argument.
+ </para>
+ <screen>
+ SELECT format('Testing %s, %s, %s', 'one', 'two', 'three');
+ <lineannotation>Result: </><computeroutput>Testing one, two, three</>
+
+ SELECT format('Testing %3$s, %2$s, %1$s', 'one', 'two', 'three');
+ <lineannotation>Result: </><computeroutput>Testing three, two, one</>
+ </screen>
+
+ <para>
+ Note that unlike the C function <function>sprintf</> defined in the
+ Single UNIX Specification, the <function>format</> function in
+ <productname>PostgreSQL</> allows format specifiers with and without
+ explicit <replaceable>parameter</> fields to be mixed in the same
+ format string. A format specifier without a
+ <replaceable>parameter</> field always uses the next argument after
+ the last argument consumed. In addition, the
+ <productname>PostgreSQL</> <function>format</> function does not
+ require all function arguments to be referred to in the format
+ string.
+ </para>
+ <screen>
+ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
+ <lineannotation>Result: </><computeroutput>Testing three, two, three</>
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>flags</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ Additional options controlling how the format specifier's output is
+ formatted. Currently the only supported flag is an minus sign
+ (<literal>-</>) which will cause the format specifier's output to be
+ left-aligned. This has no effect unless the <replaceable>width</>
+ field is also specified.
+ </para>
+ <screen>
+ SELECT format('|%10s|%-10s|', 'foo', 'bar');
+ <lineannotation>Result: </><computeroutput>| foo|bar |</>
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>width</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ Specifies the <emphasis>minimum</> number of characters to use to
+ display the format specifier's output. The width may be specified
+ using any of the following: a positive integer; an asterisk
+ (<literal>*</>) to use the next function argument as the width; or an
+ expression of the form <literal>*<replaceable>n</>$</> to use the
+ <replaceable>n</>th function argument as the width.
+ </para>
+
+ <para>
+ If the width comes from a function argument, that argument is
+ consumed <emphasis>before</> the argument that is used for the format
+ specifier's value. If the width argument is negative, the result is
+ left aligned, as if the <literal>-</> flag had been specified.
+ </para>
+ <screen>
+ SELECT format('|%10s|', 'foo');
+ <lineannotation>Result: </><computeroutput>| foo|</>
+
+ SELECT format('|%*s|', 10, 'foo');
+ <lineannotation>Result: </><computeroutput>| foo|</>
+
+ SELECT format('|%*s|', -10, 'foo');
+ <lineannotation>Result: </><computeroutput>|foo |</>
+
+ SELECT format('|%-*s|', 10, 'foo');
+ <lineannotation>Result: </><computeroutput>|foo |</>
+
+ SELECT format('|%-*s|', -10, 'foo');
+ <lineannotation>Result: </><computeroutput>|foo |</>
+
+ SELECT format('|%*2$s|', 'foo', 10, 'bar');
+ <lineannotation>Result: </><computeroutput>| bar|</>
+
+ SELECT format('|%3$*2$s|', 'foo', 10, 'bar');
+ <lineannotation>Result: </><computeroutput>| bar|</>
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>type</replaceable> (required)</term>
+ <listitem>
+ <para>
+ The type of format conversion to use to produce the format
+ specifier's output. The following types are supported:
+ <itemizedlist>
+ <listitem>
+ <para>
+ <literal>s</literal> formats the argument value as a simple
+ string. A null value is treated as an empty string.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>I</literal> escapes the value as an SQL identifier. It
+ is an error for the value to be null.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>L</literal> escapes the value as an SQL literal. A null
+ value is displayed as the literal value <literal>NULL</>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ <screen>
+ SELECT format('Hello %s', 'World');
+ <lineannotation>Result: </lineannotation><computeroutput>Hello World</computeroutput>
+
+ SELECT format('DROP TABLE %I', 'Foo bar');
+ <lineannotation>Result: </lineannotation><computeroutput>DROP TABLE "Foo bar"</computeroutput>
+
+ SELECT format('SELECT %L', E'O\'Reilly');
+ <lineannotation>Result: </lineannotation><computeroutput>SELECT 'O''Reilly'</computeroutput>
+ </screen>
+
+ <para>
+ The <literal>%I</> and <literal>%L</> format specifiers may be used
+ to safely construct dynamic SQL statements. See
+ <xref linkend="plpgsql-quote-literal-example">.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </para>
+
+ <para>
+ In addition to the format specifiers above, the special escape sequence
+ <literal>%%</> may be used to output a literal <literal>%</> character.
+ </para>
+ </sect2>
</sect1>
*** a/src/backend/utils/adt/varlena.c
--- b/src/backend/utils/adt/varlena.c
***************
*** 78,84 **** static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
static void text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
const char *fldsep, const char *null_string);
--- 78,85 ----
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
static void text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull,
! int flags, bool use_width, int width);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
const char *fldsep, const char *null_string);
***************
*** 3997,4002 **** text_reverse(PG_FUNCTION_ARGS)
--- 3998,4134 ----
}
/*
+ * Returns pointer of first non digit char
+ *
+ * When some digits are processed, then inum returns parsed number
+ * and is_valid is true, otherwise is_valid is false.
+ */
+ static const char *
+ text_format_parse_digits(const char *start_ptr, const char *end_ptr,
+ int *inum, /* OUT param */
+ bool *is_valid) /* OUT param */
+ {
+ const char *cp = start_ptr;
+
+ *is_valid = false;
+ *inum = 0;
+
+ /* continue, only when start_ptr is less than end_ptr */
+ while (cp < end_ptr)
+ {
+ if (*cp >= '0' && *cp <= '9')
+ {
+ int newnum = *inum * 10 + (*cp - '0');
+
+ if (newnum / 10 != *inum) /* overflow? */
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("number is out of range")));
+ *inum = newnum;
+ *is_valid = true;
+ ++cp;
+ }
+ else
+ break;
+ }
+
+ /* should not be after last char of format string */
+ if (cp >= end_ptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unterminated conversion specifier")));
+
+ return cp;
+ }
+
+ #define TEXT_FORMAT_FLAG_MINUS 0x0001 /* is minus in format string? */
+
+ #define TEXT_FORMAT_NEXT_CHAR(ptr, end_ptr) \
+ do { \
+ if (++(ptr) >= (end_ptr)) \
+ ereport(ERROR, \
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
+ errmsg("unterminated conversion specifier"))); \
+ } while (0)
+
+ /*
+ * parse format specification
+ * [parameter][flags][width]type
+ *
+ * -1 in int OUT parameters signalize so this attribut is missing,
+ * all values should be zero or greather than zero. Raise exeption
+ * when some parameter is out of range.
+ */
+ static const char *
+ text_format_parse_format(const char *start_ptr, const char *end_ptr,
+ int *parameter, /* OUT param */
+ int *flags, /* OUT param */
+ int *width, /* OUT param */
+ bool *indirect_width, /* OUT param */
+ int *indirect_width_parameter) /* OUT param */
+ {
+ int inum;
+ bool is_valid;
+
+ /* set defaults to out parameters */
+ *parameter = -1;
+ *flags = 0;
+ *width = -1;
+ *indirect_width = false;
+ *indirect_width_parameter = -1;
+
+ /* try to identify first number */
+ start_ptr = text_format_parse_digits(start_ptr, end_ptr, &inum, &is_valid);
+ if (is_valid)
+ {
+ /* number on this position can be parameter number or the width */
+ if (*start_ptr != '$')
+ {
+ *width = inum;
+ return start_ptr;
+ }
+
+ *parameter = inum;
+ TEXT_FORMAT_NEXT_CHAR(start_ptr, end_ptr);
+ }
+
+ /* try to parse flags, only minus is supported now */
+ if (*start_ptr == '-')
+ {
+ *flags = *flags | TEXT_FORMAT_FLAG_MINUS;
+ TEXT_FORMAT_NEXT_CHAR(start_ptr, end_ptr);
+ }
+
+ /* try to parse indirect width */
+ if (*start_ptr == '*')
+ {
+ start_ptr = text_format_parse_digits(++start_ptr, end_ptr, &inum, &is_valid);
+ if (is_valid)
+ {
+ /* number in this position should be closed by $ */
+ if (*start_ptr != '$')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unexpected char \"%c\"", *start_ptr)));
+ TEXT_FORMAT_NEXT_CHAR(start_ptr, end_ptr);
+
+ *indirect_width_parameter = inum;
+ }
+ else
+ *indirect_width = true;
+
+ return start_ptr;
+ }
+
+ /* last possible nummber - width */
+ start_ptr = text_format_parse_digits(start_ptr, end_ptr, &inum, &is_valid);
+ if (is_valid)
+ *width = inum;
+
+ return start_ptr;
+ }
+
+ /*
* Returns a formated string
*/
Datum
***************
*** 4016,4021 **** text_format(PG_FUNCTION_ARGS)
--- 4148,4155 ----
Oid element_type = InvalidOid;
Oid prev_type = InvalidOid;
FmgrInfo typoutputfinfo;
+ FmgrInfo typoutputinfo_width;
+ Oid prev_type_width = InvalidOid;
/* When format string is null, returns null */
if (PG_ARGISNULL(0))
***************
*** 4088,4093 **** text_format(PG_FUNCTION_ARGS)
--- 4222,4233 ----
Datum value;
bool isNull;
Oid typid;
+ int parameter;
+ int flags;
+ int width;
+ bool indirect_width;
+ int indirect_width_parameter;
+ bool use_width;
/*
* If it's not the start of a conversion specifier, just copy it to
***************
*** 4099,4109 **** text_format(PG_FUNCTION_ARGS)
continue;
}
! /* Did we run off the end of the string? */
! if (++cp >= end_ptr)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("unterminated conversion specifier")));
/* Easy case: %% outputs a single % */
if (*cp == '%')
--- 4239,4246 ----
continue;
}
! /* go to next char */
! TEXT_FORMAT_NEXT_CHAR(cp, end_ptr);
/* Easy case: %% outputs a single % */
if (*cp == '%')
***************
*** 4112,4125 **** text_format(PG_FUNCTION_ARGS)
continue;
}
! /*
! * If the user hasn't specified an argument position, we just advance
! * to the next one. If they have, we must parse it.
! */
! if (*cp < '0' || *cp > '9')
{
! ++arg;
! if (arg <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
--- 4249,4264 ----
continue;
}
! cp = text_format_parse_format(cp, end_ptr,
! ¶meter,
! &flags,
! &width,
! &indirect_width,
! &indirect_width_parameter);
!
! if (indirect_width)
{
! if (++arg <= 0) /* overflow? */
{
/*
* Should not happen, as you can't pass billions of arguments
***************
*** 4129,4177 **** text_format(PG_FUNCTION_ARGS)
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("argument number is out of range")));
}
}
! else
{
! bool unterminated = false;
!
! /* Parse digit string. */
! arg = 0;
! do
! {
! int newarg = arg * 10 + (*cp - '0');
!
! if (newarg / 10 != arg) /* overflow? */
! ereport(ERROR,
! (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
! errmsg("argument number is out of range")));
! arg = newarg;
! ++cp;
! } while (cp < end_ptr && *cp >= '0' && *cp <= '9');
! /*
! * If we ran off the end, or if there's not a $ next, or if the $
! * is the last character, the conversion specifier is improperly
! * terminated.
! */
! if (cp == end_ptr || *cp != '$')
! unterminated = true;
! else
{
! ++cp;
! if (cp == end_ptr)
! unterminated = true;
! }
! if (unterminated)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("unterminated conversion specifier")));
!
! /* There's no argument 0. */
! if (arg == 0)
ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
}
/* Not enough arguments? Deduct 1 to avoid counting format string. */
if (arg > nargs - 1)
--- 4268,4301 ----
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("argument number is out of range")));
}
+ indirect_width_parameter = arg;
}
! else if (indirect_width_parameter != -1)
{
! /* be consistent, move ordered argument together with positional */
! arg = indirect_width_parameter;
! }
! if (parameter == -1)
! {
! if (++arg <= 0) /* overflow? */
{
! /*
! * Should not happen, as you can't pass billions of arguments
! * to a function, but better safe than sorry.
! */
ereport(ERROR,
! (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
! errmsg("argument number is out of range")));
! }
}
+ else
+ arg = parameter;
+
+ if (arg == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
/* Not enough arguments? Deduct 1 to avoid counting format string. */
if (arg > nargs - 1)
***************
*** 4179,4184 **** text_format(PG_FUNCTION_ARGS)
--- 4303,4380 ----
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("too few arguments for format")));
+ if (indirect_width_parameter != -1)
+ {
+
+ if (indirect_width_parameter == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
+
+ /* Not enough arguments? Deduct 1 to avoid counting format string. */
+ if (indirect_width_parameter > nargs - 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("too few arguments for format")));
+
+ /* get value of related parameter that holds width and coerce to int */
+ if (!funcvariadic)
+ {
+ value = PG_GETARG_DATUM(indirect_width_parameter);
+ isNull = PG_ARGISNULL(indirect_width_parameter);
+ typid = get_fn_expr_argtype(fcinfo->flinfo, indirect_width_parameter);
+ }
+ else
+ {
+ value = elements[indirect_width_parameter - 1];
+ isNull = nulls[indirect_width_parameter - 1];
+ typid = element_type;
+ }
+ if (!OidIsValid(typid))
+ elog(ERROR, "could not determine data type of format() input");
+
+ /*
+ * we don't need to different between NULL and zero in this moment,
+ * NULL means ignore this width - same as zero.
+ */
+ if (isNull)
+ {
+ value = 0;
+ isNull = false;
+ typid = INT4OID;
+ }
+ else if (typid != INT4OID)
+ {
+ char *str;
+
+ /* simple IO cast to int */
+ if (typid != prev_type_width)
+ {
+ Oid typoutputfunc;
+ bool typIsVarlena;
+
+ getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
+ fmgr_info(typoutputfunc, &typoutputinfo_width);
+ prev_type_width = typid;
+ }
+
+ /* Stringify. */
+ str = OutputFunctionCall(&typoutputinfo_width, value);
+
+ /* get int value */
+ width = pg_atoi(str, sizeof(int32), '\0');
+ pfree(str);
+ }
+ else
+ width = DatumGetInt32(value);
+
+ use_width = true;
+ }
+ else if (width != -1)
+ use_width = true;
+ else
+ use_width = false;
+
/* Get the value and type of the selected argument */
if (!funcvariadic)
{
***************
*** 4221,4227 **** text_format(PG_FUNCTION_ARGS)
case 'I':
case 'L':
text_format_string_conversion(&str, *cp, &typoutputfinfo,
! value, isNull);
break;
default:
ereport(ERROR,
--- 4417,4424 ----
case 'I':
case 'L':
text_format_string_conversion(&str, *cp, &typoutputfinfo,
! value, isNull,
! flags, use_width, width);
break;
default:
ereport(ERROR,
***************
*** 4244,4288 **** text_format(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(result);
}
/* Format a %s, %I, or %L conversion. */
static void
text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull)
{
char *str;
- /* Handle NULL arguments before trying to stringify the value. */
if (isNull)
{
! if (conversion == 'L')
! appendStringInfoString(buf, "NULL");
else if (conversion == 'I')
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("null values cannot be formatted as an SQL identifier")));
return;
}
- /* Stringify. */
str = OutputFunctionCall(typOutputInfo, value);
/* Escape. */
if (conversion == 'I')
{
/* quote_identifier may or may not allocate a new string. */
! appendStringInfoString(buf, quote_identifier(str));
}
else if (conversion == 'L')
! {
! char *qstr = quote_literal_cstr(str);
! appendStringInfoString(buf, qstr);
/* quote_literal_cstr() always allocates a new string */
pfree(qstr);
}
else
! appendStringInfoString(buf, str);
/* Cleanup. */
pfree(str);
--- 4441,4536 ----
PG_RETURN_TEXT_P(result);
}
+ /*
+ * Add spaces on begin or on end when it is necessary
+ */
+ static void
+ text_format_append_string(StringInfo buf, const char *str,
+ int flags, bool use_width, int width)
+ {
+ bool align_to_left;
+ int len;
+
+ /* fast path */
+ if (!use_width)
+ {
+ appendStringInfoString(buf, str);
+ return;
+ }
+
+ if (width < 0)
+ {
+ width = -width;
+ align_to_left = true;
+ }
+ else if (flags & TEXT_FORMAT_FLAG_MINUS)
+ {
+ align_to_left = true;
+ }
+ else
+ align_to_left = false;
+
+ len = pg_mbstrlen(str);
+ if (align_to_left)
+ {
+ appendStringInfoString(buf, str);
+ if (len < width)
+ appendStringInfoSpaces(buf, width - len);
+ }
+ else
+ {
+ /* align_to_right */
+ if (len < width)
+ appendStringInfoSpaces(buf, width - len);
+ appendStringInfoString(buf, str);
+ }
+ }
+
/* Format a %s, %I, or %L conversion. */
static void
text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull,
! int flags, bool use_width, int width)
{
char *str;
if (isNull)
{
! if (conversion == 's')
! text_format_append_string(buf, "",
! flags, use_width, width);
! else if (conversion == 'L')
! text_format_append_string(buf, "NULL",
! flags, use_width, width);
else if (conversion == 'I')
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("null values cannot be formatted as an SQL identifier")));
+
return;
}
str = OutputFunctionCall(typOutputInfo, value);
/* Escape. */
if (conversion == 'I')
{
/* quote_identifier may or may not allocate a new string. */
! text_format_append_string(buf, quote_identifier(str),
! flags, use_width, width);
}
else if (conversion == 'L')
! { char *qstr = quote_literal_cstr(str);
! text_format_append_string(buf, qstr,
! flags, use_width, width);
/* quote_literal_cstr() always allocates a new string */
pfree(qstr);
}
else
! text_format_append_string(buf, str,
! flags, use_width, width);
/* Cleanup. */
pfree(str);
*** a/src/test/regress/expected/text.out
--- b/src/test/regress/expected/text.out
***************
*** 257,267 **** ERROR: too few arguments for format
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
select format('%1s', 1);
! ERROR: unterminated conversion specifier
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
! ERROR: unrecognized conversion specifier "1"
-- check mix of positional and ordered placeholders
select format('Hello %s %1$s %s', 'World', 'Hello again');
format
--- 257,271 ----
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
select format('%1s', 1);
! format
! --------
! 1
! (1 row)
!
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
! ERROR: unterminated conversion specifier
-- check mix of positional and ordered placeholders
select format('Hello %s %1$s %s', 'World', 'Hello again');
format
***************
*** 328,330 **** from generate_series(1,200) g(i);
--- 332,405 ----
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200
(1 row)
+ -- left, right align
+ select format('>>%10s<<', 'Hello')
+ union all
+ select format('>>%10s<<', NULL)
+ union all
+ select format('>>%10s<<', '')
+ union all
+ select format('>>%-10s<<', '')
+ union all
+ select format('>>%-10s<<', 'Hello')
+ union all
+ select format('>>%-10s<<', NULL)
+ union all
+ select format('>>%1$10s<<', 'Hello')
+ union all
+ select format('>>%1$-10I<<', 'Hello')
+ union all
+ select format('>>%2$*1$L<<', 10, 'Hello')
+ union all
+ select format('>>%2$*1$L<<', 10, NULL)
+ union all
+ select format('>>%2$*1$L<<', -10, NULL)
+ union all
+ select format('>>%*s<<', 10, 'Hello');
+ format
+ ----------------
+ >> Hello<<
+ >> <<
+ >> <<
+ >> <<
+ >>Hello <<
+ >> <<
+ >> Hello<<
+ >>"Hello" <<
+ >> 'Hello'<<
+ >> NULL<<
+ >>NULL <<
+ >> Hello<<
+ (12 rows)
+
+ select format('>>%*1$s<<', 10, 'Hello');
+ format
+ ----------------
+ >> Hello<<
+ (1 row)
+
+ select format('>>%-s<<', 'Hello');
+ format
+ -----------
+ >>Hello<<
+ (1 row)
+
+ -- NULL is not different to zero here
+ select format('>>%10L<<', NULL);
+ format
+ ----------------
+ >> NULL<<
+ (1 row)
+
+ select format('>>%2$*1$L<<', NULL, 'Hello');
+ format
+ -------------
+ >>'Hello'<<
+ (1 row)
+
+ select format('>>%2$*1$L<<', 0, 'Hello');
+ format
+ -------------
+ >>'Hello'<<
+ (1 row)
+
*** a/src/test/regress/sql/text.sql
--- b/src/test/regress/sql/text.sql
***************
*** 97,99 **** select format('Hello', variadic NULL);
--- 97,132 ----
-- variadic argument allows simulating more than FUNC_MAX_ARGS parameters
select format(string_agg('%s',','), variadic array_agg(i))
from generate_series(1,200) g(i);
+
+ -- left, right align
+ select format('>>%10s<<', 'Hello')
+ union all
+ select format('>>%10s<<', NULL)
+ union all
+ select format('>>%10s<<', '')
+ union all
+ select format('>>%-10s<<', '')
+ union all
+ select format('>>%-10s<<', 'Hello')
+ union all
+ select format('>>%-10s<<', NULL)
+ union all
+ select format('>>%1$10s<<', 'Hello')
+ union all
+ select format('>>%1$-10I<<', 'Hello')
+ union all
+ select format('>>%2$*1$L<<', 10, 'Hello')
+ union all
+ select format('>>%2$*1$L<<', 10, NULL)
+ union all
+ select format('>>%2$*1$L<<', -10, NULL)
+ union all
+ select format('>>%*s<<', 10, 'Hello');
+
+ select format('>>%*1$s<<', 10, 'Hello');
+ select format('>>%-s<<', 'Hello');
+
+ -- NULL is not different to zero here
+ select format('>>%10L<<', NULL);
+ select format('>>%2$*1$L<<', NULL, 'Hello');
+ select format('>>%2$*1$L<<', 0, 'Hello');
Hello, Could you let me review this patch?
* merged Dean's doc
* allow NULL as width
I understand that this patch aims pure expansion of format's
current behavior and to mimic the printf in SUS glibc (*1).
(*1) http://pubs.opengroup.org/onlinepubs/009695399/functions/printf.html
This patch seems to preserve the behaviors of current
implement. And also succeeds in mimicking almost of SUS without
very subtle difference.
Attached is the new patch which I've edited following the
comments below and some fixed of typos, and added a few regtests.
If you have no problem with this, I'll send this to committer.
What do you think of this?
My comments are below,
======================================
Following is a comment about the behavior.
- The minus('-') is a flag, not a sign nor a operator. So this
seems permitted to appear more than one time. For example,
printf(">>%-------10s<<", "hoge") yields the output
">>hoge______<<" safely. This is consistent with the behavior
when negative value is supplied to '-*' conversion.
Followings are some comments about coding,
in text_format_parse_digits,
- is_valid seems to be the primary return value so returning
this as function's return value should make the caller more
simple.
- Although the compiler should deal properly with that, I don't
think it proper to use the memory pointed by function
parameters as local working storage. *inum and *is_valid in
the while loop should be replaced with local variables and
set them after the values are settled.
for TEXT_FORMAT_NEXT_CHAR,
- This macro name sounds somewhat confusing and this could be
used also in text_format_parse_digits. I propose
FORWARD_PARSE_POINT instead. Also I removed end_ptr from
macro parameters but I'm not sure of the pertinence of that.
for text_format_parse_format:
- Using start_ptr as a working pointer makes the name
inappropriate.
- Out parameters seems somewhat redundant. indirect_width and
indirect_width_parameter could be merged using 0 to indicate
unnumbered.
for text_format:
- maximum number of function argument limited to FUNC_MAX_ARGS
(100), so no need to care of wrap around of argument index, I
suppose.
- Something seems confusing at the lines follow
| /* Not enough arguments? Deduct 1 to avoid counting format string. */
| if (arg > nargs - 1)
This expression does not have so special meaning. The maximum
index in an zero-based array should not be equal to or larger
than the number of the elements of it. If that's not your
intent, some rewrite would be needed..
- Only int4 is directly read for width value in the latest
patch, but int2 can also be directly readable and it should
be needed.
regards,
--
Kyotaro Horiguchi
NTT Open Source Software Center
Attachments:
Umm. sorry,
If you have no problem with this, I'll send this to committer.
I just found that this patch already has a revewer. I've seen
only Status field in patch list..
Should I leave this to you, Dean?
--
Kyotaro Horiguchi
NTT Open Source Software Center
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Hello
I have no objections,
Thank you for update
Regards
Pavel
2013/2/28 Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp>:
Hello, Could you let me review this patch?
* merged Dean's doc
* allow NULL as widthI understand that this patch aims pure expansion of format's
current behavior and to mimic the printf in SUS glibc (*1).(*1) http://pubs.opengroup.org/onlinepubs/009695399/functions/printf.html
This patch seems to preserve the behaviors of current
implement. And also succeeds in mimicking almost of SUS without
very subtle difference.Attached is the new patch which I've edited following the
comments below and some fixed of typos, and added a few regtests.If you have no problem with this, I'll send this to committer.
What do you think of this?
My comments are below,
======================================
Following is a comment about the behavior.- The minus('-') is a flag, not a sign nor a operator. So this
seems permitted to appear more than one time. For example,
printf(">>%-------10s<<", "hoge") yields the output
">>hoge______<<" safely. This is consistent with the behavior
when negative value is supplied to '-*' conversion.Followings are some comments about coding,
in text_format_parse_digits,
- is_valid seems to be the primary return value so returning
this as function's return value should make the caller more
simple.- Although the compiler should deal properly with that, I don't
think it proper to use the memory pointed by function
parameters as local working storage. *inum and *is_valid in
the while loop should be replaced with local variables and
set them after the values are settled.for TEXT_FORMAT_NEXT_CHAR,
- This macro name sounds somewhat confusing and this could be
used also in text_format_parse_digits. I propose
FORWARD_PARSE_POINT instead. Also I removed end_ptr from
macro parameters but I'm not sure of the pertinence of that.for text_format_parse_format:
- Using start_ptr as a working pointer makes the name
inappropriate.- Out parameters seems somewhat redundant. indirect_width and
indirect_width_parameter could be merged using 0 to indicate
unnumbered.for text_format:
- maximum number of function argument limited to FUNC_MAX_ARGS
(100), so no need to care of wrap around of argument index, I
suppose.- Something seems confusing at the lines follow
| /* Not enough arguments? Deduct 1 to avoid counting format string. */
| if (arg > nargs - 1)This expression does not have so special meaning. The maximum
index in an zero-based array should not be equal to or larger
than the number of the elements of it. If that's not your
intent, some rewrite would be needed..- Only int4 is directly read for width value in the latest
patch, but int2 can also be directly readable and it should
be needed.regards,
--
Kyotaro Horiguchi
NTT Open Source Software Center
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 28 February 2013 11:25, Kyotaro HORIGUCHI
<horiguchi.kyotaro@lab.ntt.co.jp> wrote:
Umm. sorry,
If you have no problem with this, I'll send this to committer.
I just found that this patch already has a revewer. I've seen
only Status field in patch list..Should I leave this to you, Dean?
Sorry, I've been meaning to review this properly for some time, but
I've been swamped with other work, so I'm happy for you to take over.
My overall impression is that the patch is in good shape, and provides
valuable new functionality, and it is probably close to being ready
for committer.
I think that the only other behavioural glitch I spotted was that it
fails to catch one overflow case, which should generate an "out of
ranger" error:
select format('|%*s|', -2147483648, 'foo');
Result: |foo|
because -(-2147483648) = 0 in signed 32-bit integers.
Apart from that, I didn't find any problems during my testing.
Thanks for your review.
Regards,
Dean
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Hello
2013/2/28 Dean Rasheed <dean.a.rasheed@gmail.com>:
On 28 February 2013 11:25, Kyotaro HORIGUCHI
<horiguchi.kyotaro@lab.ntt.co.jp> wrote:Umm. sorry,
If you have no problem with this, I'll send this to committer.
I just found that this patch already has a revewer. I've seen
only Status field in patch list..Should I leave this to you, Dean?
Sorry, I've been meaning to review this properly for some time, but
I've been swamped with other work, so I'm happy for you to take over.My overall impression is that the patch is in good shape, and provides
valuable new functionality, and it is probably close to being ready
for committer.I think that the only other behavioural glitch I spotted was that it
fails to catch one overflow case, which should generate an "out of
ranger" error:select format('|%*s|', -2147483648, 'foo');
Result: |foo|because -(-2147483648) = 0 in signed 32-bit integers.
fixed - next other overflow check added
Regards
Pavel
Show quoted text
Apart from that, I didn't find any problems during my testing.
Thanks for your review.
Regards,
Dean
Attachments:
format-width-20130301.patchapplication/octet-stream; name=format-width-20130301.patchDownload
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
***************
*** 1519,1539 ****
<primary>format</primary>
</indexterm>
<literal><function>format</function>(<parameter>formatstr</parameter> <type>text</type>
! [, <parameter>str</parameter> <type>"any"</type> [, ...] ])</literal>
</entry>
<entry><type>text</type></entry>
<entry>
Format arguments according to a format string.
! This function is similar to the C function
! <function>sprintf</>, but only the following conversion specifications
! are recognized: <literal>%s</literal> interpolates the corresponding
! argument as a string; <literal>%I</literal> escapes its argument as
! an SQL identifier; <literal>%L</literal> escapes its argument as an
! SQL literal; <literal>%%</literal> outputs a literal <literal>%</>.
! A conversion can reference an explicit parameter position by preceding
! the conversion specifier with <literal><replaceable>n</>$</>, where
! <replaceable>n</replaceable> is the argument position.
! See also <xref linkend="plpgsql-quote-literal-example">.
</entry>
<entry><literal>format('Hello %s, %1$s', 'World')</literal></entry>
<entry><literal>Hello World, World</literal></entry>
--- 1519,1531 ----
<primary>format</primary>
</indexterm>
<literal><function>format</function>(<parameter>formatstr</parameter> <type>text</type>
! [, <parameter>formatarg</parameter> <type>"any"</type> [, ...] ])</literal>
</entry>
<entry><type>text</type></entry>
<entry>
Format arguments according to a format string.
! This function is similar to the C function <function>sprintf</>.
! See <xref linkend="functions-string-format">.
</entry>
<entry><literal>format('Hello %s, %1$s', 'World')</literal></entry>
<entry><literal>Hello World, World</literal></entry>
***************
*** 2847,2852 ****
--- 2839,3024 ----
</tgroup>
</table>
+ <sect2 id="functions-string-format">
+ <title><function>format</function></title>
+
+ <indexterm>
+ <primary>format</primary>
+ </indexterm>
+
+ <para>
+ The function <function>format</> produces formatted output according to
+ a format string in a similar way to the C function <function>sprintf</>.
+ </para>
+
+ <para>
+ <synopsis>
+ format(<parameter>formatstr</> <type>text</> [, <parameter>formatarg</> <type>"any"</> [, ...] ])
+ </synopsis>
+ <replaceable>formatstr</> is a format string that specifies how the
+ result should be formatted. Text in the format string is copied directly
+ to the result, except where <firstterm>format specifiers</> are used.
+ Format specifiers act as placeholders in the string, allowing subsequent
+ function arguments to be formatted and inserted into the result.
+ </para>
+
+ <para>
+ Format specifiers are introduced by a <literal>%</> character and take
+ the form
+ <synopsis>
+ %[<replaceable>parameter</>][<replaceable>flags</>][<replaceable>width</>]<replaceable>type</>
+ </synopsis>
+ <variablelist>
+ <varlistentry>
+ <term><replaceable>parameter</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ An expression of the form <literal><replaceable>n</>$</> where
+ <replaceable>n</> is the index of the argument to use for the format
+ specifier's value. An index of 1 means the first argument after
+ <replaceable>formatstr</>. If the <replaceable>parameter</> field is
+ omitted, the default is to use the next argument.
+ </para>
+ <screen>
+ SELECT format('Testing %s, %s, %s', 'one', 'two', 'three');
+ <lineannotation>Result: </><computeroutput>Testing one, two, three</>
+
+ SELECT format('Testing %3$s, %2$s, %1$s', 'one', 'two', 'three');
+ <lineannotation>Result: </><computeroutput>Testing three, two, one</>
+ </screen>
+
+ <para>
+ Note that unlike the C function <function>sprintf</> defined in the
+ Single UNIX Specification, the <function>format</> function in
+ <productname>PostgreSQL</> allows format specifiers with and without
+ explicit <replaceable>parameter</> fields to be mixed in the same
+ format string. A format specifier without a
+ <replaceable>parameter</> field always uses the next argument after
+ the last argument consumed. In addition, the
+ <productname>PostgreSQL</> <function>format</> function does not
+ require all function arguments to be referred to in the format
+ string.
+ </para>
+ <screen>
+ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
+ <lineannotation>Result: </><computeroutput>Testing three, two, three</>
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>flags</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ Additional options controlling how the format specifier's output is
+ formatted. Currently the only supported flag is an minus sign
+ (<literal>-</>) which will cause the format specifier's output to be
+ left-aligned. This has no effect unless the <replaceable>width</>
+ field is also specified.
+ </para>
+ <screen>
+ SELECT format('|%10s|%-10s|', 'foo', 'bar');
+ <lineannotation>Result: </><computeroutput>| foo|bar |</>
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>width</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ Specifies the <emphasis>minimum</> number of characters to use to
+ display the format specifier's output. The width may be specified
+ using any of the following: a positive integer; an asterisk
+ (<literal>*</>) to use the next function argument as the width; or an
+ expression of the form <literal>*<replaceable>n</>$</> to use the
+ <replaceable>n</>th function argument as the width.
+ </para>
+
+ <para>
+ If the width comes from a function argument, that argument is
+ consumed <emphasis>before</> the argument that is used for the format
+ specifier's value. If the width argument is negative, the result is
+ left aligned, as if the <literal>-</> flag had been specified.
+ </para>
+ <screen>
+ SELECT format('|%10s|', 'foo');
+ <lineannotation>Result: </><computeroutput>| foo|</>
+
+ SELECT format('|%*s|', 10, 'foo');
+ <lineannotation>Result: </><computeroutput>| foo|</>
+
+ SELECT format('|%*s|', -10, 'foo');
+ <lineannotation>Result: </><computeroutput>|foo |</>
+
+ SELECT format('|%-*s|', 10, 'foo');
+ <lineannotation>Result: </><computeroutput>|foo |</>
+
+ SELECT format('|%-*s|', -10, 'foo');
+ <lineannotation>Result: </><computeroutput>|foo |</>
+
+ SELECT format('|%*2$s|', 'foo', 10, 'bar');
+ <lineannotation>Result: </><computeroutput>| bar|</>
+
+ SELECT format('|%3$*2$s|', 'foo', 10, 'bar');
+ <lineannotation>Result: </><computeroutput>| bar|</>
+ </screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>type</replaceable> (required)</term>
+ <listitem>
+ <para>
+ The type of format conversion to use to produce the format
+ specifier's output. The following types are supported:
+ <itemizedlist>
+ <listitem>
+ <para>
+ <literal>s</literal> formats the argument value as a simple
+ string. A null value is treated as an empty string.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>I</literal> escapes the value as an SQL identifier. It
+ is an error for the value to be null.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>L</literal> escapes the value as an SQL literal. A null
+ value is displayed as the literal value <literal>NULL</>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ <screen>
+ SELECT format('Hello %s', 'World');
+ <lineannotation>Result: </lineannotation><computeroutput>Hello World</computeroutput>
+
+ SELECT format('DROP TABLE %I', 'Foo bar');
+ <lineannotation>Result: </lineannotation><computeroutput>DROP TABLE "Foo bar"</computeroutput>
+
+ SELECT format('SELECT %L', E'O\'Reilly');
+ <lineannotation>Result: </lineannotation><computeroutput>SELECT 'O''Reilly'</computeroutput>
+ </screen>
+
+ <para>
+ The <literal>%I</> and <literal>%L</> format specifiers may be used
+ to safely construct dynamic SQL statements. See
+ <xref linkend="plpgsql-quote-literal-example">.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </para>
+
+ <para>
+ In addition to the format specifiers above, the special escape sequence
+ <literal>%%</> may be used to output a literal <literal>%</> character.
+ </para>
+ </sect2>
</sect1>
*** a/src/backend/utils/adt/varlena.c
--- b/src/backend/utils/adt/varlena.c
***************
*** 78,84 **** static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
static void text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
const char *fldsep, const char *null_string);
--- 78,85 ----
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
static void text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull,
! int flags, int width);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
const char *fldsep, const char *null_string);
***************
*** 3996,4001 **** text_reverse(PG_FUNCTION_ARGS)
--- 3997,4133 ----
PG_RETURN_TEXT_P(result);
}
+ #define FORWARD_PARSE_POINT(ptr) \
+ do { \
+ if (++(ptr) >= (end_ptr)) \
+ ereport(ERROR, \
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
+ errmsg("unterminated conversion specifier"))); \
+ } while (0)
+
+ /*
+ * Parse congiguous digits into decimal number.
+ *
+ * Returns true if some digits could be parsed and *ptr moved to the next
+ * character to be parsed. The value is returned into *value.
+ */
+ static bool
+ text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
+ {
+ const char *cp = *ptr;
+ int wval = 0;
+ bool found;
+
+ /*
+ * continue, only when start_ptr is less than end_ptr.
+ * Overrun of cp is checked in FORWARD_PARSE_POINT.
+ */
+ while (*cp >= '0' && *cp <= '9')
+ {
+ int newnum = wval * 10 + (*cp - '0');
+
+ if (newnum / 10 != wval) /* overflow? */
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("number is out of range")));
+ wval = newnum;
+ FORWARD_PARSE_POINT(cp);
+ }
+
+ found = (cp > *ptr);
+ *value = wval;
+ *ptr = cp;
+
+ return found;
+ }
+
+ #define TEXT_FORMAT_FLAG_MINUS 0x0001 /* is minus in format string? */
+
+ #define SAMESIGN(a,b) (((a) < 0) == ((b) < 0))
+
+ /*
+ * parse format specification
+ * [argpos][flags][width]type
+ *
+ * Return values are,
+ * static const char * : Address to be parsed next.
+ * valarg : argument position for value to be printed. -1 means missing.
+ * widtharg : argument position for width. Zero means that argument position
+ * is not specified and -1 means missing.
+ * flags : flags
+ * width : the value for direct width specification, zero means that width
+ * is not specified.
+ */
+ static const char *
+ text_format_parse_format(const char *start_ptr, const char *end_ptr,
+ int *valarg, int *widtharg, int *flags, int *width)
+ {
+ const char *cp = start_ptr;
+ int n;
+
+ /* set defaults to out parameters */
+ *valarg = -1;
+ *widtharg = -1;
+ *flags = 0;
+ *width = 0;
+
+ /* try to identify first number */
+ if (text_format_parse_digits(&cp, end_ptr, &n))
+ {
+ if (*cp != '$')
+ {
+ *width = n; /* The number should be width */
+ return cp;
+ }
+ /* Explicit 0 for argument index is immediately refused */
+ if (n == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
+ *valarg = n; /* The number was argument position */
+ FORWARD_PARSE_POINT(cp);
+ }
+
+ /* Check for flags, only minus is supported now. */
+ while (*cp == '-')
+ {
+ *flags = *flags | TEXT_FORMAT_FLAG_MINUS;
+ FORWARD_PARSE_POINT(cp);
+ }
+
+ /* try to parse indirect width */
+ if (*cp == '*')
+ {
+ FORWARD_PARSE_POINT(cp);
+
+ if (text_format_parse_digits(&cp, end_ptr, &n)){
+ /* number in this position should be closed by $ */
+ if (*cp != '$')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unexpected char \"%c\".",*cp)));
+ FORWARD_PARSE_POINT(cp);
+
+ /* Explicit 0 for argument index is immediately refused */
+ if (n == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
+ *widtharg = n;
+ }
+ else
+ *widtharg = 0; /* 0 means argument position is not specified */
+
+ return cp;
+ }
+
+ /* last possible number - width */
+ if (text_format_parse_digits(&cp, end_ptr, &n))
+ *width = n;
+
+ return cp;
+ }
+
/*
* Returns a formated string
*/
***************
*** 4016,4021 **** text_format(PG_FUNCTION_ARGS)
--- 4148,4155 ----
Oid element_type = InvalidOid;
Oid prev_type = InvalidOid;
FmgrInfo typoutputfinfo;
+ FmgrInfo typoutputinfo_width;
+ Oid prev_type_width = InvalidOid;
/* When format string is null, returns null */
if (PG_ARGISNULL(0))
***************
*** 4077,4083 **** text_format(PG_FUNCTION_ARGS)
}
/* Setup for main loop. */
! fmt = PG_GETARG_TEXT_PP(0);
start_ptr = VARDATA_ANY(fmt);
end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
initStringInfo(&str);
--- 4211,4217 ----
}
/* Setup for main loop. */
! fmt = PG_GETARG_TEXT_PP(arg++);
start_ptr = VARDATA_ANY(fmt);
end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
initStringInfo(&str);
***************
*** 4085,4093 **** text_format(PG_FUNCTION_ARGS)
/* Scan format string, looking for conversion specifiers. */
for (cp = start_ptr; cp < end_ptr; cp++)
{
! Datum value;
! bool isNull;
! Oid typid;
/*
* If it's not the start of a conversion specifier, just copy it to
--- 4219,4231 ----
/* Scan format string, looking for conversion specifiers. */
for (cp = start_ptr; cp < end_ptr; cp++)
{
! Datum value;
! bool isNull;
! Oid typid;
! int valarg;
! int widtharg;
! int flags;
! int width;
/*
* If it's not the start of a conversion specifier, just copy it to
***************
*** 4099,4109 **** text_format(PG_FUNCTION_ARGS)
continue;
}
! /* Did we run off the end of the string? */
! if (++cp >= end_ptr)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("unterminated conversion specifier")));
/* Easy case: %% outputs a single % */
if (*cp == '%')
--- 4237,4243 ----
continue;
}
! FORWARD_PARSE_POINT(cp);
/* Easy case: %% outputs a single % */
if (*cp == '%')
***************
*** 4112,4184 **** text_format(PG_FUNCTION_ARGS)
continue;
}
! /*
! * If the user hasn't specified an argument position, we just advance
! * to the next one. If they have, we must parse it.
! */
! if (*cp < '0' || *cp > '9')
{
! ++arg;
! if (arg <= 0) /* overflow? */
! {
! /*
! * Should not happen, as you can't pass billions of arguments
! * to a function, but better safe than sorry.
! */
ereport(ERROR,
! (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
! errmsg("argument number is out of range")));
! }
! }
! else
! {
! bool unterminated = false;
! /* Parse digit string. */
! arg = 0;
! do
{
! int newarg = arg * 10 + (*cp - '0');
! if (newarg / 10 != arg) /* overflow? */
! ereport(ERROR,
! (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
! errmsg("argument number is out of range")));
! arg = newarg;
! ++cp;
! } while (cp < end_ptr && *cp >= '0' && *cp <= '9');
/*
! * If we ran off the end, or if there's not a $ next, or if the $
! * is the last character, the conversion specifier is improperly
! * terminated.
*/
! if (cp == end_ptr || *cp != '$')
! unterminated = true;
else
{
! ++cp;
! if (cp == end_ptr)
! unterminated = true;
}
! if (unterminated)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("unterminated conversion specifier")));
! /* There's no argument 0. */
! if (arg == 0)
ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
}
! /* Not enough arguments? Deduct 1 to avoid counting format string. */
! if (arg > nargs - 1)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("too few arguments for format")));
!
/* Get the value and type of the selected argument */
if (!funcvariadic)
{
--- 4246,4338 ----
continue;
}
! cp = text_format_parse_format(cp, end_ptr,
! &valarg, &widtharg, &flags, &width);
!
! if (widtharg >= 0)
{
! if (widtharg > 0)
! /* be consistent, move ordered argument together with
! * positional */
! arg = widtharg;
!
! if (arg >= nargs)
ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("too few arguments for format")));
! if (!funcvariadic)
{
! value = PG_GETARG_DATUM(arg);
! isNull = PG_ARGISNULL(arg);
! typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
! }
! else
! {
! value = elements[arg - 1];
! isNull = nulls[arg - 1];
! typid = element_type;
! }
! if (!OidIsValid(typid))
! elog(ERROR, "could not determine data type of format() input");
! arg++;
/*
! * we don't need to different between NULL and zero in this moment,
! * NULL means ignore this width - same as zero.
*/
! if (isNull)
! width = 0;
! else if (typid == INT4OID)
! width = DatumGetInt32(value);
! else if (typid == INT2OID)
! width = DatumGetInt16(value);
else
{
! char *str;
!
! /* simple IO cast to int */
! if (typid != prev_type_width)
! {
! Oid typoutputfunc;
! bool typIsVarlena;
!
! getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
! fmgr_info(typoutputfunc, &typoutputinfo_width);
! prev_type_width = typid;
! }
!
! /* Stringify. */
! str = OutputFunctionCall(&typoutputinfo_width, value);
!
! /* get int value */
! width = pg_atoi(str, sizeof(int32), '\0');
! pfree(str);
}
! }
! /* Overflow check */
! if (width != 0)
! {
! int32 _width = -width;
!
! if (SAMESIGN(width, _width))
ereport(ERROR,
! (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
! errmsg("number is out of range")));
}
! if (valarg >= 0)
! /* be consistent, move ordered argument together with
! * positional */
! arg = valarg;
!
! if (arg >= nargs)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("too few arguments for format")));
!
/* Get the value and type of the selected argument */
if (!funcvariadic)
{
***************
*** 4195,4200 **** text_format(PG_FUNCTION_ARGS)
--- 4349,4356 ----
if (!OidIsValid(typid))
elog(ERROR, "could not determine data type of format() input");
+ arg++;
+
/*
* Get the appropriate typOutput function, reusing previous one if
* same type as previous argument. That's particularly useful in the
***************
*** 4221,4227 **** text_format(PG_FUNCTION_ARGS)
case 'I':
case 'L':
text_format_string_conversion(&str, *cp, &typoutputfinfo,
! value, isNull);
break;
default:
ereport(ERROR,
--- 4377,4384 ----
case 'I':
case 'L':
text_format_string_conversion(&str, *cp, &typoutputfinfo,
! value, isNull,
! flags, width);
break;
default:
ereport(ERROR,
***************
*** 4244,4288 **** text_format(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(result);
}
/* Format a %s, %I, or %L conversion. */
static void
text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull)
{
char *str;
- /* Handle NULL arguments before trying to stringify the value. */
if (isNull)
{
! if (conversion == 'L')
! appendStringInfoString(buf, "NULL");
else if (conversion == 'I')
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("null values cannot be formatted as an SQL identifier")));
return;
}
- /* Stringify. */
str = OutputFunctionCall(typOutputInfo, value);
/* Escape. */
if (conversion == 'I')
{
/* quote_identifier may or may not allocate a new string. */
! appendStringInfoString(buf, quote_identifier(str));
}
else if (conversion == 'L')
! {
! char *qstr = quote_literal_cstr(str);
! appendStringInfoString(buf, qstr);
/* quote_literal_cstr() always allocates a new string */
pfree(qstr);
}
else
! appendStringInfoString(buf, str);
/* Cleanup. */
pfree(str);
--- 4401,4490 ----
PG_RETURN_TEXT_P(result);
}
+ /*
+ * Add spaces on begin or on end when it is necessary
+ */
+ static void
+ text_format_append_string(StringInfo buf, const char *str,
+ int flags, int width)
+ {
+ bool align_to_left = false;
+ int len;
+
+ /* fast path */
+ if (width == 0)
+ {
+ appendStringInfoString(buf, str);
+ return;
+ }
+ else if (width < 0 || (flags & TEXT_FORMAT_FLAG_MINUS))
+ {
+ align_to_left = true;
+ if (width < 0)
+ width = -width;
+ }
+
+ len = pg_mbstrlen(str);
+ if (align_to_left)
+ {
+ appendStringInfoString(buf, str);
+ if (len < width)
+ appendStringInfoSpaces(buf, width - len);
+ }
+ else
+ {
+ /* align_to_right */
+ if (len < width)
+ appendStringInfoSpaces(buf, width - len);
+ appendStringInfoString(buf, str);
+ }
+ }
+
/* Format a %s, %I, or %L conversion. */
static void
text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
! Datum value, bool isNull,
! int flags, int width)
{
char *str;
if (isNull)
{
! if (conversion == 's')
! text_format_append_string(buf, "",
! flags, width);
! else if (conversion == 'L')
! text_format_append_string(buf, "NULL",
! flags, width);
else if (conversion == 'I')
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("null values cannot be formatted as an SQL identifier")));
+
return;
}
str = OutputFunctionCall(typOutputInfo, value);
/* Escape. */
if (conversion == 'I')
{
/* quote_identifier may or may not allocate a new string. */
! text_format_append_string(buf, quote_identifier(str),
! flags, width);
}
else if (conversion == 'L')
! { char *qstr = quote_literal_cstr(str);
! text_format_append_string(buf, qstr,
! flags, width);
/* quote_literal_cstr() always allocates a new string */
pfree(qstr);
}
else
! text_format_append_string(buf, str,
! flags, width);
/* Cleanup. */
pfree(str);
*** a/src/test/regress/expected/text.out
--- b/src/test/regress/expected/text.out
***************
*** 256,267 **** select format('%1$s %4$s', 1, 2, 3);
ERROR: too few arguments for format
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
select format('%1s', 1);
! ERROR: unterminated conversion specifier
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
! ERROR: unrecognized conversion specifier "1"
-- check mix of positional and ordered placeholders
select format('Hello %s %1$s %s', 'World', 'Hello again');
format
--- 256,275 ----
ERROR: too few arguments for format
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
+ select format('%0$s', 'Hello');
+ ERROR: conversion specifies argument 0, but arguments are numbered from 1
+ select format('%*0$s', 'Hello');
+ ERROR: conversion specifies argument 0, but arguments are numbered from 1
select format('%1s', 1);
! format
! --------
! 1
! (1 row)
!
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
! ERROR: unterminated conversion specifier
-- check mix of positional and ordered placeholders
select format('Hello %s %1$s %s', 'World', 'Hello again');
format
***************
*** 328,330 **** from generate_series(1,200) g(i);
--- 336,409 ----
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200
(1 row)
+ -- left, right align
+ select format('>>%10s<<', 'Hello')
+ union all
+ select format('>>%10s<<', NULL)
+ union all
+ select format('>>%10s<<', '')
+ union all
+ select format('>>%-10s<<', '')
+ union all
+ select format('>>%-10s<<', 'Hello')
+ union all
+ select format('>>%-10s<<', NULL)
+ union all
+ select format('>>%1$10s<<', 'Hello')
+ union all
+ select format('>>%1$-10I<<', 'Hello')
+ union all
+ select format('>>%2$*1$L<<', 10, 'Hello')
+ union all
+ select format('>>%2$*1$L<<', 10, NULL)
+ union all
+ select format('>>%2$*1$L<<', -10, NULL)
+ union all
+ select format('>>%*s<<', 10, 'Hello');
+ format
+ ----------------
+ >> Hello<<
+ >> <<
+ >> <<
+ >> <<
+ >>Hello <<
+ >> <<
+ >> Hello<<
+ >>"Hello" <<
+ >> 'Hello'<<
+ >> NULL<<
+ >>NULL <<
+ >> Hello<<
+ (12 rows)
+
+ select format('>>%*1$s<<', 10, 'Hello');
+ format
+ ----------------
+ >> Hello<<
+ (1 row)
+
+ select format('>>%-s<<', 'Hello');
+ format
+ -----------
+ >>Hello<<
+ (1 row)
+
+ -- NULL is not different to zero here
+ select format('>>%10L<<', NULL);
+ format
+ ----------------
+ >> NULL<<
+ (1 row)
+
+ select format('>>%2$*1$L<<', NULL, 'Hello');
+ format
+ -------------
+ >>'Hello'<<
+ (1 row)
+
+ select format('>>%2$*1$L<<', 0, 'Hello');
+ format
+ -------------
+ >>'Hello'<<
+ (1 row)
+
*** a/src/test/regress/sql/text.sql
--- b/src/test/regress/sql/text.sql
***************
*** 78,83 **** select format('%1$s %12$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
--- 78,85 ----
-- should fail
select format('%1$s %4$s', 1, 2, 3);
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+ select format('%0$s', 'Hello');
+ select format('%*0$s', 'Hello');
select format('%1s', 1);
select format('%1$', 1);
select format('%1$1', 1);
***************
*** 97,99 **** select format('Hello', variadic NULL);
--- 99,134 ----
-- variadic argument allows simulating more than FUNC_MAX_ARGS parameters
select format(string_agg('%s',','), variadic array_agg(i))
from generate_series(1,200) g(i);
+
+ -- left, right align
+ select format('>>%10s<<', 'Hello')
+ union all
+ select format('>>%10s<<', NULL)
+ union all
+ select format('>>%10s<<', '')
+ union all
+ select format('>>%-10s<<', '')
+ union all
+ select format('>>%-10s<<', 'Hello')
+ union all
+ select format('>>%-10s<<', NULL)
+ union all
+ select format('>>%1$10s<<', 'Hello')
+ union all
+ select format('>>%1$-10I<<', 'Hello')
+ union all
+ select format('>>%2$*1$L<<', 10, 'Hello')
+ union all
+ select format('>>%2$*1$L<<', 10, NULL)
+ union all
+ select format('>>%2$*1$L<<', -10, NULL)
+ union all
+ select format('>>%*s<<', 10, 'Hello');
+
+ select format('>>%*1$s<<', 10, 'Hello');
+ select format('>>%-s<<', 'Hello');
+
+ -- NULL is not different to zero here
+ select format('>>%10L<<', NULL);
+ select format('>>%2$*1$L<<', NULL, 'Hello');
+ select format('>>%2$*1$L<<', 0, 'Hello');
Hello,
I think that the only other behavioural glitch I spotted was that it
fails to catch one overflow case, which should generate an "out of
ranger" error:select format('|%*s|', -2147483648, 'foo');
Result: |foo|because -(-2147483648) = 0 in signed 32-bit integers.
Ouch. Thanks for pointing out.
fixed - next other overflow check added
Your change shown below seems assuming that the two's complement
of the most negative number in integer types is identical to
itself, and looks working as expected at least on
linux/x86_64. But C standard defines it as undefined, (As far as
I hear :-).
| if (width != 0)
| {
| int32 _width = -width;
|
| if (SAMESIGN(width, _width))
| ereport(ERROR,
Instead, I think we can deny it by simply comparing with
INT_MIN. I modified the patch like so and put some modifications
on styling.
Finally, enlargeStringInfo fails just after for large numbers. We
might should keep it under certain length to get rid of memory
exhaustion.
Anyway, I'll send this patch to committers as it is in this
message.
best wishes,
--
Kyotaro Horiguchi
NTT Open Source Software Center
Attachments:
format-width-20130305.patchtext/x-patch; charset=us-asciiDownload
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 9b7e967..b2d2ed6 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1519,21 +1519,13 @@
<primary>format</primary>
</indexterm>
<literal><function>format</function>(<parameter>formatstr</parameter> <type>text</type>
- [, <parameter>str</parameter> <type>"any"</type> [, ...] ])</literal>
+ [, <parameter>formatarg</parameter> <type>"any"</type> [, ...] ])</literal>
</entry>
<entry><type>text</type></entry>
<entry>
Format arguments according to a format string.
- This function is similar to the C function
- <function>sprintf</>, but only the following conversion specifications
- are recognized: <literal>%s</literal> interpolates the corresponding
- argument as a string; <literal>%I</literal> escapes its argument as
- an SQL identifier; <literal>%L</literal> escapes its argument as an
- SQL literal; <literal>%%</literal> outputs a literal <literal>%</>.
- A conversion can reference an explicit parameter position by preceding
- the conversion specifier with <literal><replaceable>n</>$</>, where
- <replaceable>n</replaceable> is the argument position.
- See also <xref linkend="plpgsql-quote-literal-example">.
+ This function is similar to the C function <function>sprintf</>.
+ See <xref linkend="functions-string-format">.
</entry>
<entry><literal>format('Hello %s, %1$s', 'World')</literal></entry>
<entry><literal>Hello World, World</literal></entry>
@@ -2847,6 +2839,186 @@
</tgroup>
</table>
+ <sect2 id="functions-string-format">
+ <title><function>format</function></title>
+
+ <indexterm>
+ <primary>format</primary>
+ </indexterm>
+
+ <para>
+ The function <function>format</> produces formatted output according to
+ a format string in a similar way to the C function <function>sprintf</>.
+ </para>
+
+ <para>
+<synopsis>
+format(<parameter>formatstr</> <type>text</> [, <parameter>formatarg</> <type>"any"</> [, ...] ])
+</synopsis>
+ <replaceable>formatstr</> is a format string that specifies how the
+ result should be formatted. Text in the format string is copied directly
+ to the result, except where <firstterm>format specifiers</> are used.
+ Format specifiers act as placeholders in the string, allowing subsequent
+ function arguments to be formatted and inserted into the result.
+ </para>
+
+ <para>
+ Format specifiers are introduced by a <literal>%</> character and take
+ the form
+<synopsis>
+%[<replaceable>parameter</>][<replaceable>flags</>][<replaceable>width</>]<replaceable>type</>
+</synopsis>
+ <variablelist>
+ <varlistentry>
+ <term><replaceable>parameter</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ An expression of the form <literal><replaceable>n</>$</> where
+ <replaceable>n</> is the index of the argument to use for the format
+ specifier's value. An index of 1 means the first argument after
+ <replaceable>formatstr</>. If the <replaceable>parameter</> field is
+ omitted, the default is to use the next argument.
+ </para>
+<screen>
+SELECT format('Testing %s, %s, %s', 'one', 'two', 'three');
+<lineannotation>Result: </><computeroutput>Testing one, two, three</>
+
+SELECT format('Testing %3$s, %2$s, %1$s', 'one', 'two', 'three');
+<lineannotation>Result: </><computeroutput>Testing three, two, one</>
+</screen>
+
+ <para>
+ Note that unlike the C function <function>sprintf</> defined in the
+ Single UNIX Specification, the <function>format</> function in
+ <productname>PostgreSQL</> allows format specifiers with and without
+ explicit <replaceable>parameter</> fields to be mixed in the same
+ format string. A format specifier without a
+ <replaceable>parameter</> field always uses the next argument after
+ the last argument consumed. In addition, the
+ <productname>PostgreSQL</> <function>format</> function does not
+ require all function arguments to be referred to in the format
+ string.
+ </para>
+<screen>
+SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
+<lineannotation>Result: </><computeroutput>Testing three, two, three</>
+</screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>flags</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ Additional options controlling how the format specifier's output is
+ formatted. Currently the only supported flag is an minus sign
+ (<literal>-</>) which will cause the format specifier's output to be
+ left-aligned. This has no effect unless the <replaceable>width</>
+ field is also specified.
+ </para>
+<screen>
+SELECT format('|%10s|%-10s|', 'foo', 'bar');
+<lineannotation>Result: </><computeroutput>| foo|bar |</>
+</screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>width</replaceable> (optional)</term>
+ <listitem>
+ <para>
+ Specifies the <emphasis>minimum</> number of characters to use to
+ display the format specifier's output. The width may be specified
+ using any of the following: a positive integer; an asterisk
+ (<literal>*</>) to use the next function argument as the width; or an
+ expression of the form <literal>*<replaceable>n</>$</> to use the
+ <replaceable>n</>th function argument as the width.
+ </para>
+
+ <para>
+ If the width comes from a function argument, that argument is
+ consumed <emphasis>before</> the argument that is used for the format
+ specifier's value. If the width argument is negative, the result is
+ left aligned, as if the <literal>-</> flag had been specified.
+ </para>
+<screen>
+SELECT format('|%10s|', 'foo');
+<lineannotation>Result: </><computeroutput>| foo|</>
+
+SELECT format('|%*s|', 10, 'foo');
+<lineannotation>Result: </><computeroutput>| foo|</>
+
+SELECT format('|%*s|', -10, 'foo');
+<lineannotation>Result: </><computeroutput>|foo |</>
+
+SELECT format('|%-*s|', 10, 'foo');
+<lineannotation>Result: </><computeroutput>|foo |</>
+
+SELECT format('|%-*s|', -10, 'foo');
+<lineannotation>Result: </><computeroutput>|foo |</>
+
+SELECT format('|%*2$s|', 'foo', 10, 'bar');
+<lineannotation>Result: </><computeroutput>| bar|</>
+
+SELECT format('|%3$*2$s|', 'foo', 10, 'bar');
+<lineannotation>Result: </><computeroutput>| bar|</>
+</screen>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable>type</replaceable> (required)</term>
+ <listitem>
+ <para>
+ The type of format conversion to use to produce the format
+ specifier's output. The following types are supported:
+ <itemizedlist>
+ <listitem>
+ <para>
+ <literal>s</literal> formats the argument value as a simple
+ string. A null value is treated as an empty string.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>I</literal> escapes the value as an SQL identifier. It
+ is an error for the value to be null.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>L</literal> escapes the value as an SQL literal. A null
+ value is displayed as the literal value <literal>NULL</>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+<screen>
+SELECT format('Hello %s', 'World');
+<lineannotation>Result: </lineannotation><computeroutput>Hello World</computeroutput>
+
+SELECT format('DROP TABLE %I', 'Foo bar');
+<lineannotation>Result: </lineannotation><computeroutput>DROP TABLE "Foo bar"</computeroutput>
+
+SELECT format('SELECT %L', E'O\'Reilly');
+<lineannotation>Result: </lineannotation><computeroutput>SELECT 'O''Reilly'</computeroutput>
+</screen>
+
+ <para>
+ The <literal>%I</> and <literal>%L</> format specifiers may be used
+ to safely construct dynamic SQL statements. See
+ <xref linkend="plpgsql-quote-literal-example">.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </para>
+
+ <para>
+ In addition to the format specifiers above, the special escape sequence
+ <literal>%%</> may be used to output a literal <literal>%</> character.
+ </para>
+ </sect2>
</sect1>
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index e69b7dd..19b8049 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -78,7 +78,8 @@ static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
static void text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
- Datum value, bool isNull);
+ Datum value, bool isNull,
+ int flags, int width);
static Datum text_to_array_internal(PG_FUNCTION_ARGS);
static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
const char *fldsep, const char *null_string);
@@ -3996,6 +3997,135 @@ text_reverse(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(result);
}
+#define FORWARD_PARSE_POINT(ptr) \
+do { \
+ if (++(ptr) >= (end_ptr)) \
+ ereport(ERROR, \
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
+ errmsg("unterminated conversion specifier"))); \
+} while (0)
+
+/*
+ * Parse congiguous digits into decimal number.
+ *
+ * Returns true if some digits could be parsed and *ptr moved to the next
+ * character to be parsed. The value is returned into *value.
+ */
+static bool
+text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
+{
+ const char *cp = *ptr;
+ int wval = 0;
+ bool found;
+
+ /*
+ * continue, only when start_ptr is less than end_ptr.
+ * Overrun of cp is checked in FORWARD_PARSE_POINT.
+ */
+ while (*cp >= '0' && *cp <= '9')
+ {
+ int newnum = wval * 10 + (*cp - '0');
+
+ if (newnum / 10 != wval) /* overflow? */
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("number is out of range")));
+ wval = newnum;
+ FORWARD_PARSE_POINT(cp);
+ }
+
+ found = (cp > *ptr);
+ *value = wval;
+ *ptr = cp;
+
+ return found;
+}
+
+#define TEXT_FORMAT_FLAG_MINUS 0x0001 /* is minus in format string? */
+
+/*
+ * parse format specification
+ * [argpos][flags][width]type
+ *
+ * Return values are,
+ * static const char * : Address to be parsed next.
+ * valarg : argument position for value to be printed. -1 means missing.
+ * widtharg : argument position for width. Zero means that argument position
+ * is not specified and -1 means missing.
+ * flags : flags
+ * width : the value for direct width specification, zero means that width
+ * is not specified.
+ */
+static const char *
+text_format_parse_format(const char *start_ptr, const char *end_ptr,
+ int *valarg, int *widtharg, int *flags, int *width)
+{
+ const char *cp = start_ptr;
+ int n;
+
+ /* set defaults to out parameters */
+ *valarg = -1;
+ *widtharg = -1;
+ *flags = 0;
+ *width = 0;
+
+ /* try to identify first number */
+ if (text_format_parse_digits(&cp, end_ptr, &n))
+ {
+ if (*cp != '$')
+ {
+ *width = n; /* The number should be width */
+ return cp;
+ }
+ /* Explicit 0 for argument index is immediately refused */
+ if (n == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
+ *valarg = n; /* The number was argument position */
+ FORWARD_PARSE_POINT(cp);
+ }
+
+ /* Check for flags, only minus is supported now. */
+ while (*cp == '-')
+ {
+ *flags = *flags | TEXT_FORMAT_FLAG_MINUS;
+ FORWARD_PARSE_POINT(cp);
+ }
+
+ /* try to parse indirect width */
+ if (*cp == '*')
+ {
+ FORWARD_PARSE_POINT(cp);
+
+ if (text_format_parse_digits(&cp, end_ptr, &n)){
+ /* number in this position should be closed by $ */
+ if (*cp != '$')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unexpected char \"%c\".",*cp)));
+ FORWARD_PARSE_POINT(cp);
+
+ /* Explicit 0 for argument index is immediately refused */
+ if (n == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
+ *widtharg = n;
+ }
+ else
+ *widtharg = 0; /* 0 means argument position is not specified */
+
+ return cp;
+ }
+
+ /* last possible number - width */
+ if (text_format_parse_digits(&cp, end_ptr, &n))
+ *width = n;
+
+ return cp;
+}
+
/*
* Returns a formated string
*/
@@ -4016,6 +4146,8 @@ text_format(PG_FUNCTION_ARGS)
Oid element_type = InvalidOid;
Oid prev_type = InvalidOid;
FmgrInfo typoutputfinfo;
+ FmgrInfo typoutputinfo_width;
+ Oid prev_type_width = InvalidOid;
/* When format string is null, returns null */
if (PG_ARGISNULL(0))
@@ -4077,7 +4209,7 @@ text_format(PG_FUNCTION_ARGS)
}
/* Setup for main loop. */
- fmt = PG_GETARG_TEXT_PP(0);
+ fmt = PG_GETARG_TEXT_PP(arg++);
start_ptr = VARDATA_ANY(fmt);
end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
initStringInfo(&str);
@@ -4088,6 +4220,10 @@ text_format(PG_FUNCTION_ARGS)
Datum value;
bool isNull;
Oid typid;
+ int valarg;
+ int widtharg;
+ int flags;
+ int width;
/*
* If it's not the start of a conversion specifier, just copy it to
@@ -4099,11 +4235,7 @@ text_format(PG_FUNCTION_ARGS)
continue;
}
- /* Did we run off the end of the string? */
- if (++cp >= end_ptr)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("unterminated conversion specifier")));
+ FORWARD_PARSE_POINT(cp);
/* Easy case: %% outputs a single % */
if (*cp == '%')
@@ -4112,69 +4244,84 @@ text_format(PG_FUNCTION_ARGS)
continue;
}
- /*
- * If the user hasn't specified an argument position, we just advance
- * to the next one. If they have, we must parse it.
- */
- if (*cp < '0' || *cp > '9')
+ cp = text_format_parse_format(cp, end_ptr,
+ &valarg, &widtharg, &flags, &width);
+
+ if (widtharg >= 0)
{
- ++arg;
- if (arg <= 0) /* overflow? */
- {
- /*
- * Should not happen, as you can't pass billions of arguments
- * to a function, but better safe than sorry.
- */
+ if (widtharg > 0)
+ /* be consistent, move ordered argument together with
+ * positional */
+ arg = widtharg;
+
+ if (arg >= nargs)
ereport(ERROR,
- (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("argument number is out of range")));
- }
- }
- else
- {
- bool unterminated = false;
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("too few arguments for format")));
- /* Parse digit string. */
- arg = 0;
- do
+ if (!funcvariadic)
{
- int newarg = arg * 10 + (*cp - '0');
+ value = PG_GETARG_DATUM(arg);
+ isNull = PG_ARGISNULL(arg);
+ typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
+ }
+ else
+ {
+ value = elements[arg - 1];
+ isNull = nulls[arg - 1];
+ typid = element_type;
+ }
+ if (!OidIsValid(typid))
+ elog(ERROR, "could not determine data type of format() input");
- if (newarg / 10 != arg) /* overflow? */
- ereport(ERROR,
- (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("argument number is out of range")));
- arg = newarg;
- ++cp;
- } while (cp < end_ptr && *cp >= '0' && *cp <= '9');
+ arg++;
/*
- * If we ran off the end, or if there's not a $ next, or if the $
- * is the last character, the conversion specifier is improperly
- * terminated.
+ * we don't need to different between NULL and zero in this moment,
+ * NULL means ignore this width - same as zero.
*/
- if (cp == end_ptr || *cp != '$')
- unterminated = true;
+ if (isNull)
+ width = 0;
+ else if (typid == INT4OID)
+ width = DatumGetInt32(value);
+ else if (typid == INT2OID)
+ width = DatumGetInt16(value);
else
{
- ++cp;
- if (cp == end_ptr)
- unterminated = true;
- }
- if (unterminated)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("unterminated conversion specifier")));
+ char *str;
- /* There's no argument 0. */
- if (arg == 0)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
+ /* simple IO cast to int */
+ if (typid != prev_type_width)
+ {
+ Oid typoutputfunc;
+ bool typIsVarlena;
+
+ getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
+ fmgr_info(typoutputfunc, &typoutputinfo_width);
+ prev_type_width = typid;
+ }
+
+ /* Stringify. */
+ str = OutputFunctionCall(&typoutputinfo_width, value);
+
+ /* get int value */
+ width = pg_atoi(str, sizeof(int32), '\0');
+ pfree(str);
+ }
}
- /* Not enough arguments? Deduct 1 to avoid counting format string. */
- if (arg > nargs - 1)
+ /* We calculate -width later but -INT_MIN is undefined for int. */
+ if (width <= INT_MIN)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("number is out of range")));
+
+ if (valarg >= 0)
+ /* be consistent, move ordered argument together with
+ * positional */
+ arg = valarg;
+
+ if (arg >= nargs)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("too few arguments for format")));
@@ -4195,6 +4342,8 @@ text_format(PG_FUNCTION_ARGS)
if (!OidIsValid(typid))
elog(ERROR, "could not determine data type of format() input");
+ arg++;
+
/*
* Get the appropriate typOutput function, reusing previous one if
* same type as previous argument. That's particularly useful in the
@@ -4221,7 +4370,7 @@ text_format(PG_FUNCTION_ARGS)
case 'I':
case 'L':
text_format_string_conversion(&str, *cp, &typoutputfinfo,
- value, isNull);
+ value, isNull, flags, width);
break;
default:
ereport(ERROR,
@@ -4244,23 +4393,65 @@ text_format(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(result);
}
+/*
+ * Add spaces on begin or on end when it is necessary
+ */
+static void
+text_format_append_string(StringInfo buf, const char *str,
+ int flags, int width)
+{
+ bool align_to_left = false;
+ int len;
+
+ /* fast path */
+ if (width == 0)
+ {
+ appendStringInfoString(buf, str);
+ return;
+ }
+ else if (width < 0 || (flags & TEXT_FORMAT_FLAG_MINUS))
+ {
+ align_to_left = true;
+ if (width < 0)
+ width = -width;
+ }
+
+ len = pg_mbstrlen(str);
+ if (align_to_left)
+ {
+ appendStringInfoString(buf, str);
+ if (len < width)
+ appendStringInfoSpaces(buf, width - len);
+ }
+ else
+ {
+ /* align_to_right */
+ if (len < width)
+ appendStringInfoSpaces(buf, width - len);
+ appendStringInfoString(buf, str);
+ }
+}
+
/* Format a %s, %I, or %L conversion. */
static void
text_format_string_conversion(StringInfo buf, char conversion,
FmgrInfo *typOutputInfo,
- Datum value, bool isNull)
+ Datum value, bool isNull,
+ int flags, int width)
{
char *str;
- /* Handle NULL arguments before trying to stringify the value. */
if (isNull)
{
- if (conversion == 'L')
- appendStringInfoString(buf, "NULL");
+ if (conversion == 's')
+ text_format_append_string(buf, "", flags, width);
+ else if (conversion == 'L')
+ text_format_append_string(buf, "NULL", flags, width);
else if (conversion == 'I')
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("null values cannot be formatted as an SQL identifier")));
+
return;
}
@@ -4271,18 +4462,18 @@ text_format_string_conversion(StringInfo buf, char conversion,
if (conversion == 'I')
{
/* quote_identifier may or may not allocate a new string. */
- appendStringInfoString(buf, quote_identifier(str));
+ text_format_append_string(buf, quote_identifier(str), flags, width);
}
else if (conversion == 'L')
{
char *qstr = quote_literal_cstr(str);
- appendStringInfoString(buf, qstr);
+ text_format_append_string(buf, qstr, flags, width);
/* quote_literal_cstr() always allocates a new string */
pfree(qstr);
}
else
- appendStringInfoString(buf, str);
+ text_format_append_string(buf, str, flags, width);
/* Cleanup. */
pfree(str);
diff --git a/src/test/regress/expected/text.out b/src/test/regress/expected/text.out
index b756583..e05a1e5 100644
--- a/src/test/regress/expected/text.out
+++ b/src/test/regress/expected/text.out
@@ -256,12 +256,20 @@ select format('%1$s %4$s', 1, 2, 3);
ERROR: too few arguments for format
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
ERROR: too few arguments for format
+select format('%0$s', 'Hello');
+ERROR: conversion specifies argument 0, but arguments are numbered from 1
+select format('%*0$s', 'Hello');
+ERROR: conversion specifies argument 0, but arguments are numbered from 1
select format('%1s', 1);
-ERROR: unterminated conversion specifier
+ format
+--------
+ 1
+(1 row)
+
select format('%1$', 1);
ERROR: unterminated conversion specifier
select format('%1$1', 1);
-ERROR: unrecognized conversion specifier "1"
+ERROR: unterminated conversion specifier
-- check mix of positional and ordered placeholders
select format('Hello %s %1$s %s', 'World', 'Hello again');
format
@@ -328,3 +336,74 @@ from generate_series(1,200) g(i);
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200
(1 row)
+-- left, right align
+select format('>>%10s<<', 'Hello')
+union all
+select format('>>%10s<<', NULL)
+union all
+select format('>>%10s<<', '')
+union all
+select format('>>%-10s<<', '')
+union all
+select format('>>%-10s<<', 'Hello')
+union all
+select format('>>%-10s<<', NULL)
+union all
+select format('>>%1$10s<<', 'Hello')
+union all
+select format('>>%1$-10I<<', 'Hello')
+union all
+select format('>>%2$*1$L<<', 10, 'Hello')
+union all
+select format('>>%2$*1$L<<', 10, NULL)
+union all
+select format('>>%2$*1$L<<', -10, NULL)
+union all
+select format('>>%*s<<', 10, 'Hello');
+ format
+----------------
+ >> Hello<<
+ >> <<
+ >> <<
+ >> <<
+ >>Hello <<
+ >> <<
+ >> Hello<<
+ >>"Hello" <<
+ >> 'Hello'<<
+ >> NULL<<
+ >>NULL <<
+ >> Hello<<
+(12 rows)
+
+select format('>>%*1$s<<', 10, 'Hello');
+ format
+----------------
+ >> Hello<<
+(1 row)
+
+select format('>>%-s<<', 'Hello');
+ format
+-----------
+ >>Hello<<
+(1 row)
+
+-- NULL is not different to zero here
+select format('>>%10L<<', NULL);
+ format
+----------------
+ >> NULL<<
+(1 row)
+
+select format('>>%2$*1$L<<', NULL, 'Hello');
+ format
+-------------
+ >>'Hello'<<
+(1 row)
+
+select format('>>%2$*1$L<<', 0, 'Hello');
+ format
+-------------
+ >>'Hello'<<
+(1 row)
+
diff --git a/src/test/regress/sql/text.sql b/src/test/regress/sql/text.sql
index a96e9f7..1c68754 100644
--- a/src/test/regress/sql/text.sql
+++ b/src/test/regress/sql/text.sql
@@ -78,6 +78,8 @@ select format('%1$s %12$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
-- should fail
select format('%1$s %4$s', 1, 2, 3);
select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+select format('%0$s', 'Hello');
+select format('%*0$s', 'Hello');
select format('%1s', 1);
select format('%1$', 1);
select format('%1$1', 1);
@@ -97,3 +99,36 @@ select format('Hello', variadic NULL);
-- variadic argument allows simulating more than FUNC_MAX_ARGS parameters
select format(string_agg('%s',','), variadic array_agg(i))
from generate_series(1,200) g(i);
+
+-- left, right align
+select format('>>%10s<<', 'Hello')
+union all
+select format('>>%10s<<', NULL)
+union all
+select format('>>%10s<<', '')
+union all
+select format('>>%-10s<<', '')
+union all
+select format('>>%-10s<<', 'Hello')
+union all
+select format('>>%-10s<<', NULL)
+union all
+select format('>>%1$10s<<', 'Hello')
+union all
+select format('>>%1$-10I<<', 'Hello')
+union all
+select format('>>%2$*1$L<<', 10, 'Hello')
+union all
+select format('>>%2$*1$L<<', 10, NULL)
+union all
+select format('>>%2$*1$L<<', -10, NULL)
+union all
+select format('>>%*s<<', 10, 'Hello');
+
+select format('>>%*1$s<<', 10, 'Hello');
+select format('>>%-s<<', 'Hello');
+
+-- NULL is not different to zero here
+select format('>>%10L<<', NULL);
+select format('>>%2$*1$L<<', NULL, 'Hello');
+select format('>>%2$*1$L<<', 0, 'Hello');
2013/3/5 Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp>:
Hello,
I think that the only other behavioural glitch I spotted was that it
fails to catch one overflow case, which should generate an "out of
ranger" error:select format('|%*s|', -2147483648, 'foo');
Result: |foo|because -(-2147483648) = 0 in signed 32-bit integers.
Ouch. Thanks for pointing out.
fixed - next other overflow check added
Your change shown below seems assuming that the two's complement
of the most negative number in integer types is identical to
itself, and looks working as expected at least on
linux/x86_64. But C standard defines it as undefined, (As far as
I hear :-).| if (width != 0)
| {
| int32 _width = -width;
|
| if (SAMESIGN(width, _width))
| ereport(ERROR,
this pattern was used elsewhere in pg
Instead, I think we can deny it by simply comparing with
INT_MIN. I modified the patch like so and put some modifications
on styling.
ook - I have not enough expirience with this topic and I cannot say
what is preferred.
Finally, enlargeStringInfo fails just after for large numbers. We
might should keep it under certain length to get rid of memory
exhaustion.
I though about it, but I don't know a correct value - probably any
width specification higher 1MB will be bogus and can be blocked ?? Our
VARLENA max size is 1GB so with should not be higher than 1GB ever.
what do you thinking about these limits?
Regards
Pavel
Anyway, I'll send this patch to committers as it is in this
message.best wishes,
--
Kyotaro Horiguchi
NTT Open Source Software Center
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 5 March 2013 13:46, Pavel Stehule <pavel.stehule@gmail.com> wrote:
2013/3/5 Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp>:
Hello,
I think that the only other behavioural glitch I spotted was that it
fails to catch one overflow case, which should generate an "out of
ranger" error:select format('|%*s|', -2147483648, 'foo');
Result: |foo|because -(-2147483648) = 0 in signed 32-bit integers.
Ouch. Thanks for pointing out.
fixed - next other overflow check added
Your change shown below seems assuming that the two's complement
of the most negative number in integer types is identical to
itself, and looks working as expected at least on
linux/x86_64. But C standard defines it as undefined, (As far as
I hear :-).| if (width != 0)
| {
| int32 _width = -width;
|
| if (SAMESIGN(width, _width))
| ereport(ERROR,this pattern was used elsewhere in pg
Instead, I think we can deny it by simply comparing with
INT_MIN. I modified the patch like so and put some modifications
on styling.ook - I have not enough expirience with this topic and I cannot say
what is preferred.Finally, enlargeStringInfo fails just after for large numbers. We
might should keep it under certain length to get rid of memory
exhaustion.I though about it, but I don't know a correct value - probably any
width specification higher 1MB will be bogus and can be blocked ?? Our
VARLENA max size is 1GB so with should not be higher than 1GB ever.what do you thinking about these limits?
I think it's fine as it is.
It's no different from repeat() for example. We allow repeat('a',
1000000000) so allowing format('%1000000000s', '') seems reasonable,
although probably not very useful.
Upping either beyond 1GB generates an out of memory error, which also
seems reasonable -- I can't imagine why you would want such a long
string.
Regards,
Dean
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
2013/3/5 Dean Rasheed <dean.a.rasheed@gmail.com>:
On 5 March 2013 13:46, Pavel Stehule <pavel.stehule@gmail.com> wrote:
2013/3/5 Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp>:
Hello,
I think that the only other behavioural glitch I spotted was that it
fails to catch one overflow case, which should generate an "out of
ranger" error:select format('|%*s|', -2147483648, 'foo');
Result: |foo|because -(-2147483648) = 0 in signed 32-bit integers.
Ouch. Thanks for pointing out.
fixed - next other overflow check added
Your change shown below seems assuming that the two's complement
of the most negative number in integer types is identical to
itself, and looks working as expected at least on
linux/x86_64. But C standard defines it as undefined, (As far as
I hear :-).| if (width != 0)
| {
| int32 _width = -width;
|
| if (SAMESIGN(width, _width))
| ereport(ERROR,this pattern was used elsewhere in pg
Instead, I think we can deny it by simply comparing with
INT_MIN. I modified the patch like so and put some modifications
on styling.ook - I have not enough expirience with this topic and I cannot say
what is preferred.Finally, enlargeStringInfo fails just after for large numbers. We
might should keep it under certain length to get rid of memory
exhaustion.I though about it, but I don't know a correct value - probably any
width specification higher 1MB will be bogus and can be blocked ?? Our
VARLENA max size is 1GB so with should not be higher than 1GB ever.what do you thinking about these limits?
I think it's fine as it is.
It's no different from repeat() for example. We allow repeat('a',
1000000000) so allowing format('%1000000000s', '') seems reasonable,
although probably not very useful.Upping either beyond 1GB generates an out of memory error, which also
seems reasonable -- I can't imagine why you would want such a long
string.Regards,
Dean
ok
Pavel
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Kyotaro HORIGUCHI escribió:
Umm. sorry,
If you have no problem with this, I'll send this to committer.
I just found that this patch already has a revewer. I've seen
only Status field in patch list..
Patches can be reviewed by more than one people. It's particularly
useful if they have different things to say. So don't hesitate to
review patches that have already been reviewed by other people.
In fact, you can even review committed patches; it's not unlikely that
you will be able to find bugs in those, too.
--
Álvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Hello,
Patches can be reviewed by more than one people. It's particularly
useful if they have different things to say. So don't hesitate to
review patches that have already been reviewed by other people.
Thanks for the advice. I was anxious about who among the
reviewers is, and when to make a decisision if the patch has
reached the level or not, I'll take it more easy.
In fact, you can even review committed patches; it's not unlikely that
you will be able to find bugs in those, too.
Umm.. to be sure..
--
Kyotaro Horiguchi
NTT Open Source Software Center
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp> writes:
[ format-width-20130305.patch ]
Applied with some mostly-cosmetic adjustments. I also took the liberty
of changing some of the error message texts to line up more closely
with the expanded documentation (eg, use "format specifier" not
"conversion specifier" because that's the phrase used in the docs).
regards, tom lane
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Thank you for committing this patch.
Applied with some mostly-cosmetic adjustments. I also took the
liberty of changing some of the error message texts to line up
more closely with the expanded documentation (eg, use "format
specifier" not "conversion specifier" because that's the phrase
used in the docs).
I looked over the modifications. Thanks for refining rather large
portion of documentation and comments.. and code.
regards,
--
Kyotaro Horiguchi
NTT Open Source Software Center
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers