[PATCH] Cleanup of GUC units code
Current GUC units code has 2 problems:
- It requires case-sensiteive representation of units ("kB").
As the main point of units is to make configuring easier,
requiring byte-exact typing makes them unnecessarily fragile.
- In attempt to preserve maximum range of values for INT64_IS_BUSTED
systems, the code is written rather non-obvious way.
Attached patch replaces per-unit custom code with lookup table,
where each unit is represented as multiplier of base unit.
And it compares unit names case-insensitivaly.
It sacrifices some range on INT64_IS_BUSTED systems, but as the only promise
we offer them is that "Postgres may compile" I don't see it as problem.
In case people like the case-sensitivity, it can be restored and the patch
applied as plain cleanup.
Attachments:
units-full-cleanup.patchtext/plain; charset=utf-8; name=units-full-cleanup.patchDownload
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
***************
*** 95,111 ****
#define KB_PER_MB (1024)
#define KB_PER_GB (1024*1024)
#define MS_PER_S 1000
- #define S_PER_MIN 60
#define MS_PER_MIN (1000 * 60)
- #define MIN_PER_H 60
- #define S_PER_H (60 * 60)
#define MS_PER_H (1000 * 60 * 60)
- #define MIN_PER_D (60 * 24)
- #define S_PER_D (60 * 60 * 24)
#define MS_PER_D (1000 * 60 * 60 * 24)
/* XXX these should appear in other modules' header files */
extern bool Log_disconnections;
extern int CommitDelay;
--- 95,106 ----
*************** parse_bool(const char *value, bool *resu
*** 4115,4124 ****
--- 4110,4215 ----
}
return true;
}
+ #if BLCKSZ < 1024 || XLOG_BLCKSZ < 1024
+ #error BLCKSZ or XLOG_BLCKSZ less than 1K is not supported by GUC units code.
+ #endif
+
+ /*
+ * Accepted unit abbrevations.
+ *
+ * Units are represented in terms of base unit.
+ *
+ * Base unit for memory is 1 kbyte.
+ * Base unit for time is 1 ms.
+ *
+ * If several units of same type share common prefix, longer
+ * must come first. Does not apply if different types.
+ */
+ static const GucUnit unit_list [] = {
+ /* memory units */
+ { "kb", GUC_UNIT_MEMORY, 1 },
+ { "mb", GUC_UNIT_MEMORY, KB_PER_MB },
+ { "gb", GUC_UNIT_MEMORY, KB_PER_GB },
+ /* time units */
+ { "ms", GUC_UNIT_TIME, 1 },
+ { "s", GUC_UNIT_TIME, MS_PER_S },
+ { "min", GUC_UNIT_TIME, MS_PER_MIN },
+ { "h", GUC_UNIT_TIME, MS_PER_H },
+ { "d", GUC_UNIT_TIME, MS_PER_D },
+ /* end marker */
+ { NULL },
+ };
+
+ /* hints when unit parsing fails. keep them near unit list */
+ static const char *hintmsg_time = gettext_noop("Valid units for this parameter are \"kB\", \"MB\", and \"GB\".");
+ static const char *hintmsg_mem = gettext_noop("Valid units for this parameter are \"ms\", \"s\", \"min\", \"h\", and \"d\".");
+
+ /* returns guc_unit struct or NULL for error */
+ static const GucUnit *
+ lookup_unit(const char *s, int flags)
+ {
+ const GucUnit *unit;
+ for (unit = unit_list; unit->abbr; unit++)
+ {
+ if ((flags & unit->typemask) == 0)
+ continue;
+ if (pg_strncasecmp(s, unit->abbr, strlen(unit->abbr)) == 0)
+ return unit;
+ }
+ return NULL;
+ }
+
+ /* returns final value */
+ static int64
+ apply_unit(const GucUnit *unit, int flags, int64 val, char **errstr)
+ {
+ /* divider to get real units from base units */
+ int div;
+ /* final result */
+ int64 res;
+
+ switch (flags & unit->typemask)
+ {
+ /* memory values */
+ case GUC_UNIT_KB:
+ div = 1;
+ break;
+ case GUC_UNIT_BLOCKS:
+ div = BLCKSZ / 1024;
+ break;
+ case GUC_UNIT_XBLOCKS:
+ div = XLOG_BLCKSZ / 1024;
+ break;
+ /* time values */
+ case GUC_UNIT_MS:
+ div = 1;
+ break;
+ case GUC_UNIT_S:
+ div = MS_PER_S;
+ break;
+ case GUC_UNIT_MIN:
+ div = MS_PER_MIN;
+ break;
+ default:
+ /* can happen only if new GUC_UNIT_* is introduced */
+ *errstr = gettext_noop("BUG: apply_unit not updated");
+ return val;
+ }
+
+ res = val * unit->multiplier / div;
+
+ #ifdef INT64_IS_BUSTED
+ /* check for overflow */
+ if (res < (val / div) * unit->multiplier)
+ *errstr = gettext_noop("unit overflow");
+ #endif
+
+ return res;
+ }
/*
* Try to parse value as an integer. The accepted formats are the
* usual decimal, octal, or hexadecimal formats, optionally followed by
* a unit name if "flags" indicates a unit is allowed.
*************** parse_bool(const char *value, bool *resu
*** 4131,4140 ****
--- 4222,4233 ----
bool
parse_int(const char *value, int *result, int flags, const char **hintmsg)
{
int64 val;
char *endptr;
+ char *errmsg = NULL;
+ const GucUnit *unit;
/* To suppress compiler warnings, always set output params */
if (result)
*result = 0;
if (hintmsg)
*************** parse_int(const char *value, int *result
*** 4159,4311 ****
endptr++;
/* Handle possible unit */
if (*endptr != '\0')
{
! /*
! * Note: the multiple-switch coding technique here is a bit tedious,
! * but seems necessary to avoid intermediate-value overflows.
! *
! * If INT64_IS_BUSTED (ie, it's really int32) we will fail to detect
! * overflow due to units conversion, but there are few enough such
! * machines that it does not seem worth trying to be smarter.
! */
! if (flags & GUC_UNIT_MEMORY)
{
! /* Set hint for use if no match or trailing garbage */
! if (hintmsg)
! *hintmsg = gettext_noop("Valid units for this parameter are \"kB\", \"MB\", and \"GB\".");
!
! #if BLCKSZ < 1024 || BLCKSZ > (1024*1024)
! #error BLCKSZ must be between 1KB and 1MB
! #endif
! #if XLOG_BLCKSZ < 1024 || XLOG_BLCKSZ > (1024*1024)
! #error XLOG_BLCKSZ must be between 1KB and 1MB
! #endif
!
! if (strncmp(endptr, "kB", 2) == 0)
! {
! endptr += 2;
! switch (flags & GUC_UNIT_MEMORY)
! {
! case GUC_UNIT_BLOCKS:
! val /= (BLCKSZ / 1024);
! break;
! case GUC_UNIT_XBLOCKS:
! val /= (XLOG_BLCKSZ / 1024);
! break;
! }
! }
! else if (strncmp(endptr, "MB", 2) == 0)
! {
! endptr += 2;
! switch (flags & GUC_UNIT_MEMORY)
! {
! case GUC_UNIT_KB:
! val *= KB_PER_MB;
! break;
! case GUC_UNIT_BLOCKS:
! val *= KB_PER_MB / (BLCKSZ / 1024);
! break;
! case GUC_UNIT_XBLOCKS:
! val *= KB_PER_MB / (XLOG_BLCKSZ / 1024);
! break;
! }
! }
! else if (strncmp(endptr, "GB", 2) == 0)
! {
! endptr += 2;
! switch (flags & GUC_UNIT_MEMORY)
! {
! case GUC_UNIT_KB:
! val *= KB_PER_GB;
! break;
! case GUC_UNIT_BLOCKS:
! val *= KB_PER_GB / (BLCKSZ / 1024);
! break;
! case GUC_UNIT_XBLOCKS:
! val *= KB_PER_GB / (XLOG_BLCKSZ / 1024);
! break;
! }
! }
}
- else if (flags & GUC_UNIT_TIME)
- {
- /* Set hint for use if no match or trailing garbage */
- if (hintmsg)
- *hintmsg = gettext_noop("Valid units for this parameter are \"ms\", \"s\", \"min\", \"h\", and \"d\".");
! if (strncmp(endptr, "ms", 2) == 0)
! {
! endptr += 2;
! switch (flags & GUC_UNIT_TIME)
! {
! case GUC_UNIT_S:
! val /= MS_PER_S;
! break;
! case GUC_UNIT_MIN:
! val /= MS_PER_MIN;
! break;
! }
! }
! else if (strncmp(endptr, "s", 1) == 0)
! {
! endptr += 1;
! switch (flags & GUC_UNIT_TIME)
! {
! case GUC_UNIT_MS:
! val *= MS_PER_S;
! break;
! case GUC_UNIT_MIN:
! val /= S_PER_MIN;
! break;
! }
! }
! else if (strncmp(endptr, "min", 3) == 0)
! {
! endptr += 3;
! switch (flags & GUC_UNIT_TIME)
! {
! case GUC_UNIT_MS:
! val *= MS_PER_MIN;
! break;
! case GUC_UNIT_S:
! val *= S_PER_MIN;
! break;
! }
! }
! else if (strncmp(endptr, "h", 1) == 0)
! {
! endptr += 1;
! switch (flags & GUC_UNIT_TIME)
! {
! case GUC_UNIT_MS:
! val *= MS_PER_H;
! break;
! case GUC_UNIT_S:
! val *= S_PER_H;
! break;
! case GUC_UNIT_MIN:
! val *= MIN_PER_H;
! break;
! }
! }
! else if (strncmp(endptr, "d", 1) == 0)
{
! endptr += 1;
! switch (flags & GUC_UNIT_TIME)
! {
! case GUC_UNIT_MS:
! val *= MS_PER_D;
! break;
! case GUC_UNIT_S:
! val *= S_PER_D;
! break;
! case GUC_UNIT_MIN:
! val *= MIN_PER_D;
! break;
! }
}
}
/* allow whitespace after unit */
while (isspace((unsigned char) *endptr))
--- 4252,4281 ----
endptr++;
/* Handle possible unit */
if (*endptr != '\0')
{
! /* prepare hint msg */
! if (hintmsg)
{
! if (flags & GUC_UNIT_MEMORY)
! *hintmsg = hintmsg_mem;
! else if (flags & GUC_UNIT_TIME)
! *hintmsg = hintmsg_time;
}
! /* apply unit */
! unit = lookup_unit(endptr, flags);
! if (unit)
! {
! endptr += strlen(unit->abbr);
! val = apply_unit(unit, flags, val, &errmsg);
! if (errmsg)
{
! if (hintmsg)
! *hintmsg = errmsg;
! return false;
}
}
/* allow whitespace after unit */
while (isspace((unsigned char) *endptr))
*** a/src/include/utils/guc_tables.h
--- b/src/include/utils/guc_tables.h
*************** struct config_string
*** 210,219 ****
--- 210,232 ----
GucShowHook show_hook;
/* variable fields, initialized at runtime: */
char *reset_val;
};
+ /*
+ * Units are represented in terms of base unit.
+ * Base unit for memory is 1 kbyte.
+ * Base unit for time is 1 ms.
+ *
+ * typemask is either GUC_UNIT_MEMORY or GUC_UNIT_TIME.
+ */
+ typedef struct guc_unit {
+ const char *abbr;
+ int typemask;
+ unsigned int multiplier;
+ } GucUnit;
+
struct config_enum
{
struct config_generic gen;
/* constant fields, must be set correctly in initial value: */
int *variable;
Marko Kreen <markokr@gmail.com> writes:
- In attempt to preserve maximum range of values for INT64_IS_BUSTED
systems, the code is written rather non-obvious way.
I do not personally object a bit to making the units comparisons
case-insensitive (I think it's mainly Peter who wants to be strict
about it). I don't think there are any other good ideas in this
patch, however, and exposing ourselves to intermediate overflows in
the name of simplicity is definitely not one.
regards, tom lane
On 9/1/08, Tom Lane <tgl@sss.pgh.pa.us> wrote:
Marko Kreen <markokr@gmail.com> writes:
- In attempt to preserve maximum range of values for INT64_IS_BUSTED
systems, the code is written rather non-obvious way.I do not personally object a bit to making the units comparisons
case-insensitive (I think it's mainly Peter who wants to be strict
about it). I don't think there are any other good ideas in this
patch, however, and exposing ourselves to intermediate overflows in
the name of simplicity is definitely not one.
For all practical purposes, the overflow is insignificant when int64
works. I'll look if I can avoid it on INT64_IS_BUSTED case.
In the meantime, here is simple patch for case-insensivity.
--
marko
Attachments:
units.nocase.difftext/x-diff; name=units.nocase.diffDownload
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 4ca25dc..b10be22 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -4182,7 +4182,7 @@ parse_int(const char *value, int *result, int flags, const char **hintmsg)
#error XLOG_BLCKSZ must be between 1KB and 1MB
#endif
- if (strncmp(endptr, "kB", 2) == 0)
+ if (pg_strncasecmp(endptr, "kB", 2) == 0)
{
endptr += 2;
switch (flags & GUC_UNIT_MEMORY)
@@ -4195,7 +4195,7 @@ parse_int(const char *value, int *result, int flags, const char **hintmsg)
break;
}
}
- else if (strncmp(endptr, "MB", 2) == 0)
+ else if (pg_strncasecmp(endptr, "MB", 2) == 0)
{
endptr += 2;
switch (flags & GUC_UNIT_MEMORY)
@@ -4211,7 +4211,7 @@ parse_int(const char *value, int *result, int flags, const char **hintmsg)
break;
}
}
- else if (strncmp(endptr, "GB", 2) == 0)
+ else if (pg_strncasecmp(endptr, "GB", 2) == 0)
{
endptr += 2;
switch (flags & GUC_UNIT_MEMORY)
@@ -4234,7 +4234,7 @@ parse_int(const char *value, int *result, int flags, const char **hintmsg)
if (hintmsg)
*hintmsg = gettext_noop("Valid units for this parameter are \"ms\", \"s\", \"min\", \"h\", and \"d\".");
- if (strncmp(endptr, "ms", 2) == 0)
+ if (pg_strncasecmp(endptr, "ms", 2) == 0)
{
endptr += 2;
switch (flags & GUC_UNIT_TIME)
@@ -4247,7 +4247,7 @@ parse_int(const char *value, int *result, int flags, const char **hintmsg)
break;
}
}
- else if (strncmp(endptr, "s", 1) == 0)
+ else if (pg_strncasecmp(endptr, "s", 1) == 0)
{
endptr += 1;
switch (flags & GUC_UNIT_TIME)
@@ -4260,7 +4260,7 @@ parse_int(const char *value, int *result, int flags, const char **hintmsg)
break;
}
}
- else if (strncmp(endptr, "min", 3) == 0)
+ else if (pg_strncasecmp(endptr, "min", 3) == 0)
{
endptr += 3;
switch (flags & GUC_UNIT_TIME)
@@ -4273,7 +4273,7 @@ parse_int(const char *value, int *result, int flags, const char **hintmsg)
break;
}
}
- else if (strncmp(endptr, "h", 1) == 0)
+ else if (pg_strncasecmp(endptr, "h", 1) == 0)
{
endptr += 1;
switch (flags & GUC_UNIT_TIME)
@@ -4289,7 +4289,7 @@ parse_int(const char *value, int *result, int flags, const char **hintmsg)
break;
}
}
- else if (strncmp(endptr, "d", 1) == 0)
+ else if (pg_strncasecmp(endptr, "d", 1) == 0)
{
endptr += 1;
switch (flags & GUC_UNIT_TIME)
Marko Kreen wrote:
In the meantime, here is simple patch for case-insensivity.
You might be able to talk me into accepting various unambiguous, common
alternative spellings of various units. But for instance allowing MB
and Mb to mean the same thing is insane.
Peter Eisentraut <peter_e@gmx.net> writes:
Marko Kreen wrote:
In the meantime, here is simple patch for case-insensivity.
You might be able to talk me into accepting various unambiguous, common
alternative spellings of various units. But for instance allowing MB and Mb to
mean the same thing is insane.
Because you think some user will be trying to specify their shared_buffers in
bits?
--
Gregory Stark
EnterpriseDB http://www.enterprisedb.com
Ask me about EnterpriseDB's RemoteDBA services!
On 9/2/08, Peter Eisentraut <peter_e@gmx.net> wrote:
Marko Kreen wrote:
In the meantime, here is simple patch for case-insensivity.
You might be able to talk me into accepting various unambiguous, common
alternative spellings of various units. But for instance allowing MB and Mb
to mean the same thing is insane.
How would the docs for that look like? And anyway, what is wrong with
Mb for megabytes? mB may be slightly weird but if some user likes it,
I see no reason to reject it.
You do realize that misspelling unit name can cause downtime of several
minutes instead of couple seconds? We can easily do restart in couple of
seconds but the restart, look logs, launch editor, find value, change,
save, restart cycle will take quite a lot more. Why should we increase
the chance that any config edit causes problems?
Secondly, humans don't have byte-exact memory, instead they generalize
and deduce (eg. from nearby parameters). Thus remembering "KB, MB, GB"
or "kb, mb, gb" is easier than remembering "kB, MB, GB". Also remembering
"ms, s, m, h, d" is easier than "ms, s, min, h, d". (I'm not proposing 'm'
for minutes, just noting, that as we shouldn't ever introduce 'month' unit
for core config values, the 'm' as minutes is preferable for 'min'.)
Thirdly, please don't use "standard units" argument, unless you plan to
propose use of "KiB, MiB, GiB" at the same moment. The units will be
remembered as units-for-postgresql.conf. It is good if they conform to
user expectations, but it's even more important they are easy to remember.
Fourth, for the same reason, it is preferable the amount of units
accepted stays small. Again, to be easily rememberable. This
also kills any chance "mb" can be confused with "milli-bits".
If any extension/module wants to use any other units or full-SI,
it can but that should not extend to core config values. Again,
to reduce chance for confusion.
So, are there any other arguments for keeping current behaviour?
--
marko
Gregory Stark wrote:
Peter Eisentraut <peter_e@gmx.net> writes:
Marko Kreen wrote:
In the meantime, here is simple patch for case-insensivity.
You might be able to talk me into accepting various unambiguous, common
alternative spellings of various units. But for instance allowing MB and Mb to
mean the same thing is insane.Because you think some user will be trying to specify their shared_buffers in
bits?
My concern is that this information does not stay in the configuration
files. It will invariably leak out into whitepapers, presentations,
product documentation, and before long there will be confusion about why
you can't stuff N Mb over an N Mb connection. I am not making this up.
Mb does not add any typing ease (as "KB" might) or readability (as "sec"
might), and there is no respectable source that will claim it is an
acceptable alias for MB.
On 9/2/08, Peter Eisentraut <peter_e@gmx.net> wrote:
Gregory Stark wrote:
Peter Eisentraut <peter_e@gmx.net> writes:
Marko Kreen wrote:
In the meantime, here is simple patch for case-insensivity.
You might be able to talk me into accepting various unambiguous, common
alternative spellings of various units. But for instance allowing MBand Mb to
mean the same thing is insane.
Because you think some user will be trying to specify their shared_buffers
in
bits?
My concern is that this information does not stay in the configuration
files. It will invariably leak out into whitepapers, presentations, product
documentation, and before long there will be confusion about why you can't
stuff N Mb over an N Mb connection. I am not making this up.
Uh. So you want force "proper" units in presentations at the price
of everyday admin operations? Does not seem like a sensible tradeoff.
--
marko
"Marko Kreen" <markokr@gmail.com> writes:
Uh. So you want force "proper" units in presentations at the price
of everyday admin operations? Does not seem like a sensible tradeoff.
It didn't to anyone else when Peter wrote the current version either, but as
the person willing to actually do the work and write the code Peter got to
make the decision. Nobody else stepped up to do the work to change it and we
can't exactly force Peter to do work he doesn't agree with. The current
version is unquestionably better than what came before where you had to
specify some things in units of Postgres blocks, for example.
--
Gregory Stark
EnterpriseDB http://www.enterprisedb.com
Ask me about EnterpriseDB's RemoteDBA services!
Gregory Stark <stark@enterprisedb.com> writes:
"Marko Kreen" <markokr@gmail.com> writes:
Uh. So you want force "proper" units in presentations at the price
of everyday admin operations? Does not seem like a sensible tradeoff.
It didn't to anyone else when Peter wrote the current version either, but as
the person willing to actually do the work and write the code Peter got to
make the decision. Nobody else stepped up to do the work to change it and we
can't exactly force Peter to do work he doesn't agree with.
It's not that, in my mind: it's that Peter feels more strongly about it
than the rest of us. This proposal has come up before and he's
successfully argued it down each time. He does have a point about there
being some potential for confusion; and the arguments on the other side
are not much better than "I'm lazy". Being lazy myself, I'd prefer a
case insensitive implementation; but I don't feel strongly enough about
it to want to override Peter's opinion.
regards, tom lane
Anything that will reduce potential downtime should be way to go.
To me it seems that Peter uses the loudest voice and others just don't care
enough.
Using kB for kilobyte seems quite alien and confusing. I have not noticed
that to be used in software i use in my everyday work and could not find any
either with quick search.
On Wed, Sep 3, 2008 at 12:15 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
Show quoted text
Gregory Stark <stark@enterprisedb.com> writes:
"Marko Kreen" <markokr@gmail.com> writes:
Uh. So you want force "proper" units in presentations at the price
of everyday admin operations? Does not seem like a sensible tradeoff.It didn't to anyone else when Peter wrote the current version either, but
as
the person willing to actually do the work and write the code Peter got
to
make the decision. Nobody else stepped up to do the work to change it and
we
can't exactly force Peter to do work he doesn't agree with.
It's not that, in my mind: it's that Peter feels more strongly about it
than the rest of us. This proposal has come up before and he's
successfully argued it down each time. He does have a point about there
being some potential for confusion; and the arguments on the other side
are not much better than "I'm lazy". Being lazy myself, I'd prefer a
case insensitive implementation; but I don't feel strongly enough about
it to want to override Peter's opinion.regards, tom lane
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
"Asko Oja" <ascoja@gmail.com> writes:
Anything that will reduce potential downtime should be way to go.
That argument didn't seem to me to be worth the electrons to rebut,
but now that someone else has repeated it, maybe I should. It's
ludicrous to claim that allowing case insensitivity here will make
any noticeable improvement in downtime, as there are far more ways
to screw up your config file than this one. (A config file test mode,
such as was discussed a few weeks ago, *would* help, but this won't.)
In the light of Peter's argument about confusion, one could even
argue that admins putting in wrong values because they got confused
about the meaning of the units is a downtime risk in itself.
regards, tom lane
On 9/3/08, Tom Lane <tgl@sss.pgh.pa.us> wrote:
Gregory Stark <stark@enterprisedb.com> writes:
"Marko Kreen" <markokr@gmail.com> writes:
Uh. So you want force "proper" units in presentations at the price
of everyday admin operations? Does not seem like a sensible tradeoff.It didn't to anyone else when Peter wrote the current version either, but as
the person willing to actually do the work and write the code Peter got to
make the decision. Nobody else stepped up to do the work to change it and we
can't exactly force Peter to do work he doesn't agree with.It's not that, in my mind: it's that Peter feels more strongly about it
than the rest of us. This proposal has come up before and he's
successfully argued it down each time. He does have a point about there
being some potential for confusion; and the arguments on the other side
are not much better than "I'm lazy". Being lazy myself, I'd prefer a
case insensitive implementation; but I don't feel strongly enough about
it to want to override Peter's opinion.
Um, being lazy is *the point* of units!? And being friendly to
newbies. Can there be any other reason for their existence?
--
marko
On 9/3/08, Tom Lane <tgl@sss.pgh.pa.us> wrote:
"Asko Oja" <ascoja@gmail.com> writes:
Anything that will reduce potential downtime should be way to go.
That argument didn't seem to me to be worth the electrons to rebut,
but now that someone else has repeated it, maybe I should. It's
ludicrous to claim that allowing case insensitivity here will make
any noticeable improvement in downtime, as there are far more ways
to screw up your config file than this one. (A config file test mode,
such as was discussed a few weeks ago, *would* help, but this won't.)
In the light of Peter's argument about confusion, one could even
argue that admins putting in wrong values because they got confused
about the meaning of the units is a downtime risk in itself.
How can the meaning of "gb", "mb", "kb" be uncertain?
But currently writing "kb" *will* load database with wrong values.
--
marko
Marko Kreen wrote:
On 9/2/08, Peter Eisentraut <peter_e@gmx.net> wrote:
Marko Kreen wrote:
In the meantime, here is simple patch for case-insensivity.
You might be able to talk me into accepting various unambiguous, common
alternative spellings of various units. But for instance allowing MB and Mb
to mean the same thing is insane.How would the docs for that look like? And anyway, what is wrong with
Mb for megabytes?
I doesn't seem completely unreasonable to me that we'd want to express
something in megabits/second in the future. For example, instead of
vacuum_cost_delay, it would be cool to specify a bandwidth allowance.
Megabits/second is a completely reasonable unit for that. Or a limit on
network bandwidth.
FWIW, I don't feel very strongly either way. I'm more than happy with
the status quo. The hint in the error message very clearly spells out
what the valid values are, so it's immediately clear what you need to
fix if you get that wrong.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
On Wed, Sep 3, 2008 at 11:20 AM, Heikki Linnakangas <
heikki.linnakangas@enterprisedb.com> wrote:
Marko Kreen wrote:
On 9/2/08, Peter Eisentraut <peter_e@gmx.net> wrote:
Marko Kreen wrote:
In the meantime, here is simple patch for case-insensivity.
You might be able to talk me into accepting various unambiguous, common
alternative spellings of various units. But for instance allowing MB and
Mb
to mean the same thing is insane.How would the docs for that look like? And anyway, what is wrong with
Mb for megabytes?From infamous wikipedia: A *megabit* is a unit of information<http://en.wikipedia.org/wiki/Information>or computer storage, abbreviated
*Mbit* (or *Mb*).
To me playing with case of acronyms and even depending on it seems more
insane. It would make much more sense to have case insensitive set of
acronyms and (thanks Tom for pointing out) some sanity checks when
configuration is loaded to notify user when wrong ones are used for some
context.
I doesn't seem completely unreasonable to me that we'd want to express
something in megabits/second in the future. For example, instead of
vacuum_cost_delay, it would be cool to specify a bandwidth allowance.
Megabits/second is a completely reasonable unit for that. Or a limit on
network bandwidth.
There are less confusing (better) acronyms kbit/s and mbit/s available for
that.
FWIW, I don't feel very strongly either way. I'm more than happy with the
status quo. The hint in the error message very clearly spells out what the
valid values are, so it's immediately clear what you need to fix if you get
that wrong.
Is the database down during that time?
Show quoted text
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 9/3/08, Heikki Linnakangas <heikki.linnakangas@enterprisedb.com> wrote:
Marko Kreen wrote:
On 9/2/08, Peter Eisentraut <peter_e@gmx.net> wrote:
Marko Kreen wrote:
In the meantime, here is simple patch for case-insensivity.
You might be able to talk me into accepting various unambiguous, common
alternative spellings of various units. But for instance allowing MBand Mb
to mean the same thing is insane.
How would the docs for that look like? And anyway, what is wrong with
Mb for megabytes?I doesn't seem completely unreasonable to me that we'd want to express
something in megabits/second in the future. For example, instead of
vacuum_cost_delay, it would be cool to specify a bandwidth allowance.
Megabits/second is a completely reasonable unit for that. Or a limit on
network bandwidth.
While it sounds theoretically useful, as a UI it's combines worst
from both worlds. You now can confuse user with both case-sensitivity
and unit-mixup. Even if we keep the current case-sensitivity, we should
set policy that units that differ only with case will never be accepted.
And the best way to set the policy in stone would be to make
units case-insensitive.
I like Asko's proposal of 'kbit/s' and 'mbit/s' for those - clear
and no chance of confusion.
FWIW, I don't feel very strongly either way. I'm more than happy with the
status quo. The hint in the error message very clearly spells out what the
valid values are, so it's immediately clear what you need to fix if you get
that wrong.
Well, the problem is that while database may come up with wrong values,
it may be unusable for actual loads, until admin browses the logs,
re-edits the config file and finally restarts in again.
--
marko
On Tue, 2008-09-02 at 16:50 +0300, Peter Eisentraut wrote:
Gregory Stark wrote:
Peter Eisentraut <peter_e@gmx.net> writes:
Marko Kreen wrote:
In the meantime, here is simple patch for case-insensivity.
You might be able to talk me into accepting various unambiguous, common
alternative spellings of various units. But for instance allowing MB and Mb to
mean the same thing is insane.Because you think some user will be trying to specify their shared_buffers in
bits?
If somebody wants then somebody can still claim that the units are
wrong, as SI specifies 1k = 1000 and 1M=1000000, etc
The binary "should" use IEC multipliers for kibi-, mebi-, tebi-, gibi-,
etc.
http://searchstorage.techtarget.com/sDefinition/0,,sid5_gci825099,00.html
So we could beat "correctness" into DBA-s by forcing them to write KiB
MiB and so on for things that they have always written kb and mb (unless
they are communication engineers, in which case they may be used to
writing these as kB and MB)
My concern is that this information does not stay in the configuration
files. It will invariably leak out into whitepapers, presentations,
product documentation, and before long there will be confusion about why
you can't stuff N Mb over an N Mb connection.
You can't even put N Mbits of data in one sec over N Mbits/sec
connection as there is some overhead from all protocol levels.
Clueless people will also be confused about why they can't fit 1Mbyte of
CVS data into a 1Mbyte database file or why you can't store 1024 MB of
data on a 1GB disk.
I am not making this up.
Is there anything in conf files that can be reasonably specified in
bits ?
Mb does not add any typing ease (as "KB" might) or readability (as "sec"
might), and there is no respectable source that will claim it is an
acceptable alias for MB.
Are you really afraid that someone would want to use mb to mean
millibits ?
As SQL is generally case insensitive, it is quite surprising to most
people that GUC units are not.
-----------------
Hannu
Hannu Krosing wrote:
On Tue, 2008-09-02 at 16:50 +0300, Peter Eisentraut wrote:
Gregory Stark wrote:
Peter Eisentraut <peter_e@gmx.net> writes:
Are you really afraid that someone would want to use mb to mean
millibits ?As SQL is generally case insensitive, it is quite surprising to most
people that GUC units are not.
We have had this discussion before, I even submitted a patch to make
them case insensitive. In retrospect I was wrong to submit that patch.
SQL may be case insensitive but units are not. MB != Mb != mb , I don't
think we should encourage in any way for users to do the wrong thing.
Sincerely,
Joshua D. Drake
I don't think worrying about the message we send to users is
reasonable. We can take responsibilty for the messages we output but
punishing our users to teach them a lesson is being actively user-
hostile
greg
On 3 Sep 2008, at 15:52, "Joshua D. Drake" <jd@commandprompt.com> wrote:
Show quoted text
Hannu Krosing wrote:
On Tue, 2008-09-02 at 16:50 +0300, Peter Eisentraut wrote:
Gregory Stark wrote:
Peter Eisentraut <peter_e@gmx.net> writes:
Are you really afraid that someone would want to use mb to mean
millibits ?
As SQL is generally case insensitive, it is quite surprising to most
people that GUC units are not.We have had this discussion before, I even submitted a patch to make
them case insensitive. In retrospect I was wrong to submit that
patch. SQL may be case insensitive but units are not. MB != Mb !=
mb , I don't think we should encourage in any way for users to do
the wrong thing.Sincerely,
Joshua D. Drake
Greg Stark wrote:
I don't think worrying about the message we send to users is reasonable.
We can take responsibilty for the messages we output but punishing our
users to teach them a lesson is being actively user-hostile
There is no arguing that MB != Mb; nor is there anything user-hostile
behind the idea of doing it the right way.
Sincerely,
Joshua D. Drake
On Wed, 2008-09-03 at 07:52 -0700, Joshua D. Drake wrote:
Hannu Krosing wrote:
On Tue, 2008-09-02 at 16:50 +0300, Peter Eisentraut wrote:
Gregory Stark wrote:
Peter Eisentraut <peter_e@gmx.net> writes:
Are you really afraid that someone would want to use mb to mean
millibits ?As SQL is generally case insensitive, it is quite surprising to most
people that GUC units are not.We have had this discussion before, I even submitted a patch to make
them case insensitive. In retrospect I was wrong to submit that patch.
SQL may be case insensitive but units are not. MB != Mb != mb ,
For most people they are equal, and all mean MEGABYTE(S) though
http://en.wikipedia.org/wiki/MB has lots of other possible meanings for
each.
I don't think we should encourage in any way for users to do the wrong thing.
Can you see any scenario where accepting case insensitive units does
more damage than just ignoring the conf line with "incorrect" casing ?
Or do you mean we should discourage people from editing postgresql.conf
manually and have them use some tool which prevents them entering "kb" ?
----------------
Hannu
On Wed, 2008-09-03 at 08:20 -0700, Joshua D. Drake wrote:
Greg Stark wrote:
I don't think worrying about the message we send to users is reasonable.
We can take responsibilty for the messages we output but punishing our
users to teach them a lesson is being actively user-hostileThere is no arguing that MB != Mb;
The whole point of this discussion is, that mostly people expect
MB == Mb = mb == mB, especially if they see weird constructs like kB
used (k for Kilo, or actually Kibi).
nor is there anything user-hostile behind the idea of doing it the right way.
I was not trying to expose some sinister plan, just pointing out that
users seldom expect that kind of surprise.
--------------
Hannu
On Wed, 03 Sep 2008 18:32:16 +0300
Hannu Krosing <hannu@krosing.net> wrote:
We have had this discussion before, I even submitted a patch to
make them case insensitive. In retrospect I was wrong to submit
that patch. SQL may be case insensitive but units are not. MB !=
Mb != mb ,For most people they are equal, and all mean MEGABYTE(S) though
http://en.wikipedia.org/wiki/MB has lots of other possible meanings
for each.
O.k. there is an interesting point being made here, "For most people".
Which people exactly? Are we talking about the general populous? If so
I would argue that "most people" don't have a clue what MB, Mb, or mb
is except to say I think that means some kind of speed or capacity.
The above is not our target.
If our definition of most people is, "those who are reasonably
technically adept and will be deploying PostgreSQL in production on
some level".
If someone doesn't know the difference between Mb and MB on a
production system, I would not want them anywhere near any instance of
a production system.
If we are going to make sweeping statements (anyone on this thread)
about user-hostile and most people, then we better define what those
mean. This whole argument about making something easier (and incorrect)
for someone who doesn't exist and has not been defined.
I would be hung on this list if I made a similar argument about any
other feature.
I don't think we should encourage in any way for users to do the
wrong thing.Can you see any scenario where accepting case insensitive units does
more damage than just ignoring the conf line with "incorrect" casing ?
Generally speaking, no I can't think of any damage that could be done
from mixed casing. Especially since we would only accept certain
possibilities, e.g; Mb would equal MB.
It just strikes me as really bad that a project that prides itself on
"doing it right" is willing to make this type of sacrifice. This isn't
about usability. This is about doing it wrong and actively encouraging
our users that "wrong is ok". It could also misinform the user about
what the meaning of the value means.
Or do you mean we should discourage people from editing
postgresql.conf manually and have them use some tool which prevents
them entering "kb" ?
Well that is a whole other argument :P. I would be happy to have that
one on another thread.
Sincerely,
Joshua D. Drake
--
The PostgreSQL Company since 1997: http://www.commandprompt.com/
PostgreSQL Community Conference: http://www.postgresqlconference.org/
United States PostgreSQL Association: http://www.postgresql.us/
Donate to the PostgreSQL Project: http://www.postgresql.org/about/donate
On Wed, Sep 03, 2008 at 06:37:29PM +0300, Hannu Krosing wrote:
On Wed, 2008-09-03 at 08:20 -0700, Joshua D. Drake wrote:
There is no arguing that MB != Mb;
The whole point of this discussion is, that mostly people expect
MB == Mb = mb == mB, especially if they see weird constructs like kB
used (k for Kilo, or actually Kibi).
Note that in the networked computer world, MB and Mb are
importantly different. The latter is relevant for the speed of your
network interface, for instance. People often get this wrong when
speaking carelessly, but a mistake of this sort is a serious one,
given the orders of magnitude difference.
A
--
Andrew Sullivan
ajs@commandprompt.com
+1 503 667 4564 x104
http://www.commandprompt.com/
On Wed, 2008-09-03 at 09:10 -0700, Joshua Drake wrote:
If someone doesn't know the difference between Mb and MB on a
production system, I would not want them anywhere near any instance of
a production system.
I for one can make the difference, once I can zen that we are in a
domain, where Mbit makes sense. For me it does not make any sense to
define shared_buffer in Mbit's.
If we are going to make sweeping statements (anyone on this thread)
about user-hostile and most people, then we better define what those
mean.
Not user-hostile but rather hostile to an overworked DBA, who tries to
change some .conf param from 1MB to 512KB at 3AM to save an overloaded
server and then suddenly the server won't start anymore.
This whole argument about making something easier (and incorrect)
for someone who doesn't exist and has not been defined.
I still don't get in what non-academic way it would be incorrect to
spell kilobytes as kb or KB or megabytes as mb ?
Do you have any standard source where it specifies one use and forbids
the other ?
We are not correct according to SI (where k = 1000) nor IEC which
specifies Ki for 1024.
I would be hung on this list if I made a similar argument about any
other feature.
Its all about making it easier, not incorrect.
It may be perhaps "incorrect" in some strictly academic sense, maybe
some article submission guidelines or such.
I don't think we should encourage in any way for users to do the
wrong thing.Can you see any scenario where accepting case insensitive units does
more damage than just ignoring the conf line with "incorrect" casing ?Generally speaking, no I can't think of any damage that could be done
from mixed casing. Especially since we would only accept certain
possibilities, e.g; Mb would equal MB.It just strikes me as really bad that a project that prides itself on
"doing it right" is willing to make this type of sacrifice. This isn't
about usability.
It is all about usability. Not accepting popular spellings of common
units seems to me as unnecessary nit-picking , similar to if we
required _exactly_ one space on either side on = in SET statements.
And we are already "incorrect" by allowing both '' and "" around the
value, the latter would usually imply an identifier :P
hannu=# set effective_cache_size = '132MB';
SET
hannu=# set effective_cache_size = "132MB";
SET
Maybe we should change this too if we want to champion the "right way"?
This is about doing it wrong and actively encouraging
our users that "wrong is ok". It could also misinform the user about
what the meaning of the value means.
You could issue a warning, something like
"Warning: KB is not a spelling endorsed by PostgeSQL Global Development
Group, in future please use kB, a spelling that many of us feel to be
the only valid one and others don't think important enough to argue with
them."
-----------------------
Hannu
Hannu Krosing escribi�:
On Wed, 2008-09-03 at 09:10 -0700, Joshua Drake wrote:
If we are going to make sweeping statements (anyone on this thread)
about user-hostile and most people, then we better define what those
mean.Not user-hostile but rather hostile to an overworked DBA, who tries to
change some .conf param from 1MB to 512KB at 3AM to save an overloaded
server and then suddenly the server won't start anymore.
I think the energy wasted in this discussion would be better spent in
working a the check-the-config-file feature. That would equally solve
this problem, as well as many others.
--
Alvaro Herrera http://www.CommandPrompt.com/
The PostgreSQL Company - Command Prompt, Inc.
On Wed, Sep 03, 2008 at 01:48:18PM -0400, Alvaro Herrera wrote:
I think the energy wasted in this discussion would be better spent in
working a the check-the-config-file feature. That would equally solve
this problem, as well as many others.
This seems like a good idea to me.
A
--
Andrew Sullivan
ajs@commandprompt.com
+1 503 667 4564 x104
http://www.commandprompt.com/
Sure if people want to do it the right way more power to them. What
you're talking about is punishing people when they don't live up to
your standards.
greg
On 3 Sep 2008, at 16:20, "Joshua D. Drake" <jd@commandprompt.com> wrote:
Show quoted text
Greg Stark wrote:
I don't think worrying about the message we send to users is
reasonable. We can take responsibilty for the messages we output
but punishing our users to teach them a lesson is being actively
user-hostileThere is no arguing that MB != Mb; nor is there anything user-
hostile behind the idea of doing it the right way.Sincerely,
Joshua D. Drake
On Wed, 3 Sep 2008 19:36:19 +0100
Greg Stark <greg.stark@enterprisedb.com> wrote:
Sure if people want to do it the right way more power to them. What
you're talking about is punishing people when they don't live up to
your standards.
I think I will defer to Andrew and Alvaro's opinion on the matter.
Sincerely,
Joshua D. Drake
--
The PostgreSQL Company since 1997: http://www.commandprompt.com/
PostgreSQL Community Conference: http://www.postgresqlconference.org/
United States PostgreSQL Association: http://www.postgresql.us/
Donate to the PostgreSQL Project: http://www.postgresql.org/about/donate
-----BEGIN PGP SIGNED MESSAGE-----
Hash: RIPEMD160
You do realize that misspelling unit name can cause downtime of several
minutes instead of couple seconds? We can easily do restart in couple of
seconds but the restart, look logs, launch editor, find value, change,
save, restart cycle will take quite a lot more. Why should we increase
the chance that any config edit causes problems?
Minutes? It's like any other changes made to postgresql.conf: make the change,
ctrl-z out of your editor, reload Postgres. If there's an error, foreground
to the editor, revert the change, and reload Postgres. Not ideal (the
config file checker is what we really want), but certainly quick.
Secondly, humans don't have byte-exact memory, instead they generalize
and deduce (eg. from nearby parameters). Thus remembering "KB, MB, GB"
or "kb, mb, gb" is easier than remembering "kB, MB, GB".
They are all listed at the top of the file, in case there is any confusion.
No need to worry about remembering things.
- --
Greg Sabino Mullane greg@turnstep.com
PGP Key: 0x14964AC8 200809031501
http://biglumber.com/x/web?pk=2529DF6AB8F79407E94445B4BC9B906714964AC8
-----BEGIN PGP SIGNATURE-----
iEYEAREDAAYFAki+39gACgkQvJuQZxSWSsiTfACgiNPP77YKGKgyBm36ckkKZhGM
b9sAn2JmrpbMkJ8dm0Wbz3TYdLo83h/S
=PQvy
-----END PGP SIGNATURE-----
On Wed, 2008-09-03 at 13:48 -0400, Alvaro Herrera wrote:
Hannu Krosing escribió:
On Wed, 2008-09-03 at 09:10 -0700, Joshua Drake wrote:
If we are going to make sweeping statements (anyone on this thread)
about user-hostile and most people, then we better define what those
mean.Not user-hostile but rather hostile to an overworked DBA, who tries to
change some .conf param from 1MB to 512KB at 3AM to save an overloaded
server and then suddenly the server won't start anymore.I think the energy wasted in this discussion would be better spent in
working a the check-the-config-file feature.
What kind of checks do you have in mind. Would this be something that
works at restart, does the check and continues with old settings if new
file would not load ?
That would equally solve
this problem, as well as many others.
AFAIK the config file is checked now, and if the check fails, the
database won't start.
-------------
Hannu
On Wed, 03 Sep 2008 23:10:24 +0300
Hannu Krosing <hannu@2ndQuadrant.com> wrote:
That would equally solve
this problem, as well as many others.AFAIK the config file is checked now, and if the check fails, the
database won't start.
Like apachectl configcheck ... E.g; we have the ability to check if the
config file is valid "before" we restart and have an extended outage.
Joshua D. Drake
-------------
Hannu
--
The PostgreSQL Company since 1997: http://www.commandprompt.com/
PostgreSQL Community Conference: http://www.postgresqlconference.org/
United States PostgreSQL Association: http://www.postgresql.us/
Donate to the PostgreSQL Project: http://www.postgresql.org/about/donate
Hannu Krosing escribi�:
On Wed, 2008-09-03 at 13:48 -0400, Alvaro Herrera wrote:
I think the energy wasted in this discussion would be better spent in
working a the check-the-config-file feature.What kind of checks do you have in mind. Would this be something that
works at restart, does the check and continues with old settings if new
file would not load ?
This was discussed in some other thread. No, the idea is that before
you do a reload, you run some function or invoke postmaster with certain
arguments, and it checks the config file and it says "this is OK" or
"there are errors here and here". The check can be run even if the
server is already running. That way you can just run it just before a
reload or restart.
That would equally solve
this problem, as well as many others.AFAIK the config file is checked now, and if the check fails, the
database won't start.
... which is not ideal. Obviously it doesn't make much sense to run the
check if the server is already down, because you'll immediately know
that it won't come up just by trying to start it up.
(However, maybe it would be better for the init script to run the check
anyway, and report the error to stderr where the user running the script
can read it directly instead of having to go check the postmaster log
which could be sitting somewhere else.)
--
Alvaro Herrera http://www.CommandPrompt.com/
PostgreSQL Replication, Consulting, Custom Development, 24x7 support
On Wed, 2008-09-03 at 11:45 -0700, Joshua Drake wrote:
On Wed, 3 Sep 2008 19:36:19 +0100
Greg Stark <greg.stark@enterprisedb.com> wrote:Sure if people want to do it the right way more power to them. What
you're talking about is punishing people when they don't live up to
your standards.I think I will defer to Andrew and Alvaro's opinion on the matter.
So Andrews opinion was that Mb (meaning Mbit) is different from MB (for
megabyte) and that if someone thinks that we define shared buffers in
megabits can get confused and order wrong kind of network card ?
I can understand Alvaros stance more readily - if we have irrational
constraints on what can go into conf file, and people wont listen to
reason, then build better tools for helping people to compli to these
irrational demands. It has the added benefit of helping to catch reall
conf file errors.
I did not realize earlier that KB vs kb vs kB vs Kb is a religious
issue .
I mean, there is no known written standard, which says that Mb is
megabit, not megabyte or that you can (or can't) write kilo as K, but
some people just believe that kB is "the Way" and allowing people to
write kilobytes as KB or kb is evil and should be punished.
To me this sounds stupid, but I understand that this is a thing that
can't be argued logically and I have better things to do than changing
peoples irrational beliefs.
Sorry.
---------------
Hannu
On Thu, 04 Sep 2008 01:26:44 +0300
Hannu Krosing <hannu@krosing.net> wrote:
So Andrews opinion was that Mb (meaning Mbit) is different from MB
(for megabyte) and that if someone thinks that we define shared
buffers in megabits can get confused and order wrong kind of network
card ?
I was actually referring to:
http://archives.postgresql.org/pgsql-hackers/2008-09/msg00206.php
Sincerely,
Joshua D. Drake
--
The PostgreSQL Company since 1997: http://www.commandprompt.com/
PostgreSQL Community Conference: http://www.postgresqlconference.org/
United States PostgreSQL Association: http://www.postgresql.us/
Donate to the PostgreSQL Project: http://www.postgresql.org/about/donate
Hannu Krosing escribi�:
I mean, there is no known written standard, which says that Mb is
megabit, not megabyte or that you can (or can't) write kilo as K, but
some people just believe that kB is "the Way" and allowing people to
write kilobytes as KB or kb is evil and should be punished.
Yes there is --- it's the SI.
http://en.wikipedia.org/wiki/SI#SI_writing_style
I don't know about it being "evil" and punishment, but it's wrong.
--
Alvaro Herrera http://www.CommandPrompt.com/
The PostgreSQL Company - Command Prompt, Inc.
On Wednesday 03 September 2008 16:12:29 Joshua Drake wrote:
On Wed, 03 Sep 2008 23:10:24 +0300
Hannu Krosing <hannu@2ndQuadrant.com> wrote:
That would equally solve
this problem, as well as many others.AFAIK the config file is checked now, and if the check fails, the
database won't start.Like apachectl configcheck ... E.g; we have the ability to check if the
config file is valid "before" we restart and have an extended outage.
To paraphrase, "if you can't write a config file correctly before restarting,
I do not want you anywhere near any instance of a production system"
--
Robert Treat
Build A Brighter LAMP :: Linux Apache {middleware} PostgreSQL
On Wed, 2008-09-03 at 20:01 -0400, Alvaro Herrera wrote:
Hannu Krosing escribió:
I mean, there is no known written standard, which says that Mb is
megabit, not megabyte or that you can (or can't) write kilo as K, but
some people just believe that kB is "the Way" and allowing people to
write kilobytes as KB or kb is evil and should be punished.Yes there is --- it's the SI.
http://en.wikipedia.org/wiki/SI#SI_writing_style
I don't know about it being "evil" and punishment, but it's wrong.
SI defines decimal-based prefixes, where k = kilo = 1000, so our current
conf use is also wrong.
http://en.wikipedia.org/wiki/Kilobyte
...
1024 bytes (2^10): This unit is used when expressing quantities which
are based on powers of two, such as memory chip capacities. Most
software also expresses storage capacity in units of 1024 bytes.
Although the use of kilobyte for this unit is common, this usage has
been expressly forbidden by the SI standard and other standards
organisations. To indicate a quantity of 1024 bytes, the term kibibyte
(KiB) has been recommended instead.
And no, I am not proposing switching conf units to KiB, nor am I
proposing to use MB to mean 1 000 000. I'm quite happy with us being
"wrong" in 1000 v. 1024 area.
---------------
Hannu
Le jeudi 04 septembre 2008, Robert Treat a écrit :
To paraphrase, "if you can't write a config file correctly before
restarting, I do not want you anywhere near any instance of a production
system"
Do you really want to TCO of PostgreSQL to raise that much when the software
could help lowering it?
If you're a shop where you can't have only experts in any given domain do
level 1 nightly "fix", maybe you'd still like them to be able to follow some
procedures involving server config change.
On what grounds are you wanting this not to be possible?
Regards,
--
dim
Hannu Krosing escribi�:
On Wed, 2008-09-03 at 20:01 -0400, Alvaro Herrera wrote:
Yes there is --- it's the SI.
http://en.wikipedia.org/wiki/SI#SI_writing_style
I don't know about it being "evil" and punishment, but it's wrong.
SI defines decimal-based prefixes, where k = kilo = 1000, so our current
conf use is also wrong.
Actually, this has been a moving target. For a certain length of time,
some standards did accept that k meant 1024 "in computing context"; see
http://en.wikipedia.org/wiki/Binary_prefix
So we're not _absolutely_ wrong here; at least not until KiB are more
widely accepted and kB more widely refused to mean 1024 bytes. The
relevant standard has been published just this year by ISO.
http://en.wikipedia.org/wiki/ISO/IEC_80000#Binary_prefixes
So this is new territory, whereas case-sensitivity of prefixes and unit
abbreviations has existed for decades.
--
Alvaro Herrera http://www.CommandPrompt.com/
PostgreSQL Replication, Consulting, Custom Development, 24x7 support
On Thu, Sep 04, 2008 at 01:26:44AM +0300, Hannu Krosing wrote:
So Andrews opinion was that Mb (meaning Mbit) is different from MB (for
megabyte) and that if someone thinks that we define shared buffers in
megabits can get confused and order wrong kind of network card ?
I know it's fun to point and laugh instead of giving an argument, but
the above is not what I said. What I said is that there is a
technical difference between at least some of these units, and one
that is relevant in some contexts where we have good reason to believe
Postgres is used. So it seems to me that there is at least a _prima
facie_ reason in favour of making case-based decisions. Your argument
against that appears to be, "Well, people can be sloppy."
Alvaro's suggestion seems to me to be a better one. It is customary,
in servers with large complicated configuration systems, for the
server to come with a tool that validates the configuration file
before you try to load it. Postfix does this; apache does it; so does
BIND. Heck, even NSD (which is way less configurable than BIND) does
this. Offering such a tool provides considerable more benefit than
the questionable one of allowing people to type whatever they want
into the configuration file and suppose that the server will by magic
know what they meant.
I can understand Alvaros stance more readily - if we have irrational
constraints on what can go into conf file, and people wont listen to
reason
Extending your current reasoning, it's irrational that all the names
of the parameters have to be spelled correctly. Why can't we just
accept log_statement_duration_min? It's _obvious_ that it's the same
thing as log_min_duration_statement! It's silly to expect that
harried administrators have to spell these options correctly. Why
can't we parse all the file, separating each label by "_". Then if
any arrangements of those labels matches a "real" configuration
parameter, select that one as the thing to match and proceed from
there?
A
--
Andrew Sullivan
ajs@commandprompt.com
+1 503 667 4564 x104
http://www.commandprompt.com/
On Sep 4, 2008, at 6:29 AM, Andrew Sullivan wrote:
On Thu, Sep 04, 2008 at 01:26:44AM +0300, Hannu Krosing wrote:
So Andrews opinion was that Mb (meaning Mbit) is different from MB
(for
megabyte) and that if someone thinks that we define shared buffers in
megabits can get confused and order wrong kind of network card ?I know it's fun to point and laugh instead of giving an argument, but
the above is not what I said. What I said is that there is a
technical difference between at least some of these units, and one
that is relevant in some contexts where we have good reason to believe
Postgres is used. So it seems to me that there is at least a _prima
facie_ reason in favour of making case-based decisions. Your argument
against that appears to be, "Well, people can be sloppy."
Settings in postgresql.conf are currently case-insensitive. Except
for the units.
Alvaro's suggestion seems to me to be a better one. It is customary,
in servers with large complicated configuration systems, for the
server to come with a tool that validates the configuration file
before you try to load it. Postfix does this; apache does it; so does
BIND. Heck, even NSD (which is way less configurable than BIND) does
this. Offering such a tool provides considerable more benefit than
the questionable one of allowing people to type whatever they want
into the configuration file and suppose that the server will by magic
know what they meant.
How would such a tool cope with, for example, shared_buffers
being set to one eighth the size the DBA intended, due to their
use of Mb rather than MB? Both of which are perfectly valid
units to use to set shared buffers, even though we only support
one right now. If the answer to that is something along the lines
of we don't support megaabits for shared_buffers, and never will because
nobody in their right mind would ever intend to use megabits
to set their shared buffer size... that's a useful datapoint when
it comes to designing for usability.
Cheers,
Steve
On Thu, 2008-09-04 at 09:29 -0400, Andrew Sullivan wrote:
On Thu, Sep 04, 2008 at 01:26:44AM +0300, Hannu Krosing wrote:
So Andrews opinion was that Mb (meaning Mbit) is different from MB (for
megabyte) and that if someone thinks that we define shared buffers in
megabits can get confused and order wrong kind of network card ?I know it's fun to point and laugh instead of giving an argument, but
the above is not what I said. What I said is that there is a
technical difference between at least some of these units, and one
that is relevant in some contexts where we have good reason to believe
Postgres is used. So it seems to me that there is at least a _prima
facie_ reason in favour of making case-based decisions. Your argument
against that appears to be, "Well, people can be sloppy."Alvaro's suggestion seems to me to be a better one.
Agreed. maybe this can even be implemented as a special switch to
postmaster (maybe -n or --dry-run, similar to make), not a separate
command.
I can understand Alvaros stance more readily - if we have irrational
constraints on what can go into conf file, and people wont listen to
reasonExtending your current reasoning, it's irrational that all the names
of the parameters have to be spelled correctly.
It would be irrational to allow all letters in parameter names to be
case-insensitive, except 'k' which has to be lowercase ;)
The main point of confusion comes from not accepting KB and this bites
you when you go down from MB, with reasoning like "ok, it seems that
units are in uppercase, so let's change 1MB to 768KB and see what
happens"
-------------
Hannu
On Thu, Sep 04, 2008 at 07:01:18AM -0700, Steve Atkins wrote:
Settings in postgresql.conf are currently case-insensitive. Except
for the units.
And, of course, filenames when you are using a case-sensitive
filesystem. Because these are things that are defined by some
convention other than the ones the PGDG made up. Since units fall
into that category, it seems to me that we're stuck with using
external conventions.
one right now. If the answer to that is something along the lines
of we don't support megaabits for shared_buffers, and never will because
nobody in their right mind would ever intend to use megabits
to set their shared buffer size... that's a useful datapoint when
it comes to designing for usability.
And you are going to establish this worldwide convention on what
someone in right mind would do how, exactly? For instance, I think
nobody in right mind would use "KB" to mean "kilobytes". I suppose
you could get a random sample of all current Postgres users to decide
what makes sense, but then you'd have the problem of knowing whether
you had a random sample, since the population isn't obviously
identifiable. Or, we could just stick with the convention that we
already have, and write a tool that captures this an other issues.
Maybe even one that could later form the basis for an automatic tuning
advisor, as well.
The problem with appeals to common sense always turns out to be that
different people's common sense leads them to different conclusions.
(We had a devastating government in Ontario some years ago that claimed
to be doing things that were just common sense; the Province is still
cleaning up the mess.)
A
--
Andrew Sullivan
ajs@commandprompt.com
+1 503 667 4564 x104
http://www.commandprompt.com/
On Wed, 2008-09-03 at 11:58 +0300, Asko Oja wrote:
On Wed, Sep 3, 2008 at 11:20 AM, Heikki Linnakangas
<heikki.linnakangas@enterprisedb.com> wrote:
Marko Kreen wrote:
On 9/2/08, Peter Eisentraut <peter_e@gmx.net> wrote:
Marko Kreen wrote:
In the meantime, here is simple patch
for case-insensivity.
You might be able to talk me into accepting
various unambiguous, common
alternative spellings of various units. But
for instance allowing MB and Mb
to mean the same thing is insane.How would the docs for that look like? And anyway,
what is wrong with
Mb for megabytes?
From infamous wikipedia: A megabit is a unit of information or
computer storage, abbreviated Mbit (or Mb).
To me playing with case of acronyms and even depending on it seems
more insane. It would make much more sense to have case insensitive
set of acronyms and (thanks Tom for pointing out) some sanity checks
when configuration is loaded to notify user when wrong ones are used
for some context.
There is a patch on the CommitFest, so we must either accept it or
reject it (with/without comments).
Peter's objection is reasonable, as far as most people have replied.
Marko's proposal is also reasonable to most people, since they do not
wish fat fingers to cause any amount of downtime. ISTM that if you've
done this, you appreciate the feature, if not it seems less important.
I would point out that Marko's patch is about what values get accepted
in postgresql.conf, not how the units are represented when you perform
SHOW, look at pg_settings or read the docs. So Marko's proposal does not
ignore the important distinction between units in *all* places, just at
the time of input. With that in mind, the proposal seems to be
acceptable to the majority, based upon my assessment of the comments.
In terms of the patch, ISTM that relaxing the comparison logic also
appears to relax the warnings given and that is not acceptable, given
concerns raised.
So my recommendation to everybody is
* we allow case insensitive matches of units in postgresql.conf
* Marko should change patch to put WARNINGs in place so people know they
got it wrong
* we make sure the case is always shown correctly in all other aspects
of Postgres server and docs (no relaxation at all there)
* in the longer term, we look for the solution to be a config checker
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
Simon Riggs <simon@2ndQuadrant.com> writes:
Peter's objection is reasonable, as far as most people have replied.
Marko's proposal is also reasonable to most people, since they do not
wish fat fingers to cause any amount of downtime. ISTM that if you've
done this, you appreciate the feature, if not it seems less important.
I really think that the claim that this will "save downtime" is a
ridiculous argument. On that basis we should, for example, be looking
for a nearest match for any misspelled variable name. The fact of the
matter is that a configuration validator is a far better answer to any
such worries than trying to accept bad/questionable input.
So my recommendation to everybody is
* we allow case insensitive matches of units in postgresql.conf
* Marko should change patch to put WARNINGs in place so people know they
got it wrong
* we make sure the case is always shown correctly in all other aspects
of Postgres server and docs (no relaxation at all there)
* in the longer term, we look for the solution to be a config checker
My vote is to reject the patch and work on a config checker.
regards, tom lane
Simon Riggs <simon@2ndQuadrant.com> writes:
Peter's objection is reasonable, as far as most people have replied.
Marko's proposal is also reasonable to most people, since they do not
wish fat fingers to cause any amount of downtime. ISTM that if you've
done this, you appreciate the feature, if not it seems less important.
My objection isn't down-time at all, it's the insultingly user-hostile
attitude. I normally am setting work_mem by hand in a psql session and *every*
time I do it I swear at Postgres for being annoyingly pedantic here.
I'm all for using the correct acronyms in all messages and documentation. What
I find annoying is the:
postgres=# set work_mem = '1g';
ERROR: invalid value for parameter "work_mem": "1g"
HINT: It's perfectly clear what you want but I'm going to refuse to do
it until you type it exactly as I say: "GB"
* Marko should change patch to put WARNINGs in place so people know they
got it wrong
That's only slightly less insulting than an error.
* we make sure the case is always shown correctly in all other aspects
of Postgres server and docs (no relaxation at all there)
I believe we're already fairly stringent about this as we should be.
* in the longer term, we look for the solution to be a config checker
I don't think a config checker directly addresses the same problem. I never
set work_mem in a config and it still annoys the hell out of me.
--
Gregory Stark
EnterpriseDB http://www.enterprisedb.com
Ask me about EnterpriseDB's On-Demand Production Tuning
Gregory Stark <stark@enterprisedb.com> writes:
I'm all for using the correct acronyms in all messages and documentation. What
I find annoying is the:
postgres=# set work_mem = '1g';
ERROR: invalid value for parameter "work_mem": "1g"
But of course case insensitivity isn't going to fix that example for you.
So we're right back at the question of where we should draw the line in
trying to accept variant input.
regards, tom lane
On Mon, Sep 8, 2008 at 2:11 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
But of course case insensitivity isn't going to fix that example for you.
So we're right back at the question of where we should draw the line in
trying to accept variant input.
Well it's not a perfect precedent but for example, dd accepts:
G (2^30)
M (2^20)
k (2^10)
K (2^10)
Kb (10^3)
MB (10^6)
GB (10^9)
b (512)
I think we're all agreed we want to ignore the KiB crap and make all
our units base 2. And I don't think usin "b" for block makes sense for
us. But the point is that yes, people expect to type "100M" or "1G"
and have that work. Plenty of us do it all the time with dd or other
tools already.
--
greg
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
On Mon, Sep 08, 2008 at 02:18:55PM +0100, Greg Stark wrote:
On Mon, Sep 8, 2008 at 2:11 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
But of course case insensitivity isn't going to fix that example for you.
So we're right back at the question of where we should draw the line in
trying to accept variant input.Well it's not a perfect precedent but for example, dd accepts:
G (2^30)
M (2^20)
k (2^10)
K (2^10)
Kb (10^3)
MB (10^6)
GB (10^9)
b (512)
Heh. But it doesn't take Gb or gB or gb, which was one of your express
wishes. So -- hm.
Regards
- -- tomás
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (GNU/Linux)
iD8DBQFIxSxlBcgs9XrR2kYRAkOkAJ9pp7nNjgJOb2NtHPwKIKZMcsNYlQCdE8wa
VQ/c+9Nan1V3d+/cdTm+Xn4=
=rkzg
-----END PGP SIGNATURE-----
"Greg Stark" <stark@enterprisedb.com> writes:
On Mon, Sep 8, 2008 at 2:11 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
But of course case insensitivity isn't going to fix that example for you.
So we're right back at the question of where we should draw the line in
trying to accept variant input.
Well it's not a perfect precedent but for example, dd accepts:
G (2^30)
M (2^20)
k (2^10)
K (2^10)
Kb (10^3)
MB (10^6)
GB (10^9)
b (512)
Hmm. I could get behind a proposal to allow single-letter abbreviations
if it could be made to work across the board, but what about the time
units? "ms" vs "min" is the sticky part there...
regards, tom lane
-----BEGIN PGP SIGNED MESSAGE-----
Hash: RIPEMD160
Tom Lane wrote:
My vote is to reject the patch and work on a config checker.
+1
postgres=# set work_mem = '1g';
ERROR: invalid value for parameter "work_mem": "1g"
Perhaps this would be a great place for a HINT listing all
valid inputs, if not there already?
- --
Greg Sabino Mullane greg@turnstep.com
End Point Corporation
PGP Key: 0x14964AC8 200809081014
http://biglumber.com/x/web?pk=2529DF6AB8F79407E94445B4BC9B906714964AC8
-----BEGIN PGP SIGNATURE-----
iEYEAREDAAYFAkjFM2AACgkQvJuQZxSWSsiDvACdE6Wlrnu3uQH4mOpuEMvX0VQe
rXoAoPLCR5jKTWQH4GsHDtz5NNZXq4vA
=nRMS
-----END PGP SIGNATURE-----
Greg Sabino Mullane wrote:
Tom Lane wrote:
postgres=# set work_mem = '1g';
ERROR: invalid value for parameter "work_mem": "1g"Perhaps this would be a great place for a HINT listing all
valid inputs, if not there already?
alvherre=# set work_mem = '1g';
ERROR: invalid value for parameter "work_mem": "1g"
HINT: Valid units for this parameter are "kB", "MB", and "GB".
--
Alvaro Herrera http://www.CommandPrompt.com/
PostgreSQL Replication, Consulting, Custom Development, 24x7 support
"Greg Sabino Mullane" <greg@turnstep.com> writes:
Tom Lane wrote:
My vote is to reject the patch and work on a config checker.
+1
postgres=# set work_mem = '1g';
ERROR: invalid value for parameter "work_mem": "1g"Perhaps this would be a great place for a HINT listing all
valid inputs, if not there already?
It is, I paraphrased it on my original message as:
HINT: It's perfectly clear what you want but I'm going to refuse to do
it until you type it exactly as I say: "GB"
--
Gregory Stark
EnterpriseDB http://www.enterprisedb.com
Get trained by Bruce Momjian - ask me about EnterpriseDB's PostgreSQL training!
Gregory Stark wrote:
"Greg Sabino Mullane" <greg@turnstep.com> writes:
Tom Lane wrote:
My vote is to reject the patch and work on a config checker.
+1
postgres=# set work_mem = '1g';
ERROR: invalid value for parameter "work_mem": "1g"Perhaps this would be a great place for a HINT listing all
valid inputs, if not there already?It is, I paraphrased it on my original message as:
HINT: It's perfectly clear what you want but I'm going to refuse to do
it until you type it exactly as I say: "GB"
It's good as a joke, but what if the user says '1024b'? Does it mean
1024 blocks or one kilobyte? If blocks, what size are we talking, the
usual 512 bytes, or our BLCKSZ?
--
Alvaro Herrera http://www.CommandPrompt.com/
PostgreSQL Replication, Consulting, Custom Development, 24x7 support
Alvaro Herrera <alvherre@commandprompt.com> writes:
It's good as a joke, but what if the user says '1024b'? Does it mean
1024 blocks or one kilobyte? If blocks, what size are we talking, the
usual 512 bytes, or our BLCKSZ?
For what guc would you find a unit of posix-style "blocks" relevant?!
--
Gregory Stark
EnterpriseDB http://www.enterprisedb.com
Ask me about EnterpriseDB's 24x7 Postgres support!
Gregory Stark <stark@enterprisedb.com> writes:
Alvaro Herrera <alvherre@commandprompt.com> writes:
It's good as a joke, but what if the user says '1024b'? Does it mean
1024 blocks or one kilobyte? If blocks, what size are we talking, the
usual 512 bytes, or our BLCKSZ?
For what guc would you find a unit of posix-style "blocks" relevant?!
The point isn't whether it's relevant, the point is that there's a
fairly serious doubt as to what the user thought it meant.
regards, tom lane
Tom Lane <tgl@sss.pgh.pa.us> writes:
Gregory Stark <stark@enterprisedb.com> writes:
Alvaro Herrera <alvherre@commandprompt.com> writes:
It's good as a joke, but what if the user says '1024b'? Does it mean
1024 blocks or one kilobyte? If blocks, what size are we talking, the
usual 512 bytes, or our BLCKSZ?For what guc would you find a unit of posix-style "blocks" relevant?!
The point isn't whether it's relevant, the point is that there's a
fairly serious doubt as to what the user thought it meant.
My point was that we don't accept any form of "b" today and nobody has
suggested we should.
--
Gregory Stark
EnterpriseDB http://www.enterprisedb.com
Ask me about EnterpriseDB's RemoteDBA services!
Settings in postgresql.conf are currently case-insensitive. Except
for the units.And, of course, filenames when you are using a case-sensitive
filesystem. Because these are things that are defined by some
convention other than the ones the PGDG made up. Since units fall
into that category, it seems to me that we're stuck with using
external conventions.
Just a thought... since there are very few (if any) places where a
user would specify a variable in terms of bits (kilobits, megabits,
gigabits), we could make the units case-insensitive and assume that
kb, gb, and mb (and all case variants) refer to some number of bytes.
If a user wants to specify a variable in terms of bits, he would have
to spell out the units completely, as in "4 gigabits", "20 kilobits",
or "16 megabits".
-- Korry
Marko Kreen wrote:
Thirdly, please don't use "standard units" argument, unless you plan to
propose use of "KiB, MiB, GiB" at the same moment.
In defense of standard units, if the postgres docs say
"Postgres will round up to the nearest power of 2"
kB and MB seem very clear to me. If we want to silently
accept other abbreviations and acronyms too (mb, mIb, whatever)
that doesn't bother me, tho.
On Mon, 8 Sep 2008 10:32:40 -0400
Alvaro Herrera <alvherre@commandprompt.com> wrote:
Gregory Stark wrote:
"Greg Sabino Mullane" <greg@turnstep.com> writes:
Tom Lane wrote:
My vote is to reject the patch and work on a config checker.
+1
+1
Joshua D. Drake
--
The PostgreSQL Company since 1997: http://www.commandprompt.com/
PostgreSQL Community Conference: http://www.postgresqlconference.org/
United States PostgreSQL Association: http://www.postgresql.us/
Donate to the PostgreSQL Project: http://www.postgresql.org/about/donate
Tom Lane wrote:
"Greg Stark" <stark@enterprisedb.com> writes:
On Mon, Sep 8, 2008 at 2:11 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
But of course case insensitivity isn't going to fix that example for you.
So we're right back at the question of where we should draw the line in
trying to accept variant input.Well it's not a perfect precedent but for example, dd accepts:
G (2^30)
M (2^20)
k (2^10)
K (2^10)
Kb (10^3)
MB (10^6)
GB (10^9)
b (512)Hmm. I could get behind a proposal to allow single-letter abbreviations
if it could be made to work across the board,
The SQL standard actually specifies something about that. You can
define the length of large object types (CLOB and BLOB) with multipliers
K, M, and G, as in
CREATE TABLE foo ( bar BLOB(5 M) );
These multipliers are case insensitive, of course. (And their are
1024-based, FWIW.)
So I could imagine that we generalize this approach to make these
multipliers available in other positions.
This would have definitional problems of its own, however. If you
interpret K, M, and G strictly as unit-less multipliers, then
SET shared_buffers = 2 G
would mean
SET shared_buffers = 2 * 1073741824
meaning
SET shared_buffers = 2147463648
which is not the same thing as the current
SET shared_buffer = '2GB'
This also affects the solution to another GUC units complaint that the
quotes are annoying, which I support.
We could possibly settle some of these arguments if we could redefine
all memory parameters to use one byte as base unit, and then allow some
ambiguity and unit omission from there. But that would probably cause
much havoc, so we are stuck with a certain degree of inconsistency anyhow.
This would have definitional problems of its own, however. If you interpret
K, M, and G strictly as unit-less multipliers, thenSET shared_buffers = 2 G
I don't think it would be a good idea to make them unit-less, for
exactly the reasons you mention.
We could possibly settle some of these arguments if we could redefine all
memory parameters to use one byte as base unit, and then allow some
ambiguity and unit omission from there. But that would probably cause much
havoc, so we are stuck with a certain degree of inconsistency anyhow.
A good start might be to always OUTPUT memory parameters using the
same base unit.
portal=# show shared_buffers;
shared_buffers
----------------
24MB
(1 row)
portal=# show temp_buffers;
temp_buffers
--------------
1024
(1 row)
Kilobytes seems like the most reasonable choice, because we definitely
have variables where you would want to set a value less than 1
megabyte, and I doubt we have (or will ever need) any where we the
granularity is finer than than 1 kilobyte.
Beyond that, how about moving in the direction of deprecating
unit-less settings altogether? In other words, if you want 1024
shared buffers, you should be saying 8192kB or 8MB rather than 1024.
We could issue a WARNING for 8.4 and eventually move to rejecting that
syntax altogether. That gets everything into the same base unit
without ever change the semantics of any particular value.
...Robert
On Thu, 2008-09-04 at 11:51 -0400, Andrew Sullivan wrote:
On Thu, Sep 04, 2008 at 07:01:18AM -0700, Steve Atkins wrote:
Settings in postgresql.conf are currently case-insensitive. Except
for the units.And, of course, filenames when you are using a case-sensitive
filesystem. Because these are things that are defined by some
convention other than the ones the PGDG made up. Since units fall
into that category, it seems to me that we're stuck with using
external conventions.one right now. If the answer to that is something along the lines
of we don't support megaabits for shared_buffers, and never will because
nobody in their right mind would ever intend to use megabits
to set their shared buffer size... that's a useful datapoint when
it comes to designing for usability.And you are going to establish this worldwide convention on what
someone in right mind would do how, exactly? For instance, I think
nobody in right mind would use "KB" to mean "kilobytes".
Except those following JEDEC standards ;)
from http://en.wikipedia.org/wiki/JEDEC_memory_standards
-------------------
JEDEC Standard 100B.01[1] defines the "prefix to units of semiconductor
storage capacity" as follows:
* kilo (K): A multiplier equal to 1024 (210).
* mega (M): A multiplier equal to 1 048 576 (220 or K2, where K =
1024).
* giga (G): A multiplier equal to 1 073 741 824 (230 or K3, where
K = 1024).
-------------------
I'm not sure if this applies only to storage _capacity_ or also to stuff
stored using said capacity.
but in general I think it is easier to train millions of DBAs to use kB
than to achieve consensus about what "everybody" assumes on this list,
so I also give "+1" to working on config checker "instead" .
------------------
Hannu
On Tue, 9 Sep 2008, Robert Haas wrote:
A good start might be to always OUTPUT memory parameters using the
same base unit.
SHOW gives output that matches what you input. If you want to see things
with consistant units, look at pg_settings:
# select name,unit,setting,current_setting(name) from pg_settings where
name='shared_buffers' or name='temp_buffers';
name | unit | setting | current_setting
----------------+------+---------+-----------------
shared_buffers | 8kB | 131072 | 1GB
temp_buffers | 8kB | 1024 | 1024
Beyond that, how about moving in the direction of deprecating
unit-less settings altogether? In other words, if you want 1024
shared buffers, you should be saying 8192kB or 8MB rather than 1024.
We could issue a WARNING for 8.4 and eventually move to rejecting that
syntax altogether. That gets everything into the same base unit
without ever change the semantics of any particular value.
Deprecating setting the value directly in its base unit, so that all the
memory parameters are specified as a number of bytes, would sidestep a lot
of the issues people complain about. What I would like to see (but don't
have nearly enough time to argue in support of considering the resistance
to change here) is that this syntax:
shared_buffers=1024
Would assume the user meant 1024 *bytes*, with the server silently
rounding that up to the nearest 8k block. Then the whole issue of "do
they mean bits or bytes?" goes away, because no one would ever have to
include the "B". That paves the way for making it easy to support
case-insensitive values without pedantic confusion. As I see it these
should all give you the same thing:
shared_buffers=16777216
shared_buffers=16384k
shared_buffers=16384K
shared_buffers=16M
shared_buffers=16m
Because that's what would make life easiest for those configuring the
software.
Since this suggestion will inevitably lead to cries of befowled backward
compatibility, what I've been doing instead of participating in this
thread is working on a tool that will make it easy to convert old
postgresql.conf files to support a new version. Then the tool can convert
all the places someone uses the old syntax into the new.
--
* Greg Smith gsmith@gregsmith.com http://www.gregsmith.com Baltimore, MD
Greg Smith <gsmith@gregsmith.com> writes:
What I would like to see (but don't
have nearly enough time to argue in support of considering the resistance
to change here) is that this syntax:
shared_buffers=1024
Would assume the user meant 1024 *bytes*, with the server silently
rounding that up to the nearest 8k block. Then the whole issue of "do
they mean bits or bytes?" goes away, because no one would ever have to
include the "B".
How do you come to that conclusion? Leaving off the unit entirely
certainly doesn't make the user's intent clearer.
There's also a pretty serious compatibility problem, which is that
settings that had always worked before would suddenly be completely
broken (off by a factor of 8192 qualifies as "broken" in my book).
I think that if we wanted to change anything here, we'd have to
*require* a unit spec on unit-affected parameters, at least for a period
of several releases. Otherwise the confusion would be horrendous.
That paves the way for making it easy to support
case-insensitive values without pedantic confusion.
Again, you're just making this up. It doesn't make anything clearer.
regards, tom lane
On Tue, 9 Sep 2008, Tom Lane wrote:
How do you come to that conclusion? Leaving off the unit entirely
certainly doesn't make the user's intent clearer.
Same way I do all my conclusions in this area--talking to people in the
field regularly who've never configured a postgresql.conf before. I
highly recommend it for a fresh view.
Here's how this works every time I go throught it. When you first
encounter someone who is new to PostgreSQL, after they find out
"shared_buffers" is a memory allocation they presume it's in bytes until
they find out otherwise. And then they're slightly annoyed that a) if
they accidentally don't include a unit all the values get way bigger
because of some backward compatibility nonsense they don't care about and
b) that it's case sensitive. Since some of the arguments against (b) ("Mb
could mean megabits!") diminish if the recommended practice is to just
keep the multiplier in there, so the byte part of the unit is optional and
not used by the default postgresql.conf, that seems the most reasonable
way to proceed to me.
Unlike things like network speed where it's more complicated, memory is
measured in bytes unless there's a different multiplier attached by most
people. Greg Stark already made this same observation yesterday: "But
the point is that yes, people expect to type "100M" or "1G" and have that
work. Plenty of us do it all the time with dd or other tools already."
I don't actually expect any adjustment here but was getting a little bored
watching everyone replay
http://archives.postgresql.org/pgsql-hackers/2006-07/msg01229.php with
barely any changes from the first time.
--
* Greg Smith gsmith@gregsmith.com http://www.gregsmith.com Baltimore, MD
A good start might be to always OUTPUT memory parameters using the
same base unit.SHOW gives output that matches what you input.
Not for me it doesn't.
portal=# show temp_buffers;
temp_buffers
--------------
1024
(1 row)
portal=# set temp_buffers = '16MB';
SET
portal=# show temp_buffers;
temp_buffers
--------------
2048
(1 row)
Deprecating setting the value directly in its base unit, so that all the
memory parameters are specified as a number of bytes, would sidestep a lot
of the issues people complain about. What I would like to see (but don't
have nearly enough time to argue in support of considering the resistance to
change here) is that this syntax:shared_buffers=1024
Silly me, perhaps, but in the absence of a unit, I would assume that a
number in this context refers to a number of BUFFERS. I'd be willing
to bet money that if you showed this line to a bunch of people who
knew nothing about postgresql but were reasonably computer-savvy,
you'd get a lot more people who thought that was a number of buffers
than who thought it was a number of bytes.
Compounding the problem, of course, is the fact, that right now,
that's exactly what it does mean. While it's probably acceptable to
change the semantics of postgresql.conf in such a way that this syntax
is no longer accepted, or generates a warning, it's almost certainly a
bad idea to change it to silently mean something that is four orders
of magnitude different what it currently means.
Would assume the user meant 1024 *bytes*, with the server silently rounding
that up to the nearest 8k block. Then the whole issue of "do they mean bits
or bytes?" goes away, because no one would ever have to include the "B".
I don't believe this issue exists in the first place, because anyone
who would specify the size of their shared buffer pool in bits should
have their head examined. But if, hypothetically, someone were
confused, I can't imagine that taking away the "B" is somehow going to
be more clear.
...Robert
Robert Haas wrote:
bits...bytes...blocks...m...M
I can't imagine that taking away the "B" is somehow going to
be more clear.
If clarity is the goal, I'd want the following:
a) Verbosely spelling out the units in the default config file
temp_buffers = 16 megabytes
or
temp_buffers = 16 milliblocks :-)
Naive users who favor cut&paste will use the verbose words
that should leave little room for confusion. Power-users
who know the short forms from the docs will presumably have
read the descriptions.
b) having "show" show verbosely spelled out units.
db=# show temp_buffers;
temp_buffers
--------------
16000000 bytes
(1 row)
c) having "set" show a NOTICE with the verbose word for the units
db=# set temp_buffers = '16mb';
NOTICE: setting temp_buffers to 16000000 bytes
a) Verbosely spelling out the units in the default config file
temp_buffers = 16 megabytes
or
temp_buffers = 16 milliblocks :-)
Naive users who favor cut&paste will use the verbose words
that should leave little room for confusion. Power-users
who know the short forms from the docs will presumably have
read the descriptions.
I think it would make a lot of sense to encourage adding the word
"buffers" or "blocks" when that is the unit in question. This is all
religion at this point, but I find it difficult to believe that there
is any real need to spell out megabytes.
...Robert
Robert Haas wrote:
A good start might be to always OUTPUT memory parameters using the
same base unit.SHOW gives output that matches what you input.
Not for me it doesn't.
portal=# show temp_buffers;
temp_buffers
--------------
1024
(1 row)portal=# set temp_buffers = '16MB';
SET
portal=# show temp_buffers;
temp_buffers
--------------
2048
(1 row)
temp_buffers is actually special-cased in the code because
/*
* We show the GUC var until local buffers have been initialized, and
* NLocBuffer afterwards.
*/
It is not clear to me right now why that is a good idea. But it is only
this one paramter.
The actual logic that SHOW uses in the general case is to reformat the
value with the largest unit that allows for an integer value to be
presented. So this is neither your nor Greg's idea, but I think it
gives useful behavior in practice.
Peter Eisentraut <peter_e@gmx.net> writes:
temp_buffers is actually special-cased in the code because
/*
* We show the GUC var until local buffers have been initialized, and
* NLocBuffer afterwards.
*/
It is not clear to me right now why that is a good idea.
I think the reason for it is that you can change the setting within a
particular session, but only up until temp buffers have been initialized
(ie, when you first touch a temp table). If we just made it act like
other GUCs then SHOW would be lying to you about the effective value
if you changed it after that point.
An easy workaround would be to make the variable PGC_BACKEND, but that
seems to me to lose useful flexibility. Or we could invent a context
category specifically for this type of behavior, but is it worth that?
The narrowest fix would be to just teach the show hook to format its
result properly. I seem to recall having looked at that and been
annoyed by the amount of copy-and-paste required.
[ thinks for a bit... ] Or maybe we should get rid of the show hook
and instead put in an assign hook that prevents changes after the
initialization event. The problem is that you can't throw error
while reading, eg, a new config file. So while this might be the most
user-friendly approach, I think it would take some API change for assign
hooks.
regards, tom lane
temp_buffers is actually special-cased in the code because
/*
* We show the GUC var until local buffers have been initialized, and
* NLocBuffer afterwards.
*/It is not clear to me right now why that is a good idea. But it is only
this one paramter.
OK, well that's not so bad then, although it would be nice to make it
consistent.
The actual logic that SHOW uses in the general case is to reformat the value
with the largest unit that allows for an integer value to be presented. So
this is neither your nor Greg's idea, but I think it gives useful behavior
in practice.
Yes, that's a totally sensible choice as well. What do you think of
the idea of always requiring an explicit unit, either by deprecating
blocks as a unit altogether, or pushing users to specify "blocks" in
that case? It seemed to me from reading your previous response that
you thought this would make it more possible to be more flexible about
how MB, GB, etc. are specified, although I'm not exactly sure what the
relationship is.
...Robert