Index: doc/src/sgml/array.sgml
===================================================================
RCS file: /cvsroot/pgsql-server/doc/src/sgml/array.sgml,v
retrieving revision 1.36
diff -c -r1.36 array.sgml
*** doc/src/sgml/array.sgml	5 Aug 2004 03:29:11 -0000	1.36
--- doc/src/sgml/array.sgml	8 Aug 2004 04:49:48 -0000
***************
*** 95,104 ****
  </synopsis>
     where <replaceable>delim</replaceable> is the delimiter character
     for the type, as recorded in its <literal>pg_type</literal> entry.
!    (For all built-in types, this is the comma character
!    <quote><literal>,</literal></>.)  Each
!    <replaceable>val</replaceable> is either a constant of the array
!    element type, or a subarray.  An example of an array constant is
  <programlisting>
  '{{1,2,3},{4,5,6},{7,8,9}}'
  </programlisting>
--- 95,106 ----
  </synopsis>
     where <replaceable>delim</replaceable> is the delimiter character
     for the type, as recorded in its <literal>pg_type</literal> entry.
!    Among the standard data types provided in the
!    <productname>PostgreSQL</productname> distribution, type
!    <literal>box</> uses a semicolon (<literal>;</>) but all the others
!    use comma (<literal>,</>). Each <replaceable>val</replaceable> is
!    either a constant of the array element type, or a subarray. An example
!    of an array constant is
  <programlisting>
  '{{1,2,3},{4,5,6},{7,8,9}}'
  </programlisting>
***************
*** 161,167 ****
   </para>
  
   <para>
!   The <literal>ARRAY</literal> expression syntax may also be used:
  <programlisting>
  INSERT INTO sal_emp
      VALUES ('Bill',
--- 163,169 ----
   </para>
  
   <para>
!   The <literal>ARRAY</> constructor syntax may also be used:
  <programlisting>
  INSERT INTO sal_emp
      VALUES ('Bill',
***************
*** 176,183 ****
    Notice that the array elements are ordinary SQL constants or
    expressions; for instance, string literals are single quoted, instead of
    double quoted as they would be in an array literal.  The <literal>ARRAY</>
!   expression syntax is discussed in more detail in <xref
!   linkend="sql-syntax-array-constructors">.
   </para>
   </sect2>
  
--- 178,185 ----
    Notice that the array elements are ordinary SQL constants or
    expressions; for instance, string literals are single quoted, instead of
    double quoted as they would be in an array literal.  The <literal>ARRAY</>
!   constructor syntax is discussed in more detail in
!   <xref linkend="sql-syntax-array-constructors">.
   </para>
   </sect2>
  
***************
*** 524,533 ****
     use comma.)  In a multidimensional array, each dimension (row, plane,
     cube, etc.) gets its own level of curly braces, and delimiters
     must be written between adjacent curly-braced entities of the same level.
!    You may write whitespace before a left brace, after a right
!    brace, or before any individual item string.  Whitespace after an item
!    is not ignored, however: after skipping leading whitespace, everything
!    up to the next right brace or delimiter is taken as the item value.
    </para>
  
    <para>
--- 526,542 ----
     use comma.)  In a multidimensional array, each dimension (row, plane,
     cube, etc.) gets its own level of curly braces, and delimiters
     must be written between adjacent curly-braced entities of the same level.
!   </para>
! 
!   <para>
!    The array output routine will put double quotes around element values
!    if they are empty strings or contain curly braces, delimiter characters,
!    double quotes, backslashes, or white space.  Double quotes and backslashes
!    embedded in element values will be backslash-escaped.  For numeric
!    data types it is safe to assume that double quotes will never appear, but
!    for textual data types one should be prepared to cope with either presence
!    or absence of quotes.  (This is a change in behavior from pre-7.2
!    <productname>PostgreSQL</productname> releases.)
    </para>
  
    <para>
***************
*** 573,598 ****
  
    <para>
     As shown previously, when writing an array value you may write double
!    quotes around any individual array
!    element.  You <emphasis>must</> do so if the element value would otherwise
!    confuse the array-value parser.  For example, elements containing curly
!    braces, commas (or whatever the delimiter character is), double quotes,
!    backslashes, or leading white space must be double-quoted.  To put a double
!    quote or backslash in a quoted array element value, precede it with a
!    backslash.
!    Alternatively, you can use backslash-escaping to protect all data characters
!    that would otherwise be taken as array syntax or ignorable white space.
    </para>
  
    <para>
!    The array output routine will put double quotes around element values
!    if they are empty strings or contain curly braces, delimiter characters,
!    double quotes, backslashes, or white space.  Double quotes and backslashes
!    embedded in element values will be backslash-escaped.  For numeric
!    data types it is safe to assume that double quotes will never appear, but
!    for textual data types one should be prepared to cope with either presence
!    or absence of quotes.  (This is a change in behavior from pre-7.2
!    <productname>PostgreSQL</productname> releases.)
    </para>
  
   <note>
--- 582,603 ----
  
    <para>
     As shown previously, when writing an array value you may write double
!    quotes around any individual array element. You <emphasis>must</> do so
!    if the element value would otherwise confuse the array-value parser.
!    For example, elements containing curly braces, commas (or whatever the
!    delimiter character is), double quotes, backslashes, or leading white
!    space must be double-quoted.  To put a double quote or backslash in a
!    quoted array element value, precede it with a backslash. Alternatively,
!    you can use backslash-escaping to protect all data characters that would
!    otherwise be taken as array syntax.
    </para>
  
    <para>
!    You may write whitespace before a left brace or after a right
!    brace. You may also write whitespace before or after any individual item
!    string. In all of these cases the whitespace will be ignored. However,
!    whitespace within double-quoted elements, or surrounded on both sides by
!    non-whitespace characters of an element, is not ignored.
    </para>
  
   <note>
***************
*** 616,625 ****
  
   <tip>
    <para>
!    The <literal>ARRAY</> constructor syntax is often easier to work with
!    than the array-literal syntax when writing array values in SQL commands.
!    In <literal>ARRAY</>, individual element values are written the same way
!    they would be written when not members of an array.
    </para>
   </tip>
   </sect2>
--- 621,631 ----
  
   <tip>
    <para>
!    The <literal>ARRAY</> constructor syntax (see
!    <xref linkend="sql-syntax-array-constructors">) is often easier to work
!    with than the array-literal syntax when writing array values in SQL
!    commands. In <literal>ARRAY</>, individual element values are written the
!    same way they would be written when not members of an array.
    </para>
   </tip>
   </sect2>
Index: src/backend/utils/adt/arrayfuncs.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/backend/utils/adt/arrayfuncs.c,v
retrieving revision 1.106
diff -c -r1.106 arrayfuncs.c
*** src/backend/utils/adt/arrayfuncs.c	5 Aug 2004 03:29:37 -0000	1.106
--- src/backend/utils/adt/arrayfuncs.c	8 Aug 2004 04:49:48 -0000
***************
*** 351,368 ****
   *		 The syntax for array input is C-like nested curly braces
   *-----------------------------------------------------------------------------
   */
  static int
  ArrayCount(char *str, int *dim, char typdelim)
  {
! 	int			nest_level = 0,
! 				i;
! 	int			ndim = 1,
! 				temp[MAXDIM],
! 				nelems[MAXDIM],
! 				nelems_last[MAXDIM];
! 	bool		scanning_string = false;
! 	bool		eoArray = false;
! 	char	   *ptr;
  
  	for (i = 0; i < MAXDIM; ++i)
  	{
--- 351,382 ----
   *		 The syntax for array input is C-like nested curly braces
   *-----------------------------------------------------------------------------
   */
+ typedef enum
+ {
+ 	ARRAY_NO_LEVEL,					/* initial state: no left brace seen yet */
+ 	ARRAY_LEVEL_STARTED,			/* just saw an unquoted left brace */
+ 	ARRAY_ELEM_STARTED,				/* in an unquoted element, or past an escape outside quotes */
+ 	ARRAY_ELEM_COMPLETED,			/* NOTE(review): tested but never assigned in the visible hunks */
+ 	ARRAY_QUOTED_ELEM_STARTED,		/* opening double quote seen; inside a quoted element */
+ 	ARRAY_QUOTED_ELEM_COMPLETED,	/* closing double quote of an element seen */
+ 	ARRAY_ELEM_DELIMITED,			/* delimiter seen after an element */
+ 	ARRAY_LEVEL_COMPLETED,			/* just saw an unquoted right brace */
+ 	ARRAY_LEVEL_DELIMITED			/* delimiter seen after a right brace */
+ } ArrayParseState;
+ 
  static int
  ArrayCount(char *str, int *dim, char typdelim)
  {
! 	int				nest_level = 0,
! 					i;
! 	int				ndim = 1,
! 					temp[MAXDIM],
! 					nelems[MAXDIM],
! 					nelems_last[MAXDIM];
! 	bool			scanning_string = false;
! 	bool			eoArray = false;
! 	char		   *ptr;
! 	ArrayParseState	parse_state = ARRAY_NO_LEVEL;
  
  	for (i = 0; i < MAXDIM; ++i)
  	{
***************
*** 370,375 ****
--- 384,390 ----
  		nelems_last[i] = nelems[i] = 1;
  	}
  
+ 	/* special case for an empty array */
  	if (strncmp(str, "{}", 2) == 0)
  		return 0;
  
***************
*** 389,394 ****
--- 404,423 ----
  						errmsg("malformed array literal: \"%s\"", str)));
  					break;
  				case '\\':
+ 					/*
+ 					 * An escape must be after a level start, after an element
+ 					 * start (quoted or unquoted), or after an element delimiter.
+ 					 * In any case we now must be past an element start.
+ 					 */
+ 					if (parse_state != ARRAY_LEVEL_STARTED &&
+ 						parse_state != ARRAY_ELEM_STARTED &&
+ 						parse_state != ARRAY_QUOTED_ELEM_STARTED &&
+ 						parse_state != ARRAY_ELEM_DELIMITED)
+ 						ereport(ERROR,
+ 							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ 							errmsg("malformed array literal: \"%s\"", str)));
+ 					if (parse_state != ARRAY_QUOTED_ELEM_STARTED)
+ 						parse_state = ARRAY_ELEM_STARTED;
  					/* skip the escaped character */
  					if (*(ptr + 1))
  						ptr++;
***************
*** 398,408 ****
--- 427,464 ----
  						errmsg("malformed array literal: \"%s\"", str)));
  					break;
  				case '\"':
+ 					/*
+ 					 * A quote must be after a level start, after a quoted
+ 					 * element start, or after an element delimiter. In any
+ 					 * case we now must be past an element start.
+ 					 */
+ 					if (parse_state != ARRAY_LEVEL_STARTED &&
+ 						parse_state != ARRAY_QUOTED_ELEM_STARTED &&
+ 						parse_state != ARRAY_ELEM_DELIMITED)
+ 						ereport(ERROR,
+ 							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ 							errmsg("malformed array literal: \"%s\"", str)));
  					scanning_string = !scanning_string;
+ 					if (scanning_string)
+ 						parse_state = ARRAY_QUOTED_ELEM_STARTED;
+ 					else
+ 						parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
  					break;
  				case '{':
  					if (!scanning_string)
  					{
+ 						/*
+ 						 * A left brace can occur if no nesting has
+ 						 * occurred yet, after a level start, or
+ 						 * after a level delimiter.
+ 						 */
+ 						if (parse_state != ARRAY_NO_LEVEL &&
+ 							parse_state != ARRAY_LEVEL_STARTED &&
+ 							parse_state != ARRAY_LEVEL_DELIMITED)
+ 							ereport(ERROR,
+ 								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ 								errmsg("malformed array literal: \"%s\"", str)));
+ 						parse_state = ARRAY_LEVEL_STARTED;
  						if (nest_level >= MAXDIM)
  							ereport(ERROR,
  								(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
***************
*** 417,422 ****
--- 473,491 ----
  				case '}':
  					if (!scanning_string)
  					{
+ 						/*
+ 						 * A right brace can occur after an element start,
+ 						 * an element completion, a quoted element completion,
+ 						 * or a level completion.
+ 						 */
+ 						if (parse_state != ARRAY_ELEM_STARTED &&
+ 							parse_state != ARRAY_ELEM_COMPLETED &&
+ 							parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
+ 							parse_state != ARRAY_LEVEL_COMPLETED)
+ 							ereport(ERROR,
+ 								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ 								errmsg("malformed array literal: \"%s\"", str)));
+ 						parse_state = ARRAY_LEVEL_COMPLETED;
  						if (nest_level == 0)
  							ereport(ERROR,
  							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
***************
*** 445,454 ****
  					}
  					break;
  				default:
! 					if (*ptr == typdelim && !scanning_string)
  					{
! 						itemdone = true;
! 						nelems[nest_level - 1]++;
  					}
  					break;
  			}
--- 514,558 ----
  					}
  					break;
  				default:
! 					if (!scanning_string)
  					{
! 						if (*ptr == typdelim)
! 						{
! 							/*
! 							* Delimiters can occur after an element start,
! 							* an element completion, a quoted element
! 							* completion, or a level completion.
! 							*/
! 							if (parse_state != ARRAY_ELEM_STARTED &&
! 								parse_state != ARRAY_ELEM_COMPLETED &&
! 								parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
! 								parse_state != ARRAY_LEVEL_COMPLETED)
! 								ereport(ERROR,
! 									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
! 									errmsg("malformed array literal: \"%s\"", str)));
! 							if (parse_state == ARRAY_LEVEL_COMPLETED)
! 								parse_state = ARRAY_LEVEL_DELIMITED;
! 							else
! 								parse_state = ARRAY_ELEM_DELIMITED;
! 							itemdone = true;
! 							nelems[nest_level - 1]++;
! 						}
! 						else if (!isspace(*ptr))
! 						{
! 							/*
! 							* Other non-space characters must be after a level
! 							* start, after an element start, or after an element
! 							* delimiter. In any case we now must be past an
! 							* element start.
! 							*/
! 							if (parse_state != ARRAY_LEVEL_STARTED &&
! 								parse_state != ARRAY_ELEM_STARTED &&
! 								parse_state != ARRAY_ELEM_DELIMITED)
! 								ereport(ERROR,
! 									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
! 									errmsg("malformed array literal: \"%s\"", str)));
! 							parse_state = ARRAY_ELEM_STARTED;
! 						}
  					}
  					break;
  			}
***************
*** 511,522 ****
--- 615,629 ----
  	while (!eoArray)
  	{
  		bool		itemdone = false;
+ 		bool		itemquoted = false;
  		int			i = -1;
  		char	   *itemstart;
+ 		char	   *eptr;
  
  		/* skip leading whitespace */
  		while (isspace((unsigned char) *ptr))
  			ptr++;
+ 
  		itemstart = ptr;
  
  		while (!itemdone)
***************
*** 547,557 ****
  						char	   *cptr;
  
  						scanning_string = !scanning_string;
! 						/* Crunch the string on top of the quote. */
! 						for (cptr = ptr; *cptr != '\0'; cptr++)
! 							*cptr = *(cptr + 1);
! 						/* Back up to not miss following character. */
! 						ptr--;
  						break;
  					}
  				case '{':
--- 654,668 ----
  						char	   *cptr;
  
  						scanning_string = !scanning_string;
! 						if (scanning_string)
! 						{
! 							itemquoted = true;
! 							/* Crunch the string on top of the first quote. */
! 							for (cptr = ptr; *cptr != '\0'; cptr++)
! 								*cptr = *(cptr + 1);
! 							/* Back up to not miss following character. */
! 							ptr--;
! 						}
  						break;
  					}
  				case '{':
***************
*** 615,620 ****
--- 726,750 ----
  					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
  				   errmsg("malformed array literal: \"%s\"", arrayStr)));
  
+ 		/*
+ 		 * skip trailing whitespace
+ 		 */
+ 		eptr = ptr - 1;
+ 		if (!itemquoted)
+ 		{
+ 			/* skip to last non-NUL, non-space character */
+ 			while ((*eptr == '\0') || (isspace((unsigned char) *eptr)))
+ 				eptr--;
+ 			*(++eptr) = '\0';
+ 		}
+ 		else
+ 		{
+ 			/* skip to last quote character */
+ 			while (*eptr != '"')
+ 				eptr--;
+ 			*eptr = '\0';
+ 		}
+ 
  		values[i] = FunctionCall3(inputproc,
  								  CStringGetDatum(itemstart),
  								  ObjectIdGetDatum(typioparam),
