From 379695587598a0af4490fef22f17f7f28f7df0ad Mon Sep 17 00:00:00 2001
From: Julien Rouhaud <julien.rouhaud@free.fr>
Date: Thu, 22 Apr 2021 02:15:54 +0800
Subject: [PATCH v3 4/4] Teach sqlol to use the new MODE_SINGLE_QUERY parser
 mode.

This way, multi-statement commands that mix the core parser and the sqlol
parser can be supported.

Also add a LOLCODE version of CREATE VIEW viewname AS, to make it easy to
test multi-statement commands.
---
 contrib/sqlol/Makefile              |  2 +
 contrib/sqlol/expected/01_sqlol.out | 74 +++++++++++++++++++++++++++++
 contrib/sqlol/repro.sql             | 18 +++++++
 contrib/sqlol/sql/01_sqlol.sql      | 40 ++++++++++++++++
 contrib/sqlol/sqlol.c               | 24 ++++++----
 contrib/sqlol/sqlol_gram.y          | 63 ++++++++++++------------
 contrib/sqlol/sqlol_kwlist.h        |  1 +
 contrib/sqlol/sqlol_scan.l          | 13 ++++-
 contrib/sqlol/sqlol_scanner.h       |  3 +-
 9 files changed, 192 insertions(+), 46 deletions(-)
 create mode 100644 contrib/sqlol/expected/01_sqlol.out
 create mode 100644 contrib/sqlol/repro.sql
 create mode 100644 contrib/sqlol/sql/01_sqlol.sql

diff --git a/contrib/sqlol/Makefile b/contrib/sqlol/Makefile
index 3850ac3fce..eaf94801c2 100644
--- a/contrib/sqlol/Makefile
+++ b/contrib/sqlol/Makefile
@@ -6,6 +6,8 @@ OBJS = \
 	sqlol.o sqlol_gram.o sqlol_scan.o sqlol_keywords.o
 PGFILEDESC = "sqlol - Toy alternative grammar based on LOLCODE"
 
+REGRESS = 01_sqlol
+
 sqlol_gram.h: sqlol_gram.c
 	touch $@
 
diff --git a/contrib/sqlol/expected/01_sqlol.out b/contrib/sqlol/expected/01_sqlol.out
new file mode 100644
index 0000000000..a18eaf6801
--- /dev/null
+++ b/contrib/sqlol/expected/01_sqlol.out
@@ -0,0 +1,74 @@
+LOAD 'sqlol';
+-- create a base table, falling back on core grammar
+CREATE TABLE t1 (id integer, val text);
+-- test a SQLOL statement
+HAI 1.2 I HAS A t1 GIMMEH id, "val" KTHXBYE\g
+ id | val 
+----+-----
+(0 rows)
+
+-- create a view in SQLOL
+HAI 1.2 MAEK I HAS A t1 GIMMEH id, "val" A v0 KTHXBYE\g
+-- combine standard SQL with a trailing SQLOL statement in multi-statements command
+CREATE VIEW v1 AS SELECT * FROM t1\; CREATE VIEW v2 AS SELECT * FROM t1\;HAI 1.2 I HAS A t1 GIMMEH "id", id KTHXBYE\g
+ id | id 
+----+----
+(0 rows)
+
+-- interleave standard SQL and SQLOL commands in multi-statements command
+CREATE VIEW v3 AS SELECT * FROM t1\; HAI 1.2 MAEK I HAS A t1 GIMMEH id, "val" A v4 KTHXBYE CREATE VIEW v5 AS SELECT * FROM t1\;HAI 1.2 I HAS A t1 GIMMEH "id", id KTHXBYE\g
+ id | id 
+----+----
+(0 rows)
+
+-- test MODE_SINGLE_QUERY with no trailing semicolon
+SELECT 1\;SELECT 2\;SELECT 3 \g
+ ?column? 
+----------
+        3
+(1 row)
+
+-- test empty statement ignoring
+\;\;select 1 \g
+ ?column? 
+----------
+        1
+(1 row)
+
+-- check the created views
+\d
+       List of relations
+ Schema | Name | Type  | Owner 
+--------+------+-------+-------
+ public | t1   | table | rjuju
+ public | v0   | view  | rjuju
+ public | v1   | view  | rjuju
+ public | v2   | view  | rjuju
+ public | v3   | view  | rjuju
+ public | v4   | view  | rjuju
+ public | v5   | view  | rjuju
+(7 rows)
+
+--
+-- Error position
+--
+SELECT 1\;err;
+ERROR:  syntax error at or near "err"
+LINE 1: SELECT 1;err;
+                 ^
+-- sqlol won't trigger an error on incorrect GIMME keyword, so core parser will
+-- complain about HAI
+SELECT 1\;HAI 1.2 I HAS A t1 GIMME id KTHXBYE\g
+ERROR:  syntax error at or near "HAI"
+LINE 1: SELECT 1;HAI 1.2 I HAS A t1 GIMME id KTHXBYE
+                 ^
+-- sqlol will trigger the error about too many qualifiers on t1
+SELECT 1\;HAI 1.2 I HAS A some.thing.public.t1 GIMMEH id KTHXBYE\g
+ERROR:  improper qualified name (too many dotted names): some.thing.public.t1
+LINE 1: SELECT 1;HAI 1.2 I HAS A some.thing.public.t1 GIMMEH id KTHX...
+                                 ^
+-- position reported outside of the parser/scanner should be correct too
+SELECT 1\;SELECT * FROM notatable;
+ERROR:  relation "notatable" does not exist
+LINE 1: SELECT 1;SELECT * FROM notatable;
+                               ^
diff --git a/contrib/sqlol/repro.sql b/contrib/sqlol/repro.sql
new file mode 100644
index 0000000000..0ebcb53160
--- /dev/null
+++ b/contrib/sqlol/repro.sql
@@ -0,0 +1,18 @@
+DROP TABLE IF EXISTS t1 CASCADE;
+
+LOAD 'sqlol';
+
+\;\; SELECT 1\;
+
+CREATE TABLE t1 (id integer, val text);
+
+HAI 1.2 I HAS A t1 GIMMEH id, "val" KTHXBYE\g
+
+HAI 1.2 MAEK I HAS A t1 GIMMEH id, "val" A v0 KTHXBYE\g
+
+CREATE VIEW v1 AS SELECT * FROM t1\; CREATE VIEW v2 AS SELECT * FROM t1\;HAI 1.2 I HAS A t1 GIMMEH "id", id KTHXBYE\g
+
+CREATE VIEW v3 AS SELECT * FROM t1\; HAI 1.2 MAEK I HAS A t1 GIMMEH id, "val" A v4 KTHXBYE CREATE VIEW v5 AS SELECT * FROM t1\;HAI 1.2 I HAS A t1 GIMMEH "id", id KTHXBYE\g
+
+SELECT 1\;SELECT 2\;SELECT 3 \g
+\d
diff --git a/contrib/sqlol/sql/01_sqlol.sql b/contrib/sqlol/sql/01_sqlol.sql
new file mode 100644
index 0000000000..918caf94c0
--- /dev/null
+++ b/contrib/sqlol/sql/01_sqlol.sql
@@ -0,0 +1,40 @@
+LOAD 'sqlol';
+
+-- create a base table, falling back on core grammar
+CREATE TABLE t1 (id integer, val text);
+
+-- test a SQLOL statement
+HAI 1.2 I HAS A t1 GIMMEH id, "val" KTHXBYE\g
+
+-- create a view in SQLOL
+HAI 1.2 MAEK I HAS A t1 GIMMEH id, "val" A v0 KTHXBYE\g
+
+-- combine standard SQL with a trailing SQLOL statement in multi-statements command
+CREATE VIEW v1 AS SELECT * FROM t1\; CREATE VIEW v2 AS SELECT * FROM t1\;HAI 1.2 I HAS A t1 GIMMEH "id", id KTHXBYE\g
+
+-- interleave standard SQL and SQLOL commands in multi-statements command
+CREATE VIEW v3 AS SELECT * FROM t1\; HAI 1.2 MAEK I HAS A t1 GIMMEH id, "val" A v4 KTHXBYE CREATE VIEW v5 AS SELECT * FROM t1\;HAI 1.2 I HAS A t1 GIMMEH "id", id KTHXBYE\g
+
+-- test MODE_SINGLE_QUERY with no trailing semicolon
+SELECT 1\;SELECT 2\;SELECT 3 \g
+
+-- test empty statement ignoring
+\;\;select 1 \g
+
+-- check the created views
+\d
+
+--
+-- Error position
+--
+SELECT 1\;err;
+
+-- sqlol won't trigger an error on incorrect GIMME keyword, so core parser will
+-- complain about HAI
+SELECT 1\;HAI 1.2 I HAS A t1 GIMME id KTHXBYE\g
+
+-- sqlol will trigger the error about too many qualifiers on t1
+SELECT 1\;HAI 1.2 I HAS A some.thing.public.t1 GIMMEH id KTHXBYE\g
+
+-- position reported outside of the parser/scanner should be correct too
+SELECT 1\;SELECT * FROM notatable;
diff --git a/contrib/sqlol/sqlol.c b/contrib/sqlol/sqlol.c
index b986966181..7d4e1b631f 100644
--- a/contrib/sqlol/sqlol.c
+++ b/contrib/sqlol/sqlol.c
@@ -26,7 +26,8 @@ static parser_hook_type prev_parser_hook = NULL;
 void		_PG_init(void);
 void		_PG_fini(void);
 
-static List *sqlol_parser_hook(const char *str, RawParseMode mode);
+static List *sqlol_parser_hook(const char *str, RawParseMode mode, int offset,
+							   bool *error);
 
 
 /*
@@ -54,23 +55,25 @@ _PG_fini(void)
  * sqlol_parser_hook: parse our grammar
  */
 static List *
-sqlol_parser_hook(const char *str, RawParseMode mode)
+sqlol_parser_hook(const char *str, RawParseMode mode, int offset, bool *error)
 {
 	sqlol_yyscan_t yyscanner;
 	sqlol_base_yy_extra_type yyextra;
 	int			yyresult;
 
-	if (mode != RAW_PARSE_DEFAULT)
+	if (mode != RAW_PARSE_DEFAULT && mode != RAW_PARSE_SINGLE_QUERY)
 	{
 		if (prev_parser_hook)
-			return (*prev_parser_hook) (str, mode);
-		else
-			return raw_parser(str, mode);
+			return (*prev_parser_hook) (str, mode, offset, error);
+
+		*error = true;
+		return NIL;
 	}
 
 	/* initialize the flex scanner */
 	yyscanner = sqlol_scanner_init(str, &yyextra.sqlol_yy_extra,
-							 sqlol_ScanKeywords, sqlol_NumScanKeywords);
+							 sqlol_ScanKeywords, sqlol_NumScanKeywords,
+							 offset);
 
 	/* initialize the bison parser */
 	sqlol_parser_init(&yyextra);
@@ -88,9 +91,10 @@ sqlol_parser_hook(const char *str, RawParseMode mode)
 	if (yyresult)
 	{
 		if (prev_parser_hook)
-			return (*prev_parser_hook) (str, mode);
-		else
-			return raw_parser(str, mode);
+			return (*prev_parser_hook) (str, mode, offset, error);
+
+		*error = true;
+		return NIL;
 	}
 
 	return yyextra.parsetree;
diff --git a/contrib/sqlol/sqlol_gram.y b/contrib/sqlol/sqlol_gram.y
index 64d00d14ca..4c36cfef5e 100644
--- a/contrib/sqlol/sqlol_gram.y
+++ b/contrib/sqlol/sqlol_gram.y
@@ -20,6 +20,7 @@
 
 #include "catalog/namespace.h"
 #include "nodes/makefuncs.h"
+#include "catalog/pg_class_d.h"
 
 #include "sqlol_gramparse.h"
 
@@ -106,10 +107,10 @@ static List *check_indirection(List *indirection, sqlol_yyscan_t yyscanner);
 	ResTarget			*target;
 }
 
-%type <node>	stmt toplevel_stmt GimmehStmt simple_gimmeh columnref
+%type <node>	stmt toplevel_stmt GimmehStmt MaekStmt simple_gimmeh columnref
 				indirection_el
 
-%type <list>	parse_toplevel stmtmulti gimmeh_list indirection
+%type <list>	parse_toplevel rawstmt gimmeh_list indirection
 
 %type <range>	qualified_name
 
@@ -134,22 +135,19 @@ static List *check_indirection(List *indirection, sqlol_yyscan_t yyscanner);
  */
 
 /* ordinary key words in alphabetical order */
-%token <keyword> A GIMMEH HAI HAS I KTHXBYE
-
+%token <keyword> A GIMMEH HAI HAS I KTHXBYE MAEK
 
 %%
 
 /*
  *	The target production for the whole parse.
- *
- * Ordinarily we parse a list of statements, but if we see one of the
- * special MODE_XXX symbols as first token, we parse something else.
- * The options here correspond to enum RawParseMode, which see for details.
  */
 parse_toplevel:
-			stmtmulti
+			rawstmt
 			{
 				pg_yyget_extra(yyscanner)->parsetree = $1;
+
+				YYACCEPT;
 			}
 		;
 
@@ -163,24 +161,11 @@ parse_toplevel:
  * we'd get -1 for the location in such cases.
  * We also take care to discard empty statements entirely.
  */
-stmtmulti:	stmtmulti KTHXBYE toplevel_stmt
-				{
-					if ($1 != NIL)
-					{
-						/* update length of previous stmt */
-						updateRawStmtEnd(llast_node(RawStmt, $1), @2);
-					}
-					if ($3 != NULL)
-						$$ = lappend($1, makeRawStmt($3, @2 + 1));
-					else
-						$$ = $1;
-				}
-			| toplevel_stmt
+rawstmt:	toplevel_stmt KTHXBYE
 				{
-					if ($1 != NULL)
-						$$ = list_make1(makeRawStmt($1, 0));
-					else
-						$$ = NIL;
+					RawStmt *raw = makeRawStmt($1, 0);
+					updateRawStmtEnd(raw, @2 + 7);
+					$$ = list_make1(raw);
 				}
 		;
 
@@ -189,13 +174,12 @@ stmtmulti:	stmtmulti KTHXBYE toplevel_stmt
  * those words have different meanings in function bodys.
  */
 toplevel_stmt:
-			stmt
+			HAI FCONST stmt { $$ = $3; }
 		;
 
 stmt:
 			GimmehStmt
-			| /*EMPTY*/
-				{ $$ = NULL; }
+			| MaekStmt
 		;
 
 /*****************************************************************************
@@ -209,12 +193,11 @@ GimmehStmt:
 		;
 
 simple_gimmeh:
-			HAI FCONST I HAS A qualified_name
-			GIMMEH gimmeh_list
+			I HAS A qualified_name GIMMEH gimmeh_list
 				{
 					SelectStmt *n = makeNode(SelectStmt);
-					n->targetList = $8;
-					n->fromClause = list_make1($6);
+					n->targetList = $6;
+					n->fromClause = list_make1($4);
 					$$ = (Node *)n;
 				}
 		;
@@ -233,6 +216,20 @@ gimmeh_el:
 				$$->location = @1;
 			}
 
+MaekStmt:
+		MAEK GimmehStmt A qualified_name
+			{
+				ViewStmt *n = makeNode(ViewStmt);
+				n->view = $4;
+				n->view->relpersistence = RELPERSISTENCE_PERMANENT;
+				n->aliases = NIL;
+				n->query = $2;
+				n->replace = false;
+				n->options = NIL;
+				n->withCheckOption = false;
+				$$ = (Node *) n;
+			}
+
 qualified_name:
 			ColId
 				{
diff --git a/contrib/sqlol/sqlol_kwlist.h b/contrib/sqlol/sqlol_kwlist.h
index 2de3893ee4..8b50d88df9 100644
--- a/contrib/sqlol/sqlol_kwlist.h
+++ b/contrib/sqlol/sqlol_kwlist.h
@@ -19,3 +19,4 @@ PG_KEYWORD("hai", HAI, RESERVED_KEYWORD)
 PG_KEYWORD("has", HAS, UNRESERVED_KEYWORD)
 PG_KEYWORD("i", I, UNRESERVED_KEYWORD)
 PG_KEYWORD("kthxbye", KTHXBYE, UNRESERVED_KEYWORD)
+PG_KEYWORD("maek", MAEK, UNRESERVED_KEYWORD)
diff --git a/contrib/sqlol/sqlol_scan.l b/contrib/sqlol/sqlol_scan.l
index a7088b8390..e6d4d53446 100644
--- a/contrib/sqlol/sqlol_scan.l
+++ b/contrib/sqlol/sqlol_scan.l
@@ -412,8 +412,10 @@ sqlol_yyscan_t
 sqlol_scanner_init(const char *str,
 			 sqlol_yy_extra_type *yyext,
 			 const sqlol_ScanKeyword *keywords,
-			 int num_keywords)
+			 int num_keywords,
+			 int offset)
 {
+	YY_BUFFER_STATE state;
 	Size		slen = strlen(str);
 	yyscan_t	scanner;
 
@@ -432,13 +434,20 @@ sqlol_scanner_init(const char *str,
 	yyext->scanbuflen = slen;
 	memcpy(yyext->scanbuf, str, slen);
 	yyext->scanbuf[slen] = yyext->scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
-	yy_scan_buffer(yyext->scanbuf, slen + 2, scanner);
+	state = yy_scan_buffer(yyext->scanbuf, slen + 2, scanner);
 
 	/* initialize literal buffer to a reasonable but expansible size */
 	yyext->literalalloc = 1024;
 	yyext->literalbuf = (char *) palloc(yyext->literalalloc);
 	yyext->literallen = 0;
 
+	/*
+	 * Adjust the offset in the input string.  This is required in single-query
+	 * mode, as we need to register the same token locations as we would have
+	 * in normal mode with a multi-statement query string.
+	 */
+	state->yy_buf_pos += offset;
+
 	return scanner;
 }
 
diff --git a/contrib/sqlol/sqlol_scanner.h b/contrib/sqlol/sqlol_scanner.h
index 0a497e9d91..57f95867ee 100644
--- a/contrib/sqlol/sqlol_scanner.h
+++ b/contrib/sqlol/sqlol_scanner.h
@@ -108,7 +108,8 @@ extern PGDLLIMPORT const uint16 sqlol_ScanKeywordTokens[];
 extern sqlol_yyscan_t sqlol_scanner_init(const char *str,
 								  sqlol_yy_extra_type *yyext,
 								  const sqlol_ScanKeyword *keywords,
-								  int num_keywords);
+								  int num_keywords,
+								  int offset);
 extern void sqlol_scanner_finish(sqlol_yyscan_t yyscanner);
 extern int	sqlol_yylex(sqlol_YYSTYPE *lvalp, YYLTYPE *llocp,
 					   sqlol_yyscan_t yyscanner);
-- 
2.31.1

