diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 392eb70..de84b77 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -3802,6 +3802,51 @@ SELECT * FROM parent WHERE key = 2400; + + force_parallel_mode (enum) + + force_parallel_mode configuration parameter + + + + + Allows the use of parallel queries for testing purposes even in cases + where no performance benefit is expected. + The allowed values of force_parallel_mode are + off (use parallel mode only when it is expected to improve + performance), on (force parallel query for all queries + for which it is thought to be safe), and regress (like + on, but with additional behavior changes to facilitate automated + regression testing). + + + + More specifically, setting this value to on will add + a Gather node to the top of any query plan for which this + appears to be safe, so that the query runs inside of a parallel worker. + Even when a parallel worker is not available or cannot be used, + operations such as starting a subtransaction that would be prohibited + in a parallel query context will be prohibited unless the planner + believes that this will cause the query to fail. If failures or + unexpected results occur when this option is set, some functions used + by the query may need to be marked PARALLEL UNSAFE + (or, possibly, PARALLEL RESTRICTED). + + + + Setting this value to regress has all of the same effects + as setting it to on plus some additional effects that are + intended to facilitate automated regression testing. Normally, + messages from a parallel worker are prefixed with a context line, + but a setting of regress suppresses this to guarantee + reproducible results. Also, the Gather nodes added to + plans by this setting are hidden from the EXPLAIN output + so that the output matches what would be obtained if this setting + were turned off. 
+ + + + diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml index 2adeeff..6a3b622 100644 --- a/doc/src/sgml/ddl.sgml +++ b/doc/src/sgml/ddl.sgml @@ -495,8 +495,8 @@ CREATE TABLE products ( - Unique constraints ensure that the data contained in a column or a - group of columns is unique with respect to all the rows in the + Unique constraints ensure that the data contained in a column, or a + group of columns, is unique among all the rows in the table. The syntax is: CREATE TABLE products ( @@ -518,8 +518,8 @@ CREATE TABLE products ( - If a unique constraint refers to a group of columns, the columns - are listed separated by commas: + To define a unique constraint for a group of columns, write it as a + table constraint with the column names separated by commas: CREATE TABLE example ( a integer, @@ -546,9 +546,10 @@ CREATE TABLE products ( Adding a unique constraint will automatically create a unique B-tree - index on the column or group of columns used in the constraint. - A uniqueness constraint on only some rows can be enforced by creating - a partial index. + index on the column or group of columns listed in the constraint. + A uniqueness restriction covering only some rows cannot be written as + a unique constraint, but it is possible to enforce such a restriction by + creating a unique partial index. @@ -557,10 +558,10 @@ CREATE TABLE products ( - In general, a unique constraint is violated when there is more than + In general, a unique constraint is violated if there is more than one row in the table where the values of all of the columns included in the constraint are equal. - However, two null values are not considered equal in this + However, two null values are never considered equal in this comparison. 
That means even in the presence of a unique constraint it is possible to store duplicate rows that contain a null value in at least one of the constrained @@ -584,8 +585,9 @@ CREATE TABLE products ( - Technically, a primary key constraint is simply a combination of a - unique constraint and a not-null constraint. So, the following + A primary key constraint indicates that a column, or group of columns, + can be used as a unique identifier for rows in the table. This + requires that the values be both unique and not null. So, the following two table definitions accept the same data: CREATE TABLE products ( @@ -605,7 +607,7 @@ CREATE TABLE products ( - Primary keys can also constrain more than one column; the syntax + Primary keys can span more than one column; the syntax is similar to unique constraints: CREATE TABLE example ( @@ -618,31 +620,31 @@ CREATE TABLE example ( - A primary key indicates that a column or group of columns can be - used as a unique identifier for rows in the table. (This is a - direct consequence of the definition of a primary key. Note that - a unique constraint does not, by itself, provide a unique identifier - because it does not exclude null values.) This is useful both for - documentation purposes and for client applications. For example, - a GUI application that allows modifying row values probably needs - to know the primary key of a table to be able to identify rows - uniquely. - - - Adding a primary key will automatically create a unique B-tree index - on the column or group of columns used in the primary key. + on the column or group of columns listed in the primary key, and will + force the column(s) to be marked NOT NULL. A table can have at most one primary key. (There can be any number - of unique and not-null constraints, which are functionally the same - thing, but only one can be identified as the primary key.) 
+ of unique and not-null constraints, which are functionally almost the + same thing, but only one can be identified as the primary key.) Relational database theory dictates that every table must have a primary key. This rule is not enforced by PostgreSQL, but it is usually best to follow it. + + + Primary keys are useful both for + documentation purposes and for client applications. For example, + a GUI application that allows modifying row values probably needs + to know the primary key of a table to be able to identify rows + uniquely. There are also various ways in which the database system + makes use of a primary key if one has been declared; for example, + the primary key defines the default target column(s) for foreign keys + referencing its table. + diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index f0c94d5..cd234db 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -506,25 +506,25 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI PRIMARY KEY ( column_name [, ... ] ) (table constraint) - The primary key constraint specifies that a column or columns of a table - can contain only unique (non-duplicate), nonnull values. - Technically, PRIMARY KEY is merely a - combination of UNIQUE and NOT NULL, but - identifying a set of columns as primary key also provides - metadata about the design of the schema, as a primary key - implies that other tables - can rely on this set of columns as a unique identifier for rows. - - - - Only one primary key can be specified for a table, whether as a + The PRIMARY KEY constraint specifies that a column or + columns of a table can contain only unique (non-duplicate), nonnull + values. Only one primary key can be specified for a table, whether as a column constraint or a table constraint. 
The primary key constraint should name a set of columns that is - different from other sets of columns named by any unique - constraint defined for the same table. + different from the set of columns named by any unique + constraint defined for the same table. (Otherwise, the unique + constraint is redundant and will be discarded.) + + + + PRIMARY KEY enforces the same data constraints as + a combination of UNIQUE and NOT NULL, but + identifying a set of columns as the primary key also provides metadata + about the design of the schema, since a primary key implies that other + tables can rely on this set of columns as a unique identifier for rows. diff --git a/doc/src/sgml/release-9.1.sgml b/doc/src/sgml/release-9.1.sgml index f5dab31..4ab11e2 100644 --- a/doc/src/sgml/release-9.1.sgml +++ b/doc/src/sgml/release-9.1.sgml @@ -1,6 +1,506 @@ + + Release 9.1.20 + + + Release Date + 2016-02-11 + + + + This release contains a variety of fixes from 9.1.19. + For information about new features in the 9.1 major release, see + . + + + + Migration to Version 9.1.20 + + + A dump/restore is not required for those running 9.1.X. + + + + However, if you are upgrading from a version earlier than 9.1.16, + see . + + + + + + Changes + + + + + + Perform an immediate shutdown if the postmaster.pid file + is removed (Tom Lane) + + + + The postmaster now checks every minute or so + that postmaster.pid is still there and still contains its + own PID. If not, it performs an immediate shutdown, as though it had + received SIGQUIT. The main motivation for this change + is to ensure that failed buildfarm runs will get cleaned up without + manual intervention; but it also serves to limit the bad effects if a + DBA forcibly removes postmaster.pid and then starts a new + postmaster. 
+ + + + + + In SERIALIZABLE transaction isolation mode, serialization + anomalies could be missed due to race conditions during insertions + (Kevin Grittner, Thomas Munro) + + + + + + Fix failure to emit appropriate WAL records when doing ALTER + TABLE ... SET TABLESPACE for unlogged relations (Michael Paquier, + Andres Freund) + + + + Even though the relation's data is unlogged, the move must be logged or + the relation will be inaccessible after a standby is promoted to master. + + + + + + Fix possible misinitialization of unlogged relations at the end of + crash recovery (Andres Freund, Michael Paquier) + + + + + + Fix ALTER COLUMN TYPE to reconstruct inherited check + constraints properly (Tom Lane) + + + + + + Fix REASSIGN OWNED to change ownership of composite types + properly (Álvaro Herrera) + + + + + + Fix REASSIGN OWNED and ALTER OWNER to correctly + update granted-permissions lists when changing owners of data types, + foreign data wrappers, or foreign servers (Bruce Momjian, + Álvaro Herrera) + + + + + + Fix REASSIGN OWNED to ignore foreign user mappings, + rather than fail (Álvaro Herrera) + + + + + + Add more defenses against bad planner cost estimates for GIN index + scans when the index's internal statistics are very out-of-date + (Tom Lane) + + + + + + Make planner cope with hypothetical GIN indexes suggested by an index + advisor plug-in (Julien Rouhaud) + + + + + + Fix dumping of whole-row Vars in ROW() + and VALUES() lists (Tom Lane) + + + + + + Fix possible internal overflow in numeric division + (Dean Rasheed) + + + + + + Fix enforcement of restrictions inside parentheses within regular + expression lookahead constraints (Tom Lane) + + + + Lookahead constraints aren't allowed to contain backrefs, and + parentheses within them are always considered non-capturing, according + to the manual. However, the code failed to handle these cases properly + inside a parenthesized subexpression, and would give unexpected + results. 
+ + + + + + Conversion of regular expressions to indexscan bounds could produce + incorrect bounds from regexps containing lookahead constraints + (Tom Lane) + + + + + + Fix regular-expression compiler to handle loops of constraint arcs + (Tom Lane) + + + + The code added for CVE-2007-4772 was both incomplete, in that it didn't + handle loops involving more than one state, and incorrect, in that it + could cause assertion failures (though there seem to be no bad + consequences of that in a non-assert build). Multi-state loops would + cause the compiler to run until the query was canceled or it reached + the too-many-states error condition. + + + + + + Improve memory-usage accounting in regular-expression compiler + (Tom Lane) + + + + This causes the code to emit regular expression is too + complex errors in some cases that previously used unreasonable + amounts of time and memory. + + + + + + Improve performance of regular-expression compiler (Tom Lane) + + + + + + Make %h and %r escapes + in log_line_prefix work for messages emitted due + to log_connections (Tom Lane) + + + + Previously, %h/%r started to work just after a + new session had emitted the connection received log message; + now they work for that message too. + + + + + + On Windows, ensure the shared-memory mapping handle gets closed in + child processes that don't need it (Tom Lane, Amit Kapila) + + + + This oversight resulted in failure to recover from crashes + whenever logging_collector is turned on. + + + + + + Fix possible failure to detect socket EOF in non-blocking mode on + Windows (Tom Lane) + + + + It's not entirely clear whether this problem can happen in pre-9.5 + branches, but if it did, the symptom would be that a walsender process + would wait indefinitely rather than noticing a loss of connection. 
+ + + + + + Avoid leaking a token handle during SSPI authentication + (Christian Ullrich) + + + + + + In psql, ensure that libreadline's idea + of the screen size is updated when the terminal window size changes + (Merlin Moncure) + + + + Previously, libreadline did not notice if the window + was resized during query output, leading to strange behavior during + later input of multiline queries. + + + + + + Fix psql's \det command to interpret its + pattern argument the same way as other \d commands with + potentially schema-qualified patterns do (Reece Hart) + + + + + + Avoid possible crash in psql's \c command + when previous connection was via Unix socket and command specifies a + new hostname and same username (Tom Lane) + + + + + + In pg_ctl start -w, test child process status directly + rather than relying on heuristics (Tom Lane, Michael Paquier) + + + + Previously, pg_ctl relied on an assumption that the new + postmaster would always create postmaster.pid within five + seconds. But that can fail on heavily-loaded systems, + causing pg_ctl to report incorrectly that the + postmaster failed to start. + + + + Except on Windows, this change also means that a pg_ctl start + -w done immediately after another such command will now reliably + fail, whereas previously it would report success if done within two + seconds of the first command. + + + + + + In pg_ctl start -w, don't attempt to use a wildcard listen + address to connect to the postmaster (Kondo Yuta) + + + + On Windows, pg_ctl would fail to detect postmaster + startup if listen_addresses is set to 0.0.0.0 + or ::, because it would try to use that value verbatim as + the address to connect to, which doesn't work. Instead assume + that 127.0.0.1 or ::1, respectively, is the + right thing to use. 
+ + + + + + In pg_ctl on Windows, check service status to decide + where to send output, rather than checking if standard output is a + terminal (Michael Paquier) + + + + + + In pg_dump and pg_basebackup, adopt + the GNU convention for handling tar-archive members exceeding 8GB + (Tom Lane) + + + + The POSIX standard for tar file format does not allow + archive member files to exceed 8GB, but most modern implementations + of tar support an extension that fixes that. Adopt + this extension so that pg_dump with -Ft no longer + fails on tables with more than 8GB of data, and so that + pg_basebackup can handle files larger than 8GB. In addition, + fix some portability issues that could cause failures for members + between 4GB and 8GB on some platforms. Potentially these problems could + cause unrecoverable data loss due to unreadable backup files. + + + + + + Fix assorted corner-case bugs in pg_dump's processing + of extension member objects (Tom Lane) + + + + + + Make pg_dump mark a view's triggers as needing to be + processed after its rule, to prevent possible failure during + parallel pg_restore (Tom Lane) + + + + + + Ensure that relation option values are properly quoted + in pg_dump (Kouhei Sutou, Tom Lane) + + + + A reloption value that isn't a simple identifier or number could lead + to dump/reload failures due to syntax errors in CREATE statements + issued by pg_dump. This is not an issue with any + reloption currently supported by core PostgreSQL, but + extensions could allow reloptions that cause the problem. + + + + + + Fix pg_upgrade's file-copying code to handle errors + properly on Windows (Bruce Momjian) + + + + + + Install guards in pgbench against corner-case overflow + conditions during evaluation of script-specified division or modulo + operators (Fabien Coelho, Michael Paquier) + + + + + + Prevent certain PL/Java parameters from being set by + non-superusers (Noah Misch) + + + + This change mitigates a PL/Java security bug + (CVE-2016-0766), which was fixed in PL/Java by marking + these parameters as superuser-only. To fix the security hazard for + sites that update PostgreSQL more frequently + than PL/Java, make the core code aware of them also. 
+ + + + + + Improve libpq's handling of out-of-memory situations + (Michael Paquier, Amit Kapila, Heikki Linnakangas) + + + + + + Fix order of arguments + in ecpg-generated typedef statements + (Michael Meskes) + + + + + + Use %g not %f format + in ecpg's PGTYPESnumeric_from_double() + (Tom Lane) + + + + + + Fix ecpg-supplied header files to not contain comments + continued from a preprocessor directive line onto the next line + (Michael Meskes) + + + + Such a comment is rejected by ecpg. It's not yet clear + whether ecpg itself should be changed. + + + + + + Ensure that contrib/pgcrypto's crypt() + function can be interrupted by query cancel (Andreas Karlsson) + + + + + + Accept flex versions later than 2.5.x + (Tom Lane, Michael Paquier) + + + + Now that flex 2.6.0 has been released, the version checks in our build + scripts needed to be adjusted. + + + + + + Install our missing script where PGXS builds can find it + (Jim Nasby) + + + + This allows sane behavior in a PGXS build done on a machine where build + tools such as bison are missing. + + + + + + Ensure that dynloader.h is included in the installed + header files in MSVC builds (Bruce Momjian, Michael Paquier) + + + + + + Add variant regression test expected-output file to match behavior of + current libxml2 (Tom Lane) + + + + The fix for libxml2's CVE-2015-7499 causes it not to + output error context reports in some cases where it used to do so. + This seems to be a bug, but we'll probably have to live with it for + some time, so work around it. + + + + + + Update time zone data files to tzdata release 2016a for + DST law changes in Cayman Islands, Metlakatla, and Trans-Baikal + Territory (Zabaykalsky Krai), plus historical corrections for Pakistan. 
+ + + + + + + + Release 9.1.19 diff --git a/doc/src/sgml/release-9.2.sgml b/doc/src/sgml/release-9.2.sgml index bee0c9e..78154cb 100644 --- a/doc/src/sgml/release-9.2.sgml +++ b/doc/src/sgml/release-9.2.sgml @@ -1,6 +1,537 @@ + + Release 9.2.15 + + + Release Date + 2016-02-11 + + + + This release contains a variety of fixes from 9.2.14. + For information about new features in the 9.2 major release, see + . + + + + Migration to Version 9.2.15 + + + A dump/restore is not required for those running 9.2.X. + + + + However, if you are upgrading from a version earlier than 9.2.11, + see . + + + + + + Changes + + + + + + Perform an immediate shutdown if the postmaster.pid file + is removed (Tom Lane) + + + + The postmaster now checks every minute or so + that postmaster.pid is still there and still contains its + own PID. If not, it performs an immediate shutdown, as though it had + received SIGQUIT. The main motivation for this change + is to ensure that failed buildfarm runs will get cleaned up without + manual intervention; but it also serves to limit the bad effects if a + DBA forcibly removes postmaster.pid and then starts a new + postmaster. + + + + + + In SERIALIZABLE transaction isolation mode, serialization + anomalies could be missed due to race conditions during insertions + (Kevin Grittner, Thomas Munro) + + + + + + Fix failure to emit appropriate WAL records when doing ALTER + TABLE ... SET TABLESPACE for unlogged relations (Michael Paquier, + Andres Freund) + + + + Even though the relation's data is unlogged, the move must be logged or + the relation will be inaccessible after a standby is promoted to master. 
+ + + + + + Fix possible misinitialization of unlogged relations at the end of + crash recovery (Andres Freund, Michael Paquier) + + + + + + Fix ALTER COLUMN TYPE to reconstruct inherited check + constraints properly (Tom Lane) + + + + + + Fix REASSIGN OWNED to change ownership of composite types + properly (Álvaro Herrera) + + + + + + Fix REASSIGN OWNED and ALTER OWNER to correctly + update granted-permissions lists when changing owners of data types, + foreign data wrappers, or foreign servers (Bruce Momjian, + Álvaro Herrera) + + + + + + Fix REASSIGN OWNED to ignore foreign user mappings, + rather than fail (Álvaro Herrera) + + + + + + Add more defenses against bad planner cost estimates for GIN index + scans when the index's internal statistics are very out-of-date + (Tom Lane) + + + + + + Make planner cope with hypothetical GIN indexes suggested by an index + advisor plug-in (Julien Rouhaud) + + + + + + Fix dumping of whole-row Vars in ROW() + and VALUES() lists (Tom Lane) + + + + + + Fix possible internal overflow in numeric division + (Dean Rasheed) + + + + + + Fix enforcement of restrictions inside parentheses within regular + expression lookahead constraints (Tom Lane) + + + + Lookahead constraints aren't allowed to contain backrefs, and + parentheses within them are always considered non-capturing, according + to the manual. However, the code failed to handle these cases properly + inside a parenthesized subexpression, and would give unexpected + results. 
+ + + + + + Conversion of regular expressions to indexscan bounds could produce + incorrect bounds from regexps containing lookahead constraints + (Tom Lane) + + + + + + Fix regular-expression compiler to handle loops of constraint arcs + (Tom Lane) + + + + The code added for CVE-2007-4772 was both incomplete, in that it didn't + handle loops involving more than one state, and incorrect, in that it + could cause assertion failures (though there seem to be no bad + consequences of that in a non-assert build). Multi-state loops would + cause the compiler to run until the query was canceled or it reached + the too-many-states error condition. + + + + + + Improve memory-usage accounting in regular-expression compiler + (Tom Lane) + + + + This causes the code to emit regular expression is too + complex errors in some cases that previously used unreasonable + amounts of time and memory. + + + + + + Improve performance of regular-expression compiler (Tom Lane) + + + + + + Make %h and %r escapes + in log_line_prefix work for messages emitted due + to log_connections (Tom Lane) + + + + Previously, %h/%r started to work just after a + new session had emitted the connection received log message; + now they work for that message too. + + + + + + On Windows, ensure the shared-memory mapping handle gets closed in + child processes that don't need it (Tom Lane, Amit Kapila) + + + + This oversight resulted in failure to recover from crashes + whenever logging_collector is turned on. + + + + + + Fix possible failure to detect socket EOF in non-blocking mode on + Windows (Tom Lane) + + + + It's not entirely clear whether this problem can happen in pre-9.5 + branches, but if it did, the symptom would be that a walsender process + would wait indefinitely rather than noticing a loss of connection. 
+ + + + + + Avoid leaking a token handle during SSPI authentication + (Christian Ullrich) + + + + + + In psql, ensure that libreadline's idea + of the screen size is updated when the terminal window size changes + (Merlin Moncure) + + + + Previously, libreadline did not notice if the window + was resized during query output, leading to strange behavior during + later input of multiline queries. + + + + + + Fix psql's \det command to interpret its + pattern argument the same way as other \d commands with + potentially schema-qualified patterns do (Reece Hart) + + + + + + Avoid possible crash in psql's \c command + when previous connection was via Unix socket and command specifies a + new hostname and same username (Tom Lane) + + + + + + In pg_ctl start -w, test child process status directly + rather than relying on heuristics (Tom Lane, Michael Paquier) + + + + Previously, pg_ctl relied on an assumption that the new + postmaster would always create postmaster.pid within five + seconds. But that can fail on heavily-loaded systems, + causing pg_ctl to report incorrectly that the + postmaster failed to start. + + + + Except on Windows, this change also means that a pg_ctl start + -w done immediately after another such command will now reliably + fail, whereas previously it would report success if done within two + seconds of the first command. + + + + + + In pg_ctl start -w, don't attempt to use a wildcard listen + address to connect to the postmaster (Kondo Yuta) + + + + On Windows, pg_ctl would fail to detect postmaster + startup if listen_addresses is set to 0.0.0.0 + or ::, because it would try to use that value verbatim as + the address to connect to, which doesn't work. Instead assume + that 127.0.0.1 or ::1, respectively, is the + right thing to use. 
+ + + + + + In pg_ctl on Windows, check service status to decide + where to send output, rather than checking if standard output is a + terminal (Michael Paquier) + + + + + + In pg_dump and pg_basebackup, adopt + the GNU convention for handling tar-archive members exceeding 8GB + (Tom Lane) + + + + The POSIX standard for tar file format does not allow + archive member files to exceed 8GB, but most modern implementations + of tar support an extension that fixes that. Adopt + this extension so that pg_dump with -Ft no longer + fails on tables with more than 8GB of data, and so that + pg_basebackup can handle files larger than 8GB. In addition, + fix some portability issues that could cause failures for members + between 4GB and 8GB on some platforms. Potentially these problems could + cause unrecoverable data loss due to unreadable backup files. + + + + + + Fix assorted corner-case bugs in pg_dump's processing + of extension member objects (Tom Lane) + + + + + + Make pg_dump mark a view's triggers as needing to be + processed after its rule, to prevent possible failure during + parallel pg_restore (Tom Lane) + + + + + + Ensure that relation option values are properly quoted + in pg_dump (Kouhei Sutou, Tom Lane) + + + + A reloption value that isn't a simple identifier or number could lead + to dump/reload failures due to syntax errors in CREATE statements + issued by pg_dump. This is not an issue with any + reloption currently supported by core PostgreSQL, but + extensions could allow reloptions that cause the problem. + + + + + + Fix pg_upgrade's file-copying code to handle errors + properly on Windows (Bruce Momjian) + + + + + + Install guards in pgbench against corner-case overflow + conditions during evaluation of script-specified division or modulo + operators (Fabien Coelho, Michael Paquier) + + + + + + Fix failure to localize messages emitted + by pg_receivexlog and pg_recvlogical + (Ioseph Kim) + + + + + + Avoid dump/reload problems when using both plpython2 + and plpython3 (Tom Lane) + + + + In principle, both versions of PL/Python can be used in + the same database, though not in the same session (because the two + versions of libpython cannot safely be used concurrently). + However, pg_restore and pg_upgrade both + do things that can fall foul of the same-session restriction. 
Work + around that by changing the timing of the check. + + + + + + Fix PL/Python regression tests to pass with Python 3.5 + (Peter Eisentraut) + + + + + + Prevent certain PL/Java parameters from being set by + non-superusers (Noah Misch) + + + + This change mitigates a PL/Java security bug + (CVE-2016-0766), which was fixed in PL/Java by marking + these parameters as superuser-only. To fix the security hazard for + sites that update PostgreSQL more frequently + than PL/Java, make the core code aware of them also. + + + + + + Improve libpq's handling of out-of-memory situations + (Michael Paquier, Amit Kapila, Heikki Linnakangas) + + + + + + Fix order of arguments + in ecpg-generated typedef statements + (Michael Meskes) + + + + + + Use %g not %f format + in ecpg's PGTYPESnumeric_from_double() + (Tom Lane) + + + + + + Fix ecpg-supplied header files to not contain comments + continued from a preprocessor directive line onto the next line + (Michael Meskes) + + + + Such a comment is rejected by ecpg. It's not yet clear + whether ecpg itself should be changed. + + + + + + Ensure that contrib/pgcrypto's crypt() + function can be interrupted by query cancel (Andreas Karlsson) + + + + + + Accept flex versions later than 2.5.x + (Tom Lane, Michael Paquier) + + + + Now that flex 2.6.0 has been released, the version checks in our build + scripts needed to be adjusted. + + + + + + Install our missing script where PGXS builds can find it + (Jim Nasby) + + + + This allows sane behavior in a PGXS build done on a machine where build + tools such as bison are missing. + + + + + + Ensure that dynloader.h is included in the installed + header files in MSVC builds (Bruce Momjian, Michael Paquier) + + + + + + Add variant regression test expected-output file to match behavior of + current libxml2 (Tom Lane) + + + + The fix for libxml2's CVE-2015-7499 causes it not to + output error context reports in some cases where it used to do so. 
+ This seems to be a bug, but we'll probably have to live with it for + some time, so work around it. + + + + + + Update time zone data files to tzdata release 2016a for + DST law changes in Cayman Islands, Metlakatla, and Trans-Baikal + Territory (Zabaykalsky Krai), plus historical corrections for Pakistan. + + + + + + + + Release 9.2.14 diff --git a/doc/src/sgml/release-9.3.sgml b/doc/src/sgml/release-9.3.sgml index b637908..0f4907d 100644 --- a/doc/src/sgml/release-9.3.sgml +++ b/doc/src/sgml/release-9.3.sgml @@ -1,6 +1,616 @@ + + Release 9.3.11 + + + Release Date + 2016-02-11 + + + + This release contains a variety of fixes from 9.3.10. + For information about new features in the 9.3 major release, see + . + + + + Migration to Version 9.3.11 + + + A dump/restore is not required for those running 9.3.X. + + + + However, if you are upgrading from a version earlier than 9.3.9, + see . + + + + + + Changes + + + + + + Perform an immediate shutdown if the postmaster.pid file + is removed (Tom Lane) + + + + The postmaster now checks every minute or so + that postmaster.pid is still there and still contains its + own PID. If not, it performs an immediate shutdown, as though it had + received SIGQUIT. The main motivation for this change + is to ensure that failed buildfarm runs will get cleaned up without + manual intervention; but it also serves to limit the bad effects if a + DBA forcibly removes postmaster.pid and then starts a new + postmaster. + + + + + + In SERIALIZABLE transaction isolation mode, serialization + anomalies could be missed due to race conditions during insertions + (Kevin Grittner, Thomas Munro) + + + + + + Fix failure to emit appropriate WAL records when doing ALTER + TABLE ... SET TABLESPACE for unlogged relations (Michael Paquier, + Andres Freund) + + + + Even though the relation's data is unlogged, the move must be logged or + the relation will be inaccessible after a standby is promoted to master. 
+ + + + + + Fix possible misinitialization of unlogged relations at the end of + crash recovery (Andres Freund, Michael Paquier) + + + + + + Ensure walsender slots are fully re-initialized when being re-used + (Magnus Hagander) + + + + + + Fix ALTER COLUMN TYPE to reconstruct inherited check + constraints properly (Tom Lane) + + + + + + Fix REASSIGN OWNED to change ownership of composite types + properly (Álvaro Herrera) + + + + + + Fix REASSIGN OWNED and ALTER OWNER to correctly + update granted-permissions lists when changing owners of data types, + foreign data wrappers, or foreign servers (Bruce Momjian, + Álvaro Herrera) + + + + + + Fix REASSIGN OWNED to ignore foreign user mappings, + rather than fail (Álvaro Herrera) + + + + + + Fix possible crash after doing query rewrite for an updatable view + (Stephen Frost) + + + + + + Fix planner's handling of LATERAL references (Tom + Lane) + + + + This fixes some corner cases that led to failed to build any + N-way joins or could not devise a query plan planner + failures. + + + + + + Add more defenses against bad planner cost estimates for GIN index + scans when the index's internal statistics are very out-of-date + (Tom Lane) + + + + + + Make planner cope with hypothetical GIN indexes suggested by an index + advisor plug-in (Julien Rouhaud) + + + + + + Speed up generation of unique table aliases in EXPLAIN and + rule dumping, and ensure that generated aliases do not + exceed NAMEDATALEN (Tom Lane) + + + + + + Fix dumping of whole-row Vars in ROW() + and VALUES() lists (Tom Lane) + + + + + + Fix possible internal overflow in numeric division + (Dean Rasheed) + + + + + + Fix enforcement of restrictions inside parentheses within regular + expression lookahead constraints (Tom Lane) + + + + Lookahead constraints aren't allowed to contain backrefs, and + parentheses within them are always considered non-capturing, according + to the manual. 
However, the code failed to handle these cases properly + inside a parenthesized subexpression, and would give unexpected + results. + + + + + + Conversion of regular expressions to indexscan bounds could produce + incorrect bounds from regexps containing lookahead constraints + (Tom Lane) + + + + + + Fix regular-expression compiler to handle loops of constraint arcs + (Tom Lane) + + + + The code added for CVE-2007-4772 was both incomplete, in that it didn't + handle loops involving more than one state, and incorrect, in that it + could cause assertion failures (though there seem to be no bad + consequences of that in a non-assert build). Multi-state loops would + cause the compiler to run until the query was canceled or it reached + the too-many-states error condition. + + + + + + Improve memory-usage accounting in regular-expression compiler + (Tom Lane) + + + + This causes the code to emit regular expression is too + complex errors in some cases that previously used unreasonable + amounts of time and memory. + + + + + + Improve performance of regular-expression compiler (Tom Lane) + + + + + + Make %h and %r escapes + in log_line_prefix work for messages emitted due + to log_connections (Tom Lane) + + + + Previously, %h/%r started to work just after a + new session had emitted the connection received log message; + now they work for that message too. + + + + + + On Windows, ensure the shared-memory mapping handle gets closed in + child processes that don't need it (Tom Lane, Amit Kapila) + + + + This oversight resulted in failure to recover from crashes + whenever logging_collector is turned on. + + + + + + Fix possible failure to detect socket EOF in non-blocking mode on + Windows (Tom Lane) + + + + It's not entirely clear whether this problem can happen in pre-9.5 + branches, but if it did, the symptom would be that a walsender process + would wait indefinitely rather than noticing a loss of connection. 
+ + + + + + Avoid leaking a token handle during SSPI authentication + (Christian Ullrich) + + + + + + In psql, ensure that libreadline's idea + of the screen size is updated when the terminal window size changes + (Merlin Moncure) + + + + Previously, libreadline did not notice if the window + was resized during query output, leading to strange behavior during + later input of multiline queries. + + + + + + Fix psql's \det command to interpret its + pattern argument the same way as other \d commands with + potentially schema-qualified patterns do (Reece Hart) + + + + + + Avoid possible crash in psql's \c command + when previous connection was via Unix socket and command specifies a + new hostname and same username (Tom Lane) + + + + + + In pg_ctl start -w, test child process status directly + rather than relying on heuristics (Tom Lane, Michael Paquier) + + + + Previously, pg_ctl relied on an assumption that the new + postmaster would always create postmaster.pid within five + seconds. But that can fail on heavily-loaded systems, + causing pg_ctl to report incorrectly that the + postmaster failed to start. + + + + Except on Windows, this change also means that a pg_ctl start + -w done immediately after another such command will now reliably + fail, whereas previously it would report success if done within two + seconds of the first command. + + + + + + In pg_ctl start -w, don't attempt to use a wildcard listen + address to connect to the postmaster (Kondo Yuta) + + + + On Windows, pg_ctl would fail to detect postmaster + startup if listen_addresses is set to 0.0.0.0 + or ::, because it would try to use that value verbatim as + the address to connect to, which doesn't work. Instead assume + that 127.0.0.1 or ::1, respectively, is the + right thing to use. 
+ + + + + + In pg_ctl on Windows, check service status to decide + where to send output, rather than checking if standard output is a + terminal (Michael Paquier) + + + + + + In pg_dump and pg_basebackup, adopt + the GNU convention for handling tar-archive members exceeding 8GB + (Tom Lane) + + + + The POSIX standard for tar file format does not allow + archive member files to exceed 8GB, but most modern implementations + of tar support an extension that fixes that. Adopt + this extension so that pg_dump with -Ft no + longer fails on tables with more than 8GB of data, and so + that pg_basebackup can handle files larger than 8GB. + + + + + + Fix assorted corner-case bugs in pg_dump's processing + of extension member objects (Tom Lane) + + + + + + Make pg_dump mark a view's triggers as needing to be + processed after its rule, to prevent possible failure during + parallel pg_restore (Tom Lane) + + + + + + Ensure that relation option values are properly quoted + in pg_dump (Kouhei Sutou, Tom Lane) + + + + A reloption value that isn't a simple identifier or number could lead + to dump/reload failures due to syntax errors in CREATE statements + issued by pg_dump. This is not an issue with any + reloption currently supported by core PostgreSQL, but + extensions could allow reloptions that cause the problem. + + + + + + Avoid repeated password prompts during parallel pg_dump + (Zeus Kronion) + + + + + + Fix pg_upgrade's file-copying code to handle errors + properly on Windows (Bruce Momjian) + + + + + + Install guards in pgbench against corner-case overflow + conditions during evaluation of script-specified division or modulo + operators (Fabien Coelho, Michael Paquier) + + + + + + Fix failure to localize messages emitted + by pg_receivexlog and pg_recvlogical + (Ioseph Kim) + + + + + + Avoid dump/reload problems when using both plpython2 + and plpython3 (Tom Lane) + + + + In principle, both versions of PL/Python can be used in + the same database, though not in the same session (because the two + versions of libpython cannot safely be used concurrently).
+ However, pg_restore and pg_upgrade both + do things that can fall foul of the same-session restriction. Work + around that by changing the timing of the check. + + + + + + Fix PL/Python regression tests to pass with Python 3.5 + (Peter Eisentraut) + + + + + + Fix premature clearing of libpq's input buffer when + socket EOF is seen (Tom Lane) + + + + This mistake caused libpq to sometimes not report the + backend's final error message before reporting server closed the + connection unexpectedly. + + + + + + Prevent certain PL/Java parameters from being set by + non-superusers (Noah Misch) + + + + This change mitigates a PL/Java security bug + (CVE-2016-0766), which was fixed in PL/Java by marking + these parameters as superuser-only. To fix the security hazard for + sites that update PostgreSQL more frequently + than PL/Java, make the core code aware of them also. + + + + + + Improve libpq's handling of out-of-memory situations + (Michael Paquier, Amit Kapila, Heikki Linnakangas) + + + + + + Fix order of arguments + in ecpg-generated typedef statements + (Michael Meskes) + + + + + + Use %g not %f format + in ecpg's PGTYPESnumeric_from_double() + (Tom Lane) + + + + + + Fix ecpg-supplied header files to not contain comments + continued from a preprocessor directive line onto the next line + (Michael Meskes) + + + + Such a comment is rejected by ecpg. It's not yet clear + whether ecpg itself should be changed. + + + + + + Fix hstore_to_json_loose()'s test for whether + an hstore value can be converted to a JSON number (Tom Lane) + + + + Previously this function could be fooled by non-alphanumeric trailing + characters, leading to emitting syntactically-invalid JSON. 
+ + + + + + Ensure that contrib/pgcrypto's crypt() + function can be interrupted by query cancel (Andreas Karlsson) + + + + + + Accept flex versions later than 2.5.x + (Tom Lane, Michael Paquier) + + + + Now that flex 2.6.0 has been released, the version checks in our build + scripts needed to be adjusted. + + + + + + Improve reproducibility of build output by ensuring filenames are given + to the linker in a fixed order (Christoph Berg) + + + + This avoids possible bitwise differences in the produced executable + files from one build to the next. + + + + + + Install our missing script where PGXS builds can find it + (Jim Nasby) + + + + This allows sane behavior in a PGXS build done on a machine where build + tools such as bison are missing. + + + + + + Ensure that dynloader.h is included in the installed + header files in MSVC builds (Bruce Momjian, Michael Paquier) + + + + + + Add variant regression test expected-output file to match behavior of + current libxml2 (Tom Lane) + + + + The fix for libxml2's CVE-2015-7499 causes it not to + output error context reports in some cases where it used to do so. + This seems to be a bug, but we'll probably have to live with it for + some time, so work around it. + + + + + + Update time zone data files to tzdata release 2016a for + DST law changes in Cayman Islands, Metlakatla, and Trans-Baikal + Territory (Zabaykalsky Krai), plus historical corrections for Pakistan. 
+ + + + + + + + Release 9.3.10 diff --git a/doc/src/sgml/release-9.4.sgml b/doc/src/sgml/release-9.4.sgml index 2c4c642..7ba54a9 100644 --- a/doc/src/sgml/release-9.4.sgml +++ b/doc/src/sgml/release-9.4.sgml @@ -491,16 +491,6 @@ Branch: REL9_1_STABLE [b00c79b5b] 2015-10-16 14:43:18 -0400 - - Make %h and %r escapes @@ -558,16 +548,6 @@ Branch: REL9_1_STABLE [d05103b77] 2016-01-04 17:41:33 -0500 - - Avoid leaking a token handle during SSPI authentication @@ -605,16 +585,6 @@ Branch: REL9_1_STABLE [db462a44e] 2015-12-17 16:55:51 -0500 - - Fix psql's \det command to interpret its @@ -699,16 +669,6 @@ Branch: REL9_1_STABLE [87deb55a4] 2015-11-08 17:31:24 -0500 - - In pg_ctl on Windows, check service status to decide @@ -750,16 +710,6 @@ Branch: REL9_1_STABLE [6df62ef43] 2015-11-23 00:32:01 -0500 - - Fix assorted corner-case bugs in pg_dump's processing @@ -767,29 +717,6 @@ Branch: REL9_1_STABLE [5108013db] 2016-01-13 18:55:27 -0500 - - - - - Fix improper quoting of domain constraint names - in pg_dump (Elvis Pranskevichus) - - - - - Make pg_dump mark a view's triggers as needing to be @@ -867,16 +794,6 @@ Branch: REL9_1_STABLE [c36064e43] 2015-11-24 17:18:27 -0500 - - Install guards in pgbench against corner-case overflow @@ -887,19 +804,6 @@ Branch: REL9_1_STABLE [4c8b07d3c] 2016-02-03 09:25:34 -0500 - - - - Suppress useless warning message when pg_receivexlog - connects to a pre-9.4 server (Marco Nenciarini) - - - - - Avoid dump/reload problems when using both plpython2 @@ -933,15 +826,22 @@ Branch: REL9_5_STABLE [5ef26b8de] 2016-01-11 20:06:47 -0500 - In principle, both versions of PL/Python can be used in the same - database, though not in the same session (because the two versions of - libpython cannot safely be used concurrently). + In principle, both versions of PL/Python can be used in + the same database, though not in the same session (because the two + versions of libpython cannot safely be used concurrently). 
However, pg_restore and pg_upgrade both do things that can fall foul of the same-session restriction. Work around that by changing the timing of the check. + + + Fix PL/Python regression tests to pass with Python 3.5 + (Peter Eisentraut) + + + - - - - - Fix PL/Python regression tests to pass with Python 3.5 - (Peter Eisentraut) - - - Fix premature clearing of libpq's input buffer when @@ -979,6 +863,21 @@ Branch: REL9_5_STABLE [a66c1fcdd] 2016-01-08 11:39:28 -0500 + + + Prevent certain PL/Java parameters from being set by + non-superusers (Noah Misch) + + + + This change mitigates a PL/Java security bug + (CVE-2016-0766), which was fixed in PL/Java by marking + these parameters as superuser-only. To fix the security hazard for + sites that update PostgreSQL more frequently + than PL/Java, make the core code aware of them also. + + + - Fix ecpg-supplied header files to not contain comments @@ -1053,14 +943,6 @@ Branch: REL9_1_STABLE [79782b407] 2016-02-01 13:19:43 +0100 - - Fix hstore_to_json_loose()'s test for whether @@ -1090,16 +972,6 @@ Branch: REL9_1_STABLE [1b6102eb7] 2015-12-27 13:03:19 -0300 - - In contrib/postgres_fdw, fix bugs triggered by use @@ -1135,27 +1007,6 @@ Branch: REL9_2_STABLE [7f94a5c10] 2015-12-10 10:19:31 -0500 - - - - - Fix ill-advised restriction of NAMEDATALEN to be less - than 256 (Robert Haas, Tom Lane) - - - - - Improve reproducibility of build output by ensuring filenames are given @@ -1190,20 +1041,10 @@ Branch: REL9_1_STABLE [2a37a103b] 2015-12-11 16:14:48 -0500 - - Ensure that dynloader.h is included in the installed - header files in MSVC builds (Michael Paquier) + header files in MSVC builds (Bruce Momjian, Michael Paquier) @@ -1231,16 +1072,6 @@ Branch: REL9_1_STABLE [386dcd539] 2015-12-11 19:08:40 -0500 - - Update time zone data files to tzdata release 2016a for diff --git a/doc/src/sgml/release-9.5.sgml b/doc/src/sgml/release-9.5.sgml index f47b7c2..6639be5 100644 --- a/doc/src/sgml/release-9.5.sgml +++ 
b/doc/src/sgml/release-9.5.sgml @@ -1,6 +1,403 @@ + + Release 9.5.1 + + + Release Date + 2016-02-11 + + + + This release contains a variety of fixes from 9.5.0. + For information about new features in the 9.5 major release, see + . + + + + Migration to Version 9.5.1 + + + A dump/restore is not required for those running 9.5.X. + + + + + Changes + + + + + + + + Fix an oversight that caused hash joins to miss joining to some tuples + of the inner relation in rare cases (Tomas Vondra, Tom Lane) + + + + + + + + Make %h and %r escapes + in log_line_prefix work for messages emitted due + to log_connections (Tom Lane) + + + + Previously, %h/%r started to work just after a + new session had emitted the connection received log message; + now they work for that message too. + + + + + + + + Avoid leaking a token handle during SSPI authentication + (Christian Ullrich) + + + + + + + + Fix psql's \det command to interpret its + pattern argument the same way as other \d commands with + potentially schema-qualified patterns do (Reece Hart) + + + + + + + + In pg_ctl on Windows, check service status to decide + where to send output, rather than checking if standard output is a + terminal (Michael Paquier) + + + + + + + + Fix assorted corner-case bugs in pg_dump's processing + of extension member objects (Tom Lane) + + + + + + + + Fix improper quoting of domain constraint names + in pg_dump (Elvis Pranskevichus) + + + + + + + + Make pg_dump mark a view's triggers as needing to be + processed after its rule, to prevent possible failure during + parallel pg_restore (Tom Lane) + + + + + + + + Install guards in pgbench against corner-case overflow + conditions during evaluation of script-specified division or modulo + operators (Fabien Coelho, Michael Paquier) + + + + + + + + Suppress useless warning message when pg_receivexlog + connects to a pre-9.4 server (Marco Nenciarini) + + + + + + + + Avoid dump/reload problems when using both plpython2 + and plpython3 (Tom Lane) + + + + In 
principle, both versions of PL/Python can be used in + the same database, though not in the same session (because the two + versions of libpython cannot safely be used concurrently). + However, pg_restore and pg_upgrade both + do things that can fall foul of the same-session restriction. Work + around that by changing the timing of the check. + + + + + + + + Fix PL/Python regression tests to pass with Python 3.5 + (Peter Eisentraut) + + + + + + + + Prevent certain PL/Java parameters from being set by + non-superusers (Noah Misch) + + + + This change mitigates a PL/Java security bug + (CVE-2016-0766), which was fixed in PL/Java by marking + these parameters as superuser-only. To fix the security hazard for + sites that update PostgreSQL more frequently + than PL/Java, make the core code aware of them also. + + + + + + + + Fix ecpg-supplied header files to not contain comments + continued from a preprocessor directive line onto the next line + (Michael Meskes) + + + + Such a comment is rejected by ecpg. It's not yet clear + whether ecpg itself should be changed. + + + + + + + + Fix hstore_to_json_loose()'s test for whether + an hstore value can be converted to a JSON number (Tom Lane) + + + + Previously this function could be fooled by non-alphanumeric trailing + characters, leading to emitting syntactically-invalid JSON. + + + + + + + + In contrib/postgres_fdw, fix bugs triggered by use + of tableoid in data-modifying commands (Etsuro Fujita, + Robert Haas) + + + + + + + + Fix ill-advised restriction of NAMEDATALEN to be less + than 256 (Robert Haas, Tom Lane) + + + + + + + + Improve reproducibility of build output by ensuring filenames are given + to the linker in a fixed order (Christoph Berg) + + + + This avoids possible bitwise differences in the produced executable + files from one build to the next. 
+ + + + + + + + Ensure that dynloader.h is included in the installed + header files in MSVC builds (Bruce Momjian, Michael Paquier) + + + + + + + + Update time zone data files to tzdata release 2016a for + DST law changes in Cayman Islands, Metlakatla, and Trans-Baikal + Territory (Zabaykalsky Krai), plus historical corrections for Pakistan. + + + + + + + + Release 9.5 diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c index 8eea092..4f91cd0 100644 --- a/src/backend/access/transam/parallel.c +++ b/src/backend/access/transam/parallel.c @@ -22,6 +22,7 @@ #include "libpq/pqformat.h" #include "libpq/pqmq.h" #include "miscadmin.h" +#include "optimizer/planmain.h" #include "storage/ipc.h" #include "storage/sinval.h" #include "storage/spin.h" @@ -432,6 +433,9 @@ LaunchParallelWorkers(ParallelContext *pcxt) if (pcxt->nworkers == 0) return; + /* We need to be a lock group leader. */ + BecomeLockGroupLeader(); + /* If we do have workers, we'd better have a DSM segment. */ Assert(pcxt->seg != NULL); @@ -952,6 +956,19 @@ ParallelWorkerMain(Datum main_arg) */ /* + * Join locking group. We must do this before anything that could try + * to acquire a heavyweight lock, because any heavyweight locks acquired + * to this point could block either directly against the parallel group + * leader or against some process which in turn waits for a lock that + * conflicts with the parallel group leader, causing an undetected + * deadlock. (If we can't join the lock group, the leader has gone away, + * so just exit quietly.) + */ + if (!BecomeLockGroupMember(fps->parallel_master_pgproc, + fps->parallel_master_pid)) + return; + + /* * Load libraries that were loaded by original backend. We want to do * this before restoring GUCs, because the libraries might define custom * variables. 
@@ -1063,7 +1080,8 @@ ParallelExtensionTrampoline(dsm_segment *seg, shm_toc *toc) static void ParallelErrorContext(void *arg) { - errcontext("parallel worker, PID %d", *(int32 *) arg); + if (force_parallel_mode != FORCE_PARALLEL_REGRESS) + errcontext("parallel worker, PID %d", *(int32 *) arg); } /* diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index 26f9114..02b72a3 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -13,13 +13,18 @@ */ #include "postgres.h" +#include "access/htup_details.h" #include "catalog/index.h" +#include "catalog/pg_constraint.h" +#include "commands/constraint.h" #include "commands/trigger.h" #include "executor/executor.h" #include "utils/builtins.h" #include "utils/rel.h" +#include "utils/syscache.h" #include "utils/tqual.h" +static char *tryExtractNotNull_internal(Node *node, Relation rel); /* * unique_key_recheck - trigger function to do a deferred uniqueness check. @@ -193,3 +198,125 @@ unique_key_recheck(PG_FUNCTION_ARGS) return PointerGetDatum(NULL); } + +Constraint * +createCheckNotNullConstraint(Oid nspid, char *constraint_name, + const char *relname, const char *colname) +{ + Constraint *check = makeNode(Constraint); + ColumnRef *colref; + NullTest *nulltest; + + colref = (ColumnRef *) makeNode(ColumnRef); + colref->fields = list_make1(makeString(pstrdup(colname))); + + nulltest = (NullTest *) makeNode(NullTest); + nulltest->argisrow = false; /* FIXME -- may be bogus! */ + nulltest->nulltesttype = IS_NOT_NULL; + nulltest->arg = (Expr *) colref; + + check->contype = CONSTR_CHECK; + check->location = -1; + check->conname = constraint_name ? constraint_name : + ChooseConstraintName(relname, colname, "not_null", nspid, + NIL); + check->raw_expr = (Node *) nulltest; + check->cooked_expr = NULL; + + return check; +} + +/* + * Given a CHECK constraint, examine it and determine whether it is CHECK (col + * IS NOT NULL). 
If it is, return the column name for which it is. Otherwise + * return NULL. + */ +char * +tryExtractNotNullFromCheckConstr(Constraint *constr) +{ + char *retval; + + Assert(constr->contype == CONSTR_CHECK); + + if (constr->raw_expr != NULL) + { + Assert(constr->cooked_expr == NULL); // as in heap.c + retval = tryExtractNotNull_internal(constr->raw_expr, NULL); + } + else + { + Assert(constr->cooked_expr != NULL); // as in heap.c + retval = tryExtractNotNull_internal(stringToNode(constr->cooked_expr), NULL); + } + + return retval; +} + +/* + * As above, but use a pg_constraint row as input. + * + * tupdesc is pg_constraint's tuple descriptor, and rel is the relation the + * constraint is for. + */ +char * +tryExtractNotNullFromCatalog(HeapTuple constrTup, TupleDesc tupdesc, + Relation rel) +{ + Datum val; + bool isnull; + char *conbin; + Node *node; + + val = SysCacheGetAttr(CONSTROID, constrTup, Anum_pg_constraint_conbin, + &isnull); + if (isnull) + elog(ERROR, "null conbin for constraint %u", + HeapTupleGetOid(constrTup)); + conbin = TextDatumGetCString(val); + node = (Node *) stringToNode(conbin); + + return tryExtractNotNull_internal(node, rel); +} + +/* + * Worker for the above + */ +static char * +tryExtractNotNull_internal(Node *node, Relation rel) +{ + if (IsA(node, NullTest)) + { + NullTest *nulltest = (NullTest *) node; + + if (nulltest->nulltesttype == IS_NOT_NULL) + { + if (IsA(nulltest->arg, ColumnRef)) + { + ColumnRef *colref = (ColumnRef *) nulltest->arg; + + if (list_length(colref->fields) == 1) + return strVal(linitial(colref->fields)); + } + if (IsA(nulltest->arg, Var)) + { + Var *var = (Var *) nulltest->arg; + TupleDesc tupdesc; + + if (!RelationIsValid(rel)) + elog(ERROR, + "no relation given to extract constraint from"); + tupdesc = RelationGetDescr(rel); + return NameStr(tupdesc->attrs[var->varattno - 1]->attname); + } + } + } + + /* + * XXX Need to check a few more possible wordings of NOT NULL: + * + * - foo IS DISTINCT FROM NULL + * - NOT 
(foo IS NULL) + */ + + return NULL; +} diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index fcb0331..514568e 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -312,6 +312,7 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) create->inhRelations = NIL; create->ofTypename = NULL; create->constraints = NIL; + create->notnullcols = NIL; create->options = into->options; create->oncommit = into->onCommit; create->tablespacename = into->tableSpaceName; diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 25d8ca0..ee13136 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -23,6 +23,7 @@ #include "foreign/fdwapi.h" #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" +#include "optimizer/planmain.h" #include "parser/parsetree.h" #include "rewrite/rewriteHandler.h" #include "tcop/tcopprot.h" @@ -572,6 +573,7 @@ void ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc) { Bitmapset *rels_used = NULL; + PlanState *ps; Assert(queryDesc->plannedstmt != NULL); es->pstmt = queryDesc->plannedstmt; @@ -580,7 +582,17 @@ ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc) es->rtable_names = select_rtable_names_for_explain(es->rtable, rels_used); es->deparse_cxt = deparse_context_for_plan_rtable(es->rtable, es->rtable_names); - ExplainNode(queryDesc->planstate, NIL, NULL, NULL, es); + + /* + * Sometimes we mark a Gather node as "invisible", which means that it's + * not displayed in EXPLAIN output. The purpose of this is to allow + * running regression tests with force_parallel_mode=regress to get the + * same results as running the same tests with force_parallel_mode=off. 
+ */ + ps = queryDesc->planstate; + if (IsA(ps, GatherState) &&((Gather *) ps->plan)->invisible) + ps = outerPlanState(ps); + ExplainNode(ps, NIL, NULL, NULL, es); } /* diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index eeda3b4..38eacf2 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -47,6 +47,7 @@ #include "catalog/toasting.h" #include "commands/cluster.h" #include "commands/comment.h" +#include "commands/constraint.h" #include "commands/defrem.h" #include "commands/event_trigger.h" #include "commands/policy.h" @@ -61,7 +62,6 @@ #include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" -#include "nodes/parsenodes.h" #include "optimizer/clauses.h" #include "optimizer/planner.h" #include "parser/parse_clause.h" @@ -270,8 +270,9 @@ struct DropRelationCallbackState #define ATT_FOREIGN_TABLE 0x0020 static void truncate_check_rel(Relation rel); -static List *MergeAttributes(List *schema, List *supers, char relpersistence, - List **supOids, List **supconstr, int *supOidCount); +static List *MergeAttributes(List *schema, List *notnullcols, List *supers, + char relpersistence, List **supOids, List **supconstr, + int *supOidCount); static bool MergeCheckConstraint(List *constraints, char *name, Node *expr); static void MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel); static void MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel); @@ -339,8 +340,9 @@ static void add_column_collation_dependency(Oid relid, int32 attnum, Oid collid) static void ATPrepAddOids(List **wqueue, Relation rel, bool recurse, AlterTableCmd *cmd, LOCKMODE lockmode); static ObjectAddress ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode); -static ObjectAddress ATExecSetNotNull(AlteredTableInfo *tab, Relation rel, - const char *colName, LOCKMODE lockmode); +static ObjectAddress ATExecSetNotNull(List **wqueue, AlteredTableInfo *tab, + Relation rel, 
char *constrname, const char *colName, + LOCKMODE lockmode); static ObjectAddress ATExecColumnDefault(Relation rel, const char *colName, Node *newDefault, LOCKMODE lockmode); static void ATPrepSetStatistics(Relation rel, const char *colName, @@ -370,6 +372,11 @@ static ObjectAddress ATAddCheckConstraint(List **wqueue, Constraint *constr, bool recurse, bool recursing, bool is_readd, LOCKMODE lockmode); +static ObjectAddress ATAddCheckConstraint_internal(List **wqueue, + AlteredTableInfo *tab, Relation rel, + Constraint *constr, + bool recurse, bool recursing, bool is_readd, + bool check_it, LOCKMODE lockmode); static ObjectAddress ATAddForeignKeyConstraint(AlteredTableInfo *tab, Relation rel, Constraint *fkconstraint, LOCKMODE lockmode); static void ATExecDropConstraint(Relation rel, const char *constrName, @@ -575,7 +582,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, * Look up inheritance ancestors and generate relation schema, including * inherited attributes. */ - schema = MergeAttributes(schema, stmt->inhRelations, + schema = MergeAttributes(schema, stmt->notnullcols, stmt->inhRelations, stmt->relation->relpersistence, &inheritOids, &old_constraints, &parentOidCount); @@ -1356,6 +1363,8 @@ storage_name(char c) * Input arguments: * 'schema' is the column/attribute definition for the table. (It's a list * of ColumnDef's.) It is destructively changed. + * 'notnullcols' is a list of column names that have NOT NULL constraints. + * Some of these columns may already have is_not_null set. * 'supers' is a list of names (as RangeVar nodes) of parent relations. * 'relpersistence' is a persistence type of the table.
* @@ -1408,10 +1417,12 @@ storage_name(char c) *---------- */ static List * -MergeAttributes(List *schema, List *supers, char relpersistence, - List **supOids, List **supconstr, int *supOidCount) +MergeAttributes(List *schema, List *notnullcols, List *supers, + char relpersistence, List **supOids, List **supconstr, + int *supOidCount) { ListCell *entry; + ListCell *cell; List *inhSchema = NIL; List *parentOids = NIL; List *constraints = NIL; @@ -1896,6 +1907,23 @@ MergeAttributes(List *schema, List *supers, char relpersistence, } /* + * If we have NOT NULL constraint declarations, set the is_not_null bits + * in the corresponding ColumnDef elements. + */ + foreach (cell, notnullcols) + { + char *colname = lfirst(cell); + + foreach (entry, schema) + { + ColumnDef *coldef = lfirst(entry); + + if (strcmp(coldef->colname, colname) == 0) + coldef->is_not_null = true; + } + } + + /* * If we found any conflicting parent default values, check to make sure * they were overridden by the child. */ @@ -3479,7 +3507,7 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel, address = ATExecDropNotNull(rel, cmd->name, lockmode); break; case AT_SetNotNull: /* ALTER COLUMN SET NOT NULL */ - address = ATExecSetNotNull(tab, rel, cmd->name, lockmode); + address = ATExecSetNotNull(wqueue, tab, rel, NULL, cmd->name, lockmode); break; case AT_SetStatistics: /* ALTER COLUMN SET STATISTICS */ address = ATExecSetStatistics(rel, cmd->name, cmd->def, lockmode); @@ -5281,13 +5309,14 @@ ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode) * NULL, InvalidObjectAddress is returned.
*/ static ObjectAddress -ATExecSetNotNull(AlteredTableInfo *tab, Relation rel, - const char *colName, LOCKMODE lockmode) +ATExecSetNotNull(List **wqueue, AlteredTableInfo *tab, Relation rel, + char *constrname, const char *colName, LOCKMODE lockmode) { HeapTuple tuple; AttrNumber attnum; Relation attr_rel; ObjectAddress address; + Constraint *newconstr; /* * lookup the attribute @@ -5332,6 +5361,20 @@ ATExecSetNotNull(AlteredTableInfo *tab, Relation rel, else address = InvalidObjectAddress; + /* + * We also need to add a new pg_constraint row. Use + * ATAddCheckConstraint_internal for that, but let it know that it + * doesn't need to test the constraint; we already informed it above, + * if necessary. + */ + newconstr = createCheckNotNullConstraint(rel->rd_rel->relnamespace, + constrname, + NameStr(rel->rd_rel->relname), + colName); + + ATAddCheckConstraint_internal(wqueue, tab, rel, newconstr, + true, false, false, true, lockmode); + InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), attnum); @@ -6108,6 +6151,40 @@ ATAddCheckConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel, Constraint *constr, bool recurse, bool recursing, bool is_readd, LOCKMODE lockmode) { + char *colname; + ObjectAddress address; + + /* + * If the constraint we're adding is CHECK (col IS NOT NULL), then we route + * it through ATExecSetNotNull instead of working directly with it; that + * function is responsible for getting back to us to recurse, etc. + * + * The reason for this is to get the attnotnull bit set for the column, and + * also to avoid having a second NOT NULL constraint for a column that + * might already have one. (XXX is the latter actually a desirable + * property? Consider inherited tables here.) 
+ */ + Assert(constr->contype == CONSTR_CHECK); + + colname = tryExtractNotNullFromCheckConstr(constr); + if (colname != NULL) + address = ATExecSetNotNull(wqueue, tab, rel, constr->conname, + colname, lockmode); + else + /* Not a single-column NOT NULL constraint -- do the regular dance */ + address = ATAddCheckConstraint_internal(wqueue, tab, rel, constr, + recurse, recursing, is_readd, + true, lockmode); + + return address; +} + +static ObjectAddress +ATAddCheckConstraint_internal(List **wqueue, AlteredTableInfo *tab, + Relation rel, Constraint *constr, bool recurse, + bool recursing, bool is_readd, + bool check_it, LOCKMODE lockmode) +{ List *newcons; ListCell *lcon; List *children; @@ -6214,8 +6291,9 @@ ATAddCheckConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel, childtab = ATGetQueueEntry(wqueue, childrel); /* Recurse to child */ - ATAddCheckConstraint(wqueue, childtab, childrel, - constr, recurse, true, is_readd, lockmode); + ATAddCheckConstraint_internal(wqueue, childtab, childrel, + constr, recurse, true, is_readd, check_it, + lockmode); heap_close(childrel, NoLock); } @@ -7681,6 +7759,7 @@ ATExecDropConstraint(Relation rel, const char *constrName, while (HeapTupleIsValid(tuple = systable_getnext(scan))) { ObjectAddress conobj; + char *colName; con = (Form_pg_constraint) GETSTRUCT(tuple); @@ -7694,6 +7773,22 @@ ATExecDropConstraint(Relation rel, const char *constrName, errmsg("cannot drop inherited constraint \"%s\" of relation \"%s\"", constrName, RelationGetRelationName(rel)))); + if (con->contype == CONSTRAINT_CHECK) + { + /* + * If it's a CHECK constraint, verify whether it is NOT NULL. + * If it is, then we may need to unset the attnotnull bit as well. + */ + colName = tryExtractNotNullFromCatalog(tuple, + RelationGetDescr(conrel), + rel); + if (colName != NULL) + { + /* do something! 
*/ + elog(NOTICE, "colname is %s", colName); + } + } + is_no_inherit_constraint = con->connoinherit; /* diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 47160e4..9ed09a7 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -1575,8 +1575,19 @@ ExecHashRemoveNextSkewBucket(HashJoinTable hashtable) if (batchno == hashtable->curbatch) { /* Move the tuple to the main hash table */ - hashTuple->next = hashtable->buckets[bucketno]; - hashtable->buckets[bucketno] = hashTuple; + HashJoinTuple copyTuple; + + /* + * We must copy the tuple into the dense storage, else it will not + * be found by, eg, ExecHashIncreaseNumBatches. + */ + copyTuple = (HashJoinTuple) dense_alloc(hashtable, tupleSize); + memcpy(copyTuple, hashTuple, tupleSize); + pfree(hashTuple); + + copyTuple->next = hashtable->buckets[bucketno]; + hashtable->buckets[bucketno] = copyTuple; + /* We have reduced skew space, but overall space doesn't change */ hashtable->spaceUsedSkew -= tupleSize; } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index a8b79fa..e54d174 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -334,6 +334,7 @@ _copyGather(const Gather *from) */ COPY_SCALAR_FIELD(num_workers); COPY_SCALAR_FIELD(single_copy); + COPY_SCALAR_FIELD(invisible); return newnode; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index d59b954..3e1c3e6 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -443,6 +443,7 @@ _outGather(StringInfo str, const Gather *node) WRITE_INT_FIELD(num_workers); WRITE_BOOL_FIELD(single_copy); + WRITE_BOOL_FIELD(invisible); } static void @@ -1824,6 +1825,7 @@ _outPlannerGlobal(StringInfo str, const PlannerGlobal *node) WRITE_BOOL_FIELD(hasRowSecurity); WRITE_BOOL_FIELD(parallelModeOK); WRITE_BOOL_FIELD(parallelModeNeeded); + WRITE_BOOL_FIELD(wholePlanParallelSafe); WRITE_BOOL_FIELD(hasForeignJoin); } diff 
--git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 6c46151..e4d41ee 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -2053,6 +2053,7 @@ _readGather(void) READ_INT_FIELD(num_workers); READ_BOOL_FIELD(single_copy); + READ_BOOL_FIELD(invisible); READ_DONE(); } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 54ff7f6..6e0db08 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -212,6 +212,10 @@ create_plan(PlannerInfo *root, Path *best_path) /* Recursively process the path tree */ plan = create_plan_recurse(root, best_path); + /* Update parallel safety information if needed. */ + if (!best_path->parallel_safe) + root->glob->wholePlanParallelSafe = false; + /* Check we successfully assigned all NestLoopParams to plan nodes */ if (root->curOuterParams != NIL) elog(ERROR, "failed to assign all NestLoopParams to plan nodes"); @@ -4829,6 +4833,7 @@ make_gather(List *qptlist, plan->righttree = NULL; node->num_workers = nworkers; node->single_copy = single_copy; + node->invisible = false; return node; } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a09b4b5..a3cc274 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -48,10 +48,12 @@ #include "storage/dsm_impl.h" #include "utils/rel.h" #include "utils/selfuncs.h" +#include "utils/syscache.h" -/* GUC parameter */ +/* GUC parameters */ double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION; +int force_parallel_mode = FORCE_PARALLEL_OFF; /* Hook for plugins to get control in planner() */ planner_hook_type planner_hook = NULL; @@ -230,25 +232,31 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) !has_parallel_hazard((Node *) parse, true); /* - * glob->parallelModeOK should tell us whether it's necessary to impose - * the parallel mode restrictions, 
but we don't actually want to impose - * them unless we choose a parallel plan, so that people who mislabel - * their functions but don't use parallelism anyway aren't harmed. - * However, it's useful for testing purposes to be able to force the - * restrictions to be imposed whenever a parallel plan is actually chosen - * or not. + * glob->parallelModeNeeded should tell us whether it's necessary to + * impose the parallel mode restrictions, but we don't actually want to + * impose them unless we choose a parallel plan, so that people who + * mislabel their functions but don't use parallelism anyway aren't + * harmed. But when force_parallel_mode is set, we enable the restrictions + * whenever possible for testing purposes. * - * (It's been suggested that we should always impose these restrictions - * whenever glob->parallelModeOK is true, so that it's easier to notice - * incorrectly-labeled functions sooner. That might be the right thing to - * do, but for now I've taken this approach. We could also control this - * with a GUC.) + * glob->wholePlanParallelSafe should tell us whether it's OK to stick a + * Gather node on top of the entire plan. However, it only needs to be + * accurate when force_parallel_mode is 'on' or 'regress', so we don't + * bother doing the work otherwise. The value we set here is just a + * preliminary guess; it may get changed from true to false later, but + * not vice versa. 
*/ -#ifdef FORCE_PARALLEL_MODE - glob->parallelModeNeeded = glob->parallelModeOK; -#else - glob->parallelModeNeeded = false; -#endif + if (force_parallel_mode == FORCE_PARALLEL_OFF || !glob->parallelModeOK) + { + glob->parallelModeNeeded = false; + glob->wholePlanParallelSafe = false; /* either false or don't care */ + } + else + { + glob->parallelModeNeeded = true; + glob->wholePlanParallelSafe = + !has_parallel_hazard((Node *) parse, false); + } /* Determine what fraction of the plan is likely to be scanned */ if (cursorOptions & CURSOR_OPT_FAST_PLAN) @@ -293,6 +301,35 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) } /* + * At present, we don't copy subplans to workers. The presence of a + * subplan in one part of the plan doesn't preclude the use of parallelism + * in some other part of the plan, but it does preclude the possibility of + * regarding the entire plan parallel-safe. + */ + if (glob->subplans != NULL) + glob->wholePlanParallelSafe = false; + + /* + * Optionally add a Gather node for testing purposes, provided this is + * actually a safe thing to do. + */ + if (glob->wholePlanParallelSafe && + force_parallel_mode != FORCE_PARALLEL_OFF) + { + Gather *gather = makeNode(Gather); + + gather->plan.targetlist = top_plan->targetlist; + gather->plan.qual = NIL; + gather->plan.lefttree = top_plan; + gather->plan.righttree = NULL; + gather->num_workers = 1; + gather->single_copy = true; + gather->invisible = (force_parallel_mode == FORCE_PARALLEL_REGRESS); + root->glob->parallelModeNeeded = true; + top_plan = &gather->plan; + } + + /* * If any Params were generated, run through the plan tree and compute * each plan node's extParam/allParam sets. 
Ideally we'd merge this into * set_plan_references' tree traversal, but for now it has to be separate diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index a65b2977..97300ae 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -40,6 +40,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_type.h" #include "commands/comment.h" +#include "commands/constraint.h" #include "commands/defrem.h" #include "commands/tablecmds.h" #include "commands/tablespace.h" @@ -80,6 +81,7 @@ typedef struct List *ckconstraints; /* CHECK constraints */ List *fkconstraints; /* FOREIGN KEY constraints */ List *ixconstraints; /* index-creating constraints */ + List *notnulls; /* list of column names declared NOT NULL */ List *inh_indexes; /* cloned indexes from INCLUDING INDEXES */ List *blist; /* "before list" of things to do before * creating the table */ @@ -105,6 +107,8 @@ typedef struct static void transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column); +static Constraint *transformNotNullConstraint(CreateStmtContext *cxt, + Constraint *constraint, ColumnDef *column); static void transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint); static void transformTableLikeClause(CreateStmtContext *cxt, @@ -224,6 +228,7 @@ transformCreateStmt(CreateStmt *stmt, const char *queryString) cxt.ckconstraints = NIL; cxt.fkconstraints = NIL; cxt.ixconstraints = NIL; + cxt.notnulls = NIL; cxt.inh_indexes = NIL; cxt.blist = NIL; cxt.alist = NIL; @@ -333,6 +338,7 @@ transformCreateStmt(CreateStmt *stmt, const char *queryString) */ stmt->tableElts = cxt.columns; stmt->constraints = cxt.ckconstraints; + stmt->notnullcols = cxt.notnulls; result = lappend(cxt.blist, stmt); result = list_concat(result, cxt.alist); @@ -528,6 +534,9 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column) foreach(clist, column->constraints) { + Constraint *newckconstr; + char *colname; + constraint = 
lfirst(clist); Assert(IsA(constraint, Constraint)); @@ -546,6 +555,11 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column) break; case CONSTR_NOTNULL: + /* + * For NOT NULL declarations, we need to mark the column as + * not nullable; and furthermore we need to create a new + * CHECK constraint for this. + */ if (saw_nullable && !column->is_not_null) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -554,6 +568,9 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column) parser_errposition(cxt->pstate, constraint->location))); column->is_not_null = TRUE; + newckconstr = transformNotNullConstraint(cxt, constraint, + column); + cxt->ckconstraints = lappend(cxt->ckconstraints, newckconstr); saw_nullable = true; break; @@ -572,6 +589,21 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column) case CONSTR_CHECK: cxt->ckconstraints = lappend(cxt->ckconstraints, constraint); + /* + * If there is a CHECK (foo IS NOT NULL) constraint + * declaration, we check the column name used in the + * constraint. If it's the same name as the column being + * defined, simply set the is_not_null flag in the column + * definition; otherwise remember the column name for later. 
+ */ + colname = tryExtractNotNullFromCheckConstr(constraint); + if (colname != NULL) + { + if (strcmp(colname, column->colname) == 0) + column->is_not_null = true; + else + cxt->notnulls = lappend(cxt->notnulls, colname); + } break; case CONSTR_PRIMARY: @@ -663,6 +695,8 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column) static void transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint) { + char *colname; + switch (constraint->contype) { case CONSTR_PRIMARY: @@ -697,6 +731,9 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint) case CONSTR_CHECK: cxt->ckconstraints = lappend(cxt->ckconstraints, constraint); + colname = tryExtractNotNullFromCheckConstr(constraint); + if (colname != NULL) + cxt->notnulls = lappend(cxt->notnulls, colname); break; case CONSTR_FOREIGN: @@ -728,6 +765,30 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint) } /* + * Given a NOT NULL column declaration, transform it into a new Constraint node + * representing the equivalent CHECK (col IS NOT NULL). 
+ */ +static Constraint * +transformNotNullConstraint(CreateStmtContext *cxt, Constraint *constraint, + ColumnDef *column) +{ + Constraint *check; + Oid nspid; + + if (cxt->rel) + nspid = RelationGetNamespace(cxt->rel); + else + nspid = RangeVarGetCreationNamespace(cxt->relation); + + check = createCheckNotNullConstraint(nspid, + NULL, + cxt->relation->relname, + column->colname); + + return check; +} + +/* * transformTableLikeClause * * Change the LIKE portion of a CREATE TABLE statement into @@ -2510,6 +2571,7 @@ transformAlterTableStmt(Oid relid, AlterTableStmt *stmt, cxt.ckconstraints = NIL; cxt.fkconstraints = NIL; cxt.ixconstraints = NIL; + cxt.notnulls = NIL; cxt.inh_indexes = NIL; cxt.blist = NIL; cxt.alist = NIL; diff --git a/src/backend/storage/lmgr/README b/src/backend/storage/lmgr/README index 8898e25..cb9c7d6 100644 --- a/src/backend/storage/lmgr/README +++ b/src/backend/storage/lmgr/README @@ -586,6 +586,69 @@ The caller can then send a cancellation signal. This implements the principle that autovacuum has a low locking priority (eg it must not block DDL on the table). +Group Locking +------------- + +As if all of that weren't already complicated enough, PostgreSQL now supports +parallelism (see src/backend/access/transam/README.parallel), which means that +we might need to resolve deadlocks that occur between gangs of related processes +rather than individual processes. This doesn't change the basic deadlock +detection algorithm very much, but it makes the bookkeeping more complicated. + +We choose to regard locks held by processes in the same parallel group as +non-conflicting. This means that two processes in a parallel group can hold +a self-exclusive lock on the same relation at the same time, or one process +can acquire an AccessShareLock while the other already holds AccessExclusiveLock. 
+This might seem dangerous and could be in some cases (more on that below), but +if we didn't do this then parallel query would be extremely prone to +self-deadlock. For example, a parallel query against a relation on which the +leader already held AccessExclusiveLock would hang, because the workers would +try to lock the same relation and be blocked by the leader; yet the leader can't +finish until it receives completion indications from all workers. An undetected +deadlock results. This is far from the only scenario where such a problem +happens. The same thing will occur if the leader holds only AccessShareLock, +the worker seeks AccessShareLock, but between the time the leader attempts to +acquire the lock and the time the worker attempts to acquire it, some other +process queues up waiting for an AccessExclusiveLock. In this case, too, an +indefinite hang results. + +It might seem that we could predict which locks the workers will attempt to +acquire and ensure before going parallel that those locks would be acquired +successfully. But this is very difficult to make work in a general way. For +example, a parallel worker's portion of the query plan could involve an +SQL-callable function which generates a query dynamically, and that query +might happen to hit a table on which the leader happens to hold +AccessExclusiveLock. By imposing enough restrictions on what workers can do, +we could eventually create a situation where their behavior can be adequately +restricted, but these restrictions would be fairly onerous, and even then, the +system required to decide whether the workers will succeed at acquiring the +necessary locks would be complex and possibly buggy. + +So, instead, we take the approach of deciding that locks within a lock group +do not conflict. 
This eliminates the possibility of an undetected deadlock, +but also opens up some problem cases: if the leader and worker try to do some +operation at the same time which would ordinarily be prevented by the heavyweight +lock mechanism, undefined behavior might result. In practice, the dangers are +modest. The leader and worker share the same transaction, snapshot, and combo +CID hash, and neither can perform any DDL or, indeed, write any data at all. +Thus, for either to read a table locked exclusively by the other is safe enough. +Problems would occur if the leader initiated parallelism from a point in the +code at which it had some backend-private state that made table access from +another process unsafe, for example after calling SetReindexProcessing and +before calling ResetReindexProcessing, catastrophe could ensue, because the +worker won't have that state. Similarly, problems could occur with certain +kinds of non-relation locks, such as relation extension locks. It's no safer +for two related processes to extend the same relation at the same time than for +unrelated processes to do the same. However, since parallel mode is strictly +read-only at present, neither this nor most of the similar cases can arise at +present. To allow parallel writes, we'll either need to (1) further enhance +the deadlock detector to handle those types of locks in a different way than +other types; or (2) have parallel workers use some other mutual exclusion +method for such cases; or (3) revise those cases so that they no longer use +heavyweight locking in the first place (which is not a crazy idea, given that +such lock acquisitions are not expected to deadlock and that heavyweight lock +acquisition is fairly slow anyway). 
+ User Locks (Advisory Locks) --------------------------- diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c index a68aaf6..69f678b 100644 --- a/src/backend/storage/lmgr/deadlock.c +++ b/src/backend/storage/lmgr/deadlock.c @@ -38,6 +38,7 @@ typedef struct { PGPROC *waiter; /* the waiting process */ PGPROC *blocker; /* the process it is waiting for */ + LOCK *lock; /* the lock it is waiting for */ int pred; /* workspace for TopoSort */ int link; /* workspace for TopoSort */ } EDGE; @@ -72,6 +73,9 @@ static bool FindLockCycle(PGPROC *checkProc, EDGE *softEdges, int *nSoftEdges); static bool FindLockCycleRecurse(PGPROC *checkProc, int depth, EDGE *softEdges, int *nSoftEdges); +static bool FindLockCycleRecurseMember(PGPROC *checkProc, + PGPROC *checkProcLeader, + int depth, EDGE *softEdges, int *nSoftEdges); static bool ExpandConstraints(EDGE *constraints, int nConstraints); static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints, PGPROC **ordering); @@ -449,18 +453,15 @@ FindLockCycleRecurse(PGPROC *checkProc, EDGE *softEdges, /* output argument */ int *nSoftEdges) /* output argument */ { - PGPROC *proc; - PGXACT *pgxact; - LOCK *lock; - PROCLOCK *proclock; - SHM_QUEUE *procLocks; - LockMethod lockMethodTable; - PROC_QUEUE *waitQueue; - int queue_size; - int conflictMask; int i; - int numLockModes, - lm; + dlist_iter iter; + + /* + * If this process is a lock group member, check the leader instead. (Note + * that we might be the leader, in which case this is a no-op.) + */ + if (checkProc->lockGroupLeader != NULL) + checkProc = checkProc->lockGroupLeader; /* * Have we already seen this proc? @@ -494,13 +495,57 @@ FindLockCycleRecurse(PGPROC *checkProc, visitedProcs[nVisitedProcs++] = checkProc; /* - * If the proc is not waiting, we have no outgoing waits-for edges. + * If the process is waiting, there is an outgoing waits-for edge to each + * process that blocks it. 
*/ - if (checkProc->links.next == NULL) - return false; - lock = checkProc->waitLock; - if (lock == NULL) - return false; + if (checkProc->links.next != NULL && checkProc->waitLock != NULL && + FindLockCycleRecurseMember(checkProc, checkProc, depth, softEdges, + nSoftEdges)) + return true; + + /* + * If the process is not waiting, there could still be outgoing waits-for + * edges if it is part of a lock group, because other members of the lock + * group might be waiting even though this process is not. (Given lock + * groups {A1, A2} and {B1, B2}, if A1 waits for B1 and B2 waits for A2, + * that is a deadlock even though neither B1 nor A2 is waiting for anything.) + */ + dlist_foreach(iter, &checkProc->lockGroupMembers) + { + PGPROC *memberProc; + + memberProc = dlist_container(PGPROC, lockGroupLink, iter.cur); + + if (memberProc->links.next != NULL && memberProc->waitLock != NULL && + memberProc != checkProc && + FindLockCycleRecurseMember(memberProc, checkProc, depth, softEdges, + nSoftEdges)) + return true; + } + + return false; +} + +static bool +FindLockCycleRecurseMember(PGPROC *checkProc, + PGPROC *checkProcLeader, + int depth, + EDGE *softEdges, /* output argument */ + int *nSoftEdges) /* output argument */ +{ + PGPROC *proc; + LOCK *lock = checkProc->waitLock; + PGXACT *pgxact; + PROCLOCK *proclock; + SHM_QUEUE *procLocks; + LockMethod lockMethodTable; + PROC_QUEUE *waitQueue; + int queue_size; + int conflictMask; + int i; + int numLockModes, + lm; + lockMethodTable = GetLocksMethodTable(lock); numLockModes = lockMethodTable->numLockModes; conflictMask = lockMethodTable->conflictTab[checkProc->waitLockMode]; @@ -516,11 +561,14 @@ FindLockCycleRecurse(PGPROC *checkProc, while (proclock) { + PGPROC *leader; + proc = proclock->tag.myProc; pgxact = &ProcGlobal->allPgXact[proc->pgprocno]; + leader = proc->lockGroupLeader == NULL ? 
proc : proc->lockGroupLeader; - /* A proc never blocks itself */ - if (proc != checkProc) + /* A proc never blocks itself or any other lock group member */ + if (leader != checkProcLeader) { for (lm = 1; lm <= numLockModes; lm++) { @@ -601,10 +649,20 @@ FindLockCycleRecurse(PGPROC *checkProc, for (i = 0; i < queue_size; i++) { - proc = procs[i]; + PGPROC *leader; - /* Done when we reach the target proc */ - if (proc == checkProc) + proc = procs[i]; + leader = proc->lockGroupLeader == NULL ? proc : + proc->lockGroupLeader; + + /* + * TopoSort will always return an ordering with group members + * adjacent to each other in the wait queue (see comments + * therein). So, as soon as we reach a process in the same lock + * group as checkProc, we know we've found all the conflicts that + * precede any member of the lock group led by checkProcLeader. + */ + if (leader == checkProcLeader) break; /* Is there a conflict with this guy's request? */ @@ -625,8 +683,9 @@ FindLockCycleRecurse(PGPROC *checkProc, * Add this edge to the list of soft edges in the cycle */ Assert(*nSoftEdges < MaxBackends); - softEdges[*nSoftEdges].waiter = checkProc; - softEdges[*nSoftEdges].blocker = proc; + softEdges[*nSoftEdges].waiter = checkProcLeader; + softEdges[*nSoftEdges].blocker = leader; + softEdges[*nSoftEdges].lock = lock; (*nSoftEdges)++; return true; } @@ -635,20 +694,52 @@ FindLockCycleRecurse(PGPROC *checkProc, } else { + PGPROC *lastGroupMember = NULL; + /* Use the true lock wait queue order */ waitQueue = &(lock->waitProcs); + + /* + * Find the last member of the lock group that is present in the wait + * queue. Anything after this is not a soft lock conflict. If group + * locking is not in use, then we know immediately which process we're + * looking for, but otherwise we've got to search the wait queue to + * find the last process actually present. 
+ */ + if (checkProc->lockGroupLeader == NULL) + lastGroupMember = checkProc; + else + { + proc = (PGPROC *) waitQueue->links.next; + queue_size = waitQueue->size; + while (queue_size-- > 0) + { + if (proc->lockGroupLeader == checkProcLeader) + lastGroupMember = proc; + proc = (PGPROC *) proc->links.next; + } + Assert(lastGroupMember != NULL); + } + + /* + * OK, now rescan (or scan) the queue to identify the soft conflicts. + */ queue_size = waitQueue->size; - proc = (PGPROC *) waitQueue->links.next; - while (queue_size-- > 0) { + PGPROC *leader; + + leader = proc->lockGroupLeader == NULL ? proc : + proc->lockGroupLeader; + /* Done when we reach the target proc */ - if (proc == checkProc) + if (proc == lastGroupMember) break; /* Is there a conflict with this guy's request? */ - if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0) + if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0 && + leader != checkProcLeader) { /* This proc soft-blocks checkProc */ if (FindLockCycleRecurse(proc, depth + 1, @@ -665,8 +756,9 @@ FindLockCycleRecurse(PGPROC *checkProc, * Add this edge to the list of soft edges in the cycle */ Assert(*nSoftEdges < MaxBackends); - softEdges[*nSoftEdges].waiter = checkProc; - softEdges[*nSoftEdges].blocker = proc; + softEdges[*nSoftEdges].waiter = checkProcLeader; + softEdges[*nSoftEdges].blocker = leader; + softEdges[*nSoftEdges].lock = lock; (*nSoftEdges)++; return true; } @@ -711,8 +803,7 @@ ExpandConstraints(EDGE *constraints, */ for (i = nConstraints; --i >= 0;) { - PGPROC *proc = constraints[i].waiter; - LOCK *lock = proc->waitLock; + LOCK *lock = constraints[i].lock; /* Did we already make a list for this lock? 
*/ for (j = nWaitOrders; --j >= 0;) @@ -778,7 +869,9 @@ TopoSort(LOCK *lock, PGPROC *proc; int i, j, + jj, k, + kk, last; /* First, fill topoProcs[] array with the procs in their current order */ @@ -798,41 +891,95 @@ TopoSort(LOCK *lock, * stores its list link in constraints[i].link (note any constraint will * be in just one list). The array index for the before-proc of the i'th * constraint is remembered in constraints[i].pred. + * + * Note that it's not necessarily the case that every constraint affects + * this particular wait queue. Prior to group locking, a process could be + * waiting for at most one lock. But a lock group can be waiting for + * zero, one, or multiple locks. Since topoProcs[] is an array of the + * processes actually waiting, while constraints[] is an array of group + * leaders, we've got to scan through topoProcs[] for each constraint, + * checking whether both a waiter and a blocker for that group are + * present. If so, the constraint is relevant to this wait queue; if not, + * it isn't. */ MemSet(beforeConstraints, 0, queue_size * sizeof(int)); MemSet(afterConstraints, 0, queue_size * sizeof(int)); for (i = 0; i < nConstraints; i++) { + /* + * Find a representative process that is on the lock queue and part of + * the waiting lock group. This may or may not be the leader, which + * may or may not be waiting at all. If there are any other processes + * in the same lock group on the queue, set their number of + * beforeConstraints to -1 to indicate that they should be emitted + * with their groupmates rather than considered separately. 
+ */ proc = constraints[i].waiter; - /* Ignore constraint if not for this lock */ - if (proc->waitLock != lock) - continue; - /* Find the waiter proc in the array */ + Assert(proc != NULL); + jj = -1; for (j = queue_size; --j >= 0;) { - if (topoProcs[j] == proc) + PGPROC *waiter = topoProcs[j]; + + if (waiter == proc || waiter->lockGroupLeader == proc) + { + Assert(waiter->waitLock == lock); + if (jj == -1) + jj = j; + else + { + Assert(beforeConstraints[j] <= 0); + beforeConstraints[j] = -1; + } break; + } } - Assert(j >= 0); /* should have found a match */ - /* Find the blocker proc in the array */ + + /* If no matching waiter, constraint is not relevant to this lock. */ + if (jj < 0) + continue; + + /* + * Similarly, find a representative process that is on the lock queue + * and waiting for the blocking lock group. Again, this could be the + * leader but does not need to be. + */ proc = constraints[i].blocker; + Assert(proc != NULL); + kk = -1; for (k = queue_size; --k >= 0;) { - if (topoProcs[k] == proc) - break; + PGPROC *blocker = topoProcs[k]; + + if (blocker == proc || blocker->lockGroupLeader == proc) + { + Assert(blocker->waitLock == lock); + if (kk == -1) + kk = k; + else + { + Assert(beforeConstraints[k] <= 0); + beforeConstraints[k] = -1; + } + } } - Assert(k >= 0); /* should have found a match */ - beforeConstraints[j]++; /* waiter must come before */ + + /* If no matching blocker, constraint is not relevant to this lock. */ + if (kk < 0) + continue; + + beforeConstraints[jj]++; /* waiter must come before */ /* add this constraint to list of after-constraints for blocker */ - constraints[i].pred = j; - constraints[i].link = afterConstraints[k]; - afterConstraints[k] = i + 1; + constraints[i].pred = jj; + constraints[i].link = afterConstraints[kk]; + afterConstraints[kk] = i + 1; } + /*-------------------- * Now scan the topoProcs array backwards. 
At each step, output the - * last proc that has no remaining before-constraints, and decrease - * the beforeConstraints count of each of the procs it was constrained - * against. + * last proc that has no remaining before-constraints plus any other + * members of the same lock group; then decrease the beforeConstraints + * count of each of the procs it was constrained against. * i = index of ordering[] entry we want to output this time * j = search index for topoProcs[] * k = temp for scanning constraint list for proc j @@ -840,8 +987,11 @@ TopoSort(LOCK *lock, *-------------------- */ last = queue_size - 1; - for (i = queue_size; --i >= 0;) + for (i = queue_size - 1; i >= 0;) { + int c; + int nmatches = 0; + /* Find next candidate to output */ while (topoProcs[last] == NULL) last--; @@ -850,12 +1000,37 @@ TopoSort(LOCK *lock, if (topoProcs[j] != NULL && beforeConstraints[j] == 0) break; } + /* If no available candidate, topological sort fails */ if (j < 0) return false; - /* Output candidate, and mark it done by zeroing topoProcs[] entry */ - ordering[i] = topoProcs[j]; - topoProcs[j] = NULL; + + /* + * Output everything in the lock group. There's no point in outputting + * an ordering where members of the same lock group are not + * consecutive on the wait queue: if some other waiter is between two + * requests that belong to the same group, then either it conflicts + * with both of them and is certainly not a solution; or it conflicts + * with at most one of them and is thus isomorphic to an ordering + * where the group members are consecutive. 
+ */ + proc = topoProcs[j]; + if (proc->lockGroupLeader != NULL) + proc = proc->lockGroupLeader; + Assert(proc != NULL); + for (c = 0; c <= last; ++c) + { + if (topoProcs[c] == proc || (topoProcs[c] != NULL && + topoProcs[c]->lockGroupLeader == proc)) + { + ordering[i - nmatches] = topoProcs[c]; + topoProcs[c] = NULL; + ++nmatches; + } + } + Assert(nmatches > 0); + i -= nmatches; + /* Update beforeConstraints counts of its predecessors */ for (k = afterConstraints[j]; k > 0; k = constraints[k - 1].link) beforeConstraints[constraints[k - 1].pred]--; diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 269fe14..e3e9599 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -35,6 +35,7 @@ #include "access/transam.h" #include "access/twophase.h" #include "access/twophase_rmgr.h" +#include "access/xact.h" #include "access/xlog.h" #include "miscadmin.h" #include "pg_trace.h" @@ -1136,6 +1137,18 @@ SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc, { uint32 partition = LockHashPartition(hashcode); + /* + * It might seem unsafe to access proclock->groupLeader without a lock, + * but it's not really. Either we are initializing a proclock on our + * own behalf, in which case our group leader isn't changing because + * the group leader for a process can only ever be changed by the + * process itself; or else we are transferring a fast-path lock to the + * main lock table, in which case that process can't change its lock + * group leader without first releasing all of its locks (and in + * particular the one we are currently transferring). + */ + proclock->groupLeader = proc->lockGroupLeader != NULL ? 
+ proc->lockGroupLeader : proc; proclock->holdMask = 0; proclock->releaseMask = 0; /* Add proclock to appropriate lists */ @@ -1255,9 +1268,10 @@ RemoveLocalLock(LOCALLOCK *locallock) * NOTES: * Here's what makes this complicated: one process's locks don't * conflict with one another, no matter what purpose they are held for - * (eg, session and transaction locks do not conflict). - * So, we must subtract off our own locks when determining whether the - * requested new lock conflicts with those already held. + * (eg, session and transaction locks do not conflict). Nor do the locks + * of one process in a lock group conflict with those of another process in + * the same group. So, we must subtract off these locks when determining + * whether the requested new lock conflicts with those already held. */ int LockCheckConflicts(LockMethod lockMethodTable, @@ -1267,8 +1281,12 @@ LockCheckConflicts(LockMethod lockMethodTable, { int numLockModes = lockMethodTable->numLockModes; LOCKMASK myLocks; - LOCKMASK otherLocks; + int conflictMask = lockMethodTable->conflictTab[lockmode]; + int conflictsRemaining[MAX_LOCKMODES]; + int totalConflictsRemaining = 0; int i; + SHM_QUEUE *procLocks; + PROCLOCK *otherproclock; /* * first check for global conflicts: If no locks conflict with my request, @@ -1279,40 +1297,91 @@ LockCheckConflicts(LockMethod lockMethodTable, * type of lock that conflicts with request. Bitwise compare tells if * there is a conflict. */ - if (!(lockMethodTable->conflictTab[lockmode] & lock->grantMask)) + if (!(conflictMask & lock->grantMask)) { PROCLOCK_PRINT("LockCheckConflicts: no conflict", proclock); return STATUS_OK; } /* - * Rats. Something conflicts. But it could still be my own lock. We have - * to construct a conflict mask that does not reflect our own locks, but - * only lock types held by other processes. + * Rats. Something conflicts. But it could still be my own lock, or + * a lock held by another member of my locking group. 
First, figure out + * how many conflicts remain after subtracting out any locks I hold + * myself. */ myLocks = proclock->holdMask; - otherLocks = 0; for (i = 1; i <= numLockModes; i++) { - int myHolding = (myLocks & LOCKBIT_ON(i)) ? 1 : 0; - - if (lock->granted[i] > myHolding) - otherLocks |= LOCKBIT_ON(i); + if ((conflictMask & LOCKBIT_ON(i)) == 0) + { + conflictsRemaining[i] = 0; + continue; + } + conflictsRemaining[i] = lock->granted[i]; + if (myLocks & LOCKBIT_ON(i)) + --conflictsRemaining[i]; + totalConflictsRemaining += conflictsRemaining[i]; } - /* - * now check again for conflicts. 'otherLocks' describes the types of - * locks held by other processes. If one of these conflicts with the kind - * of lock that I want, there is a conflict and I have to sleep. - */ - if (!(lockMethodTable->conflictTab[lockmode] & otherLocks)) + /* If no conflicts remain, we get the lock. */ + if (totalConflictsRemaining == 0) { - /* no conflict. OK to get the lock */ - PROCLOCK_PRINT("LockCheckConflicts: resolved", proclock); + PROCLOCK_PRINT("LockCheckConflicts: resolved (simple)", proclock); return STATUS_OK; } - PROCLOCK_PRINT("LockCheckConflicts: conflicting", proclock); + /* If no group locking, it's definitely a conflict. */ + if (proclock->groupLeader == MyProc && MyProc->lockGroupLeader == NULL) + { + Assert(proclock->tag.myProc == MyProc); + PROCLOCK_PRINT("LockCheckConflicts: conflicting (simple)", + proclock); + return STATUS_FOUND; + } + + /* + * Locks held in conflicting modes by members of our own lock group are + * not real conflicts; we can subtract those out and see if we still have + * a conflict. This is O(N) in the number of processes holding or awaiting + * locks on this object. We could improve that by making the shared memory + * state more complex (and larger) but it doesn't seem worth it. 
+ */ + procLocks = &(lock->procLocks); + otherproclock = (PROCLOCK *) + SHMQueueNext(procLocks, procLocks, offsetof(PROCLOCK, lockLink)); + while (otherproclock != NULL) + { + if (proclock != otherproclock && + proclock->groupLeader == otherproclock->groupLeader && + (otherproclock->holdMask & conflictMask) != 0) + { + int intersectMask = otherproclock->holdMask & conflictMask; + + for (i = 1; i <= numLockModes; i++) + { + if ((intersectMask & LOCKBIT_ON(i)) != 0) + { + if (conflictsRemaining[i] <= 0) + elog(PANIC, "proclocks held do not match lock"); + conflictsRemaining[i]--; + totalConflictsRemaining--; + } + } + + if (totalConflictsRemaining == 0) + { + PROCLOCK_PRINT("LockCheckConflicts: resolved (group)", + proclock); + return STATUS_OK; + } + } + otherproclock = (PROCLOCK *) + SHMQueueNext(procLocks, &otherproclock->lockLink, + offsetof(PROCLOCK, lockLink)); + } + + /* Nope, it's a real conflict. */ + PROCLOCK_PRINT("LockCheckConflicts: conflicting (group)", proclock); return STATUS_FOUND; } @@ -3095,6 +3164,10 @@ PostPrepare_Locks(TransactionId xid) PROCLOCKTAG proclocktag; int partition; + /* Can't prepare a lock group follower. */ + Assert(MyProc->lockGroupLeader == NULL || + MyProc->lockGroupLeader == MyProc); + /* This is a critical section: any error means big trouble */ START_CRIT_SECTION(); @@ -3239,6 +3312,13 @@ PostPrepare_Locks(TransactionId xid) proclocktag.myProc = newproc; /* + * Update groupLeader pointer to point to the new proc. (We'd + * better not be a member of somebody else's lock group!) + */ + Assert(proclock->groupLeader == proclock->tag.myProc); + proclock->groupLeader = newproc; + + /* * Update the proclock. We should not find any existing entry for * the same hash key, since there can be only one entry for any * given lock with my own proc. 
@@ -3785,6 +3865,8 @@ lock_twophase_recover(TransactionId xid, uint16 info, */ if (!found) { + Assert(proc->lockGroupLeader == NULL); + proclock->groupLeader = proc; proclock->holdMask = 0; proclock->releaseMask = 0; /* Add proclock to appropriate lists */ diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 3690753..084be5a 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -263,6 +263,9 @@ InitProcGlobal(void) /* Initialize myProcLocks[] shared memory queues. */ for (j = 0; j < NUM_LOCK_PARTITIONS; j++) SHMQueueInit(&(procs[i].myProcLocks[j])); + + /* Initialize lockGroupMembers list. */ + dlist_init(&procs[i].lockGroupMembers); } /* @@ -397,6 +400,11 @@ InitProcess(void) MyProc->backendLatestXid = InvalidTransactionId; pg_atomic_init_u32(&MyProc->nextClearXidElem, INVALID_PGPROCNO); + /* Check that group locking fields are in a proper initial state. */ + Assert(MyProc->lockGroupLeaderIdentifier == 0); + Assert(MyProc->lockGroupLeader == NULL); + Assert(dlist_is_empty(&MyProc->lockGroupMembers)); + /* * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch * on it. That allows us to repoint the process latch, which so far @@ -556,6 +564,11 @@ InitAuxiliaryProcess(void) OwnLatch(&MyProc->procLatch); SwitchToSharedLatch(); + /* Check that group locking fields are in a proper initial state. */ + Assert(MyProc->lockGroupLeaderIdentifier == 0); + Assert(MyProc->lockGroupLeader == NULL); + Assert(dlist_is_empty(&MyProc->lockGroupMembers)); + /* * We might be reusing a semaphore that belonged to a failed process. So * be careful and reinitialize its value here. (This is not strictly @@ -794,6 +807,40 @@ ProcKill(int code, Datum arg) ReplicationSlotRelease(); /* + * Detach from any lock group of which we are a member. 
If the leader + * exits before all other group members, its PGPROC will remain allocated + * until the last group process exits; that process must return the + * leader's PGPROC to the appropriate list. + */ + if (MyProc->lockGroupLeader != NULL) + { + PGPROC *leader = MyProc->lockGroupLeader; + LWLock *leader_lwlock = LockHashPartitionLockByProc(leader); + + LWLockAcquire(leader_lwlock, LW_EXCLUSIVE); + Assert(!dlist_is_empty(&leader->lockGroupMembers)); + dlist_delete(&MyProc->lockGroupLink); + if (dlist_is_empty(&leader->lockGroupMembers)) + { + leader->lockGroupLeaderIdentifier = 0; + leader->lockGroupLeader = NULL; + if (leader != MyProc) + { + procgloballist = leader->procgloballist; + + /* Leader exited first; return its PGPROC. */ + SpinLockAcquire(ProcStructLock); + leader->links.next = (SHM_QUEUE *) *procgloballist; + *procgloballist = leader; + SpinLockRelease(ProcStructLock); + } + } + else if (leader != MyProc) + MyProc->lockGroupLeader = NULL; + LWLockRelease(leader_lwlock); + } + + /* * Reset MyLatch to the process local one. This is so that signal * handlers et al can continue using the latch after the shared latch * isn't ours anymore. After that clear MyProc and disown the shared @@ -807,9 +854,20 @@ ProcKill(int code, Datum arg) procgloballist = proc->procgloballist; SpinLockAcquire(ProcStructLock); - /* Return PGPROC structure (and semaphore) to appropriate freelist */ - proc->links.next = (SHM_QUEUE *) *procgloballist; - *procgloballist = proc; + /* + * If we're still a member of a locking group, that means we're a leader + * which has somehow exited before its children. The last remaining child + * will release our PGPROC. Otherwise, release it now. + */ + if (proc->lockGroupLeader == NULL) + { + /* Since lockGroupLeader is NULL, lockGroupMembers should be empty. 
*/ + Assert(dlist_is_empty(&proc->lockGroupMembers)); + + /* Return PGPROC structure (and semaphore) to appropriate freelist */ + proc->links.next = (SHM_QUEUE *) *procgloballist; + *procgloballist = proc; + } /* Update shared estimate of spins_per_delay */ ProcGlobal->spins_per_delay = update_spins_per_delay(ProcGlobal->spins_per_delay); @@ -942,9 +1000,31 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) bool allow_autovacuum_cancel = true; int myWaitStatus; PGPROC *proc; + PGPROC *leader = MyProc->lockGroupLeader; int i; /* + * If group locking is in use, locks held by members of my locking group + * need to be included in myHeldLocks. + */ + if (leader != NULL) + { + SHM_QUEUE *procLocks = &(lock->procLocks); + PROCLOCK *otherproclock; + + otherproclock = (PROCLOCK *) + SHMQueueNext(procLocks, procLocks, offsetof(PROCLOCK, lockLink)); + while (otherproclock != NULL) + { + if (otherproclock->groupLeader == leader) + myHeldLocks |= otherproclock->holdMask; + otherproclock = (PROCLOCK *) + SHMQueueNext(procLocks, &otherproclock->lockLink, + offsetof(PROCLOCK, lockLink)); + } + } + + /* * Determine where to add myself in the wait queue. * * Normally I should go at the end of the queue. However, if I already @@ -968,6 +1048,15 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) proc = (PGPROC *) waitQueue->links.next; for (i = 0; i < waitQueue->size; i++) { + /* + * If we're part of the same locking group as this waiter, its + * locks neither conflict with ours nor contribute to aheadRequests. + */ + if (leader != NULL && leader == proc->lockGroupLeader) + { + proc = (PGPROC *) proc->links.next; + continue; + } /* Must he wait for me? 
*/ if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks) { @@ -1658,3 +1747,66 @@ ProcSendSignal(int pid) SetLatch(&proc->procLatch); } } + +/* + * BecomeLockGroupLeader - designate process as lock group leader + * + * Once this function has returned, other processes can join the lock group + * by calling BecomeLockGroupMember. + */ +void +BecomeLockGroupLeader(void) +{ + LWLock *leader_lwlock; + + /* If we already did it, we don't need to do it again. */ + if (MyProc->lockGroupLeader == MyProc) + return; + + /* We had better not be a follower. */ + Assert(MyProc->lockGroupLeader == NULL); + + /* Create single-member group, containing only ourselves. */ + leader_lwlock = LockHashPartitionLockByProc(MyProc); + LWLockAcquire(leader_lwlock, LW_EXCLUSIVE); + MyProc->lockGroupLeader = MyProc; + MyProc->lockGroupLeaderIdentifier = MyProcPid; + dlist_push_head(&MyProc->lockGroupMembers, &MyProc->lockGroupLink); + LWLockRelease(leader_lwlock); +} + +/* + * BecomeLockGroupMember - designate process as lock group member + * + * This is pretty straightforward except for the possibility that the leader + * whose group we're trying to join might exit before we manage to do so; + * and the PGPROC might get recycled for an unrelated process. To avoid + * that, we require the caller to pass the PID of the intended PGPROC as + * an interlock. Returns true if we successfully join the intended lock + * group, and false if not. + */ +bool +BecomeLockGroupMember(PGPROC *leader, int pid) +{ + LWLock *leader_lwlock; + bool ok = false; + + /* Group leader can't become member of group */ + Assert(MyProc != leader); + + /* PID must be valid. */ + Assert(pid != 0); + + /* Try to join the group, holding the leader's partition lock. 
*/ + leader_lwlock = LockHashPartitionLockByProc(leader); + LWLockAcquire(leader_lwlock, LW_EXCLUSIVE); + if (leader->lockGroupLeaderIdentifier == pid) + { + ok = true; + MyProc->lockGroupLeader = leader; + dlist_push_tail(&leader->lockGroupMembers, &MyProc->lockGroupLink); + } + LWLockRelease(leader_lwlock); + + return ok; +} diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c index 90b1eb1..cdbf72c 100644 --- a/src/backend/utils/adt/datetime.c +++ b/src/backend/utils/adt/datetime.c @@ -43,8 +43,12 @@ static int DecodeTime(char *str, int fmask, int range, static const datetkn *datebsearch(const char *key, const datetkn *base, int nel); static int DecodeDate(char *str, int fmask, int *tmask, bool *is2digits, struct pg_tm * tm); -static void TrimTrailingZeros(char *str); -static void AppendSeconds(char *cp, int sec, fsec_t fsec, + +#ifndef HAVE_INT64_TIMESTAMP +static char *TrimTrailingZeros(char *str); +#endif /* HAVE_INT64_TIMESTAMP */ + +static char *AppendSeconds(char *cp, int sec, fsec_t fsec, int precision, bool fillzeros); static void AdjustFractSeconds(double frac, struct pg_tm * tm, fsec_t *fsec, int scale); @@ -398,57 +402,121 @@ GetCurrentTimeUsec(struct pg_tm * tm, fsec_t *fsec, int *tzp) /* TrimTrailingZeros() * ... resulting from printing numbers with full precision. * + * Returns a pointer to the new end of string. No NUL terminator is put + * there; callers are responsible for NUL terminating str themselves. + * * Before Postgres 8.4, this always left at least 2 fractional digits, * but conversations on the lists suggest this isn't desired * since showing '0.10' is misleading with values of precision(1). 
*/ -static void +#ifndef HAVE_INT64_TIMESTAMP +static char * TrimTrailingZeros(char *str) { int len = strlen(str); while (len > 1 && *(str + len - 1) == '0' && *(str + len - 2) != '.') - { len--; - *(str + len) = '\0'; - } + return str + len; } +#endif /* HAVE_INT64_TIMESTAMP */ /* - * Append sections and fractional seconds (if any) at *cp. + * Append seconds and fractional seconds (if any) at *cp. + * * precision is the max number of fraction digits, fillzeros says to * pad to two integral-seconds digits. + * + * Returns a pointer to the new end of string. No NUL terminator is put + * there; callers are responsible for NUL terminating str themselves. + * * Note that any sign is stripped from the input seconds values. */ -static void +static char * AppendSeconds(char *cp, int sec, fsec_t fsec, int precision, bool fillzeros) { + Assert(precision >= 0); + +#ifdef HAVE_INT64_TIMESTAMP + /* fsec_t is just an int32 */ + + if (fillzeros) + cp = pg_ltostr_zeropad(cp, Abs(sec), 2); + else + cp = pg_ltostr(cp, Abs(sec)); + + if (fsec != 0) + { + int32 value = Abs(fsec); + char *end = &cp[precision + 1]; + bool gotnonzero = false; + + *cp++ = '.'; + + /* + * Append the fractional seconds part. Note that we don't want any + * trailing zeros here, so since we're building the number in reverse + * we'll skip appending zeros until we've output a non-zero digit. + */ + while (precision--) + { + int32 oldval = value; + int32 remainder; + + value /= 10; + remainder = oldval - value * 10; + + /* check if we got a non-zero */ + if (remainder) + gotnonzero = true; + + if (gotnonzero) + cp[precision] = '0' + remainder; + else + end = &cp[precision]; + } + + /* + * If we still have a non-zero value then precision must have not been + * enough to print the number. We punt the problem to pg_ltostr(), + * which will generate a correct answer in the minimum valid width. 
+ */ + if (value) + return pg_ltostr(cp, Abs(fsec)); + + return end; + } + else + return cp; +#else + /* fsec_t is a double */ + if (fsec == 0) { if (fillzeros) - sprintf(cp, "%02d", abs(sec)); + return pg_ltostr_zeropad(cp, Abs(sec), 2); else - sprintf(cp, "%d", abs(sec)); + return pg_ltostr(cp, Abs(sec)); } else { -#ifdef HAVE_INT64_TIMESTAMP - if (fillzeros) - sprintf(cp, "%02d.%0*d", abs(sec), precision, (int) Abs(fsec)); - else - sprintf(cp, "%d.%0*d", abs(sec), precision, (int) Abs(fsec)); -#else if (fillzeros) sprintf(cp, "%0*.*f", precision + 3, precision, fabs(sec + fsec)); else sprintf(cp, "%.*f", precision, fabs(sec + fsec)); -#endif - TrimTrailingZeros(cp); + return TrimTrailingZeros(cp); } +#endif /* HAVE_INT64_TIMESTAMP */ } -/* Variant of above that's specialized to timestamp case */ -static void + +/* + * Variant of above that's specialized to timestamp case. + * + * Returns a pointer to the new end of string. No NUL terminator is put + * there; callers are responsible for NUL terminating str themselves. + */ +static char * AppendTimestampSeconds(char *cp, struct pg_tm * tm, fsec_t fsec) { /* @@ -459,7 +527,7 @@ AppendTimestampSeconds(char *cp, struct pg_tm * tm, fsec_t fsec) if (tm->tm_year <= 0) fsec = 0; #endif - AppendSeconds(cp, tm->tm_sec, fsec, MAX_TIMESTAMP_PRECISION, true); + return AppendSeconds(cp, tm->tm_sec, fsec, MAX_TIMESTAMP_PRECISION, true); } /* @@ -3831,9 +3899,12 @@ datebsearch(const char *key, const datetkn *base, int nel) } /* EncodeTimezone() - * Append representation of a numeric timezone offset to str. + * Copies representation of a numeric timezone offset to str. + * + * Returns a pointer to the new end of string. No NUL terminator is put + * there; callers are responsible for NUL terminating str themselves. 
*/ -static void +static char * EncodeTimezone(char *str, int tz, int style) { int hour, @@ -3846,16 +3917,26 @@ EncodeTimezone(char *str, int tz, int style) hour = min / MINS_PER_HOUR; min -= hour * MINS_PER_HOUR; - str += strlen(str); /* TZ is negated compared to sign we wish to display ... */ *str++ = (tz <= 0 ? '+' : '-'); if (sec != 0) - sprintf(str, "%02d:%02d:%02d", hour, min, sec); + { + str = pg_ltostr_zeropad(str, hour, 2); + *str++ = ':'; + str = pg_ltostr_zeropad(str, min, 2); + *str++ = ':'; + str = pg_ltostr_zeropad(str, sec, 2); + } else if (min != 0 || style == USE_XSD_DATES) - sprintf(str, "%02d:%02d", hour, min); + { + str = pg_ltostr_zeropad(str, hour, 2); + *str++ = ':'; + str = pg_ltostr_zeropad(str, min, 2); + } else - sprintf(str, "%02d", hour); + str = pg_ltostr_zeropad(str, hour, 2); + return str; } /* EncodeDateOnly() @@ -3871,48 +3952,70 @@ EncodeDateOnly(struct pg_tm * tm, int style, char *str) case USE_ISO_DATES: case USE_XSD_DATES: /* compatible with ISO date formats */ - if (tm->tm_year > 0) - sprintf(str, "%04d-%02d-%02d", - tm->tm_year, tm->tm_mon, tm->tm_mday); - else - sprintf(str, "%04d-%02d-%02d %s", - -(tm->tm_year - 1), tm->tm_mon, tm->tm_mday, "BC"); + str = pg_ltostr_zeropad(str, + (tm->tm_year > 0) ? 
tm->tm_year : -(tm->tm_year - 1), 4); + *str++ = '-'; + str = pg_ltostr_zeropad(str, tm->tm_mon, 2); + *str++ = '-'; + str = pg_ltostr_zeropad(str, tm->tm_mday, 2); break; case USE_SQL_DATES: /* compatible with Oracle/Ingres date formats */ if (DateOrder == DATEORDER_DMY) - sprintf(str, "%02d/%02d", tm->tm_mday, tm->tm_mon); + { + str = pg_ltostr_zeropad(str, tm->tm_mday, 2); + *str++ = '/'; + str = pg_ltostr_zeropad(str, tm->tm_mon, 2); + } else - sprintf(str, "%02d/%02d", tm->tm_mon, tm->tm_mday); - if (tm->tm_year > 0) - sprintf(str + 5, "/%04d", tm->tm_year); - else - sprintf(str + 5, "/%04d %s", -(tm->tm_year - 1), "BC"); + { + str = pg_ltostr_zeropad(str, tm->tm_mon, 2); + *str++ = '/'; + str = pg_ltostr_zeropad(str, tm->tm_mday, 2); + } + *str++ = '/'; + str = pg_ltostr_zeropad(str, + (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4); break; case USE_GERMAN_DATES: /* German-style date format */ - sprintf(str, "%02d.%02d", tm->tm_mday, tm->tm_mon); - if (tm->tm_year > 0) - sprintf(str + 5, ".%04d", tm->tm_year); - else - sprintf(str + 5, ".%04d %s", -(tm->tm_year - 1), "BC"); + str = pg_ltostr_zeropad(str, tm->tm_mday, 2); + *str++ = '.'; + str = pg_ltostr_zeropad(str, tm->tm_mon, 2); + *str++ = '.'; + str = pg_ltostr_zeropad(str, + (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4); break; case USE_POSTGRES_DATES: default: /* traditional date-only style for Postgres */ if (DateOrder == DATEORDER_DMY) - sprintf(str, "%02d-%02d", tm->tm_mday, tm->tm_mon); + { + str = pg_ltostr_zeropad(str, tm->tm_mday, 2); + *str++ = '-'; + str = pg_ltostr_zeropad(str, tm->tm_mon, 2); + } else - sprintf(str, "%02d-%02d", tm->tm_mon, tm->tm_mday); - if (tm->tm_year > 0) - sprintf(str + 5, "-%04d", tm->tm_year); - else - sprintf(str + 5, "-%04d %s", -(tm->tm_year - 1), "BC"); + { + str = pg_ltostr_zeropad(str, tm->tm_mon, 2); + *str++ = '-'; + str = pg_ltostr_zeropad(str, tm->tm_mday, 2); + } + *str++ = '-'; + str = pg_ltostr_zeropad(str, + (tm->tm_year > 0) ? 
tm->tm_year : -(tm->tm_year - 1), 4); break; } + + if (tm->tm_year <= 0) + { + memcpy(str, " BC", 3); /* Don't copy NUL */ + str += 3; + } + *str = '\0'; } @@ -3927,13 +4030,14 @@ EncodeDateOnly(struct pg_tm * tm, int style, char *str) void EncodeTimeOnly(struct pg_tm * tm, fsec_t fsec, bool print_tz, int tz, int style, char *str) { - sprintf(str, "%02d:%02d:", tm->tm_hour, tm->tm_min); - str += strlen(str); - - AppendSeconds(str, tm->tm_sec, fsec, MAX_TIME_PRECISION, true); - + str = pg_ltostr_zeropad(str, tm->tm_hour, 2); + *str++ = ':'; + str = pg_ltostr_zeropad(str, tm->tm_min, 2); + *str++ = ':'; + str = AppendSeconds(str, tm->tm_sec, fsec, MAX_TIME_PRECISION, true); if (print_tz) - EncodeTimezone(str, tz, style); + str = EncodeTimezone(str, tz, style); + *str = '\0'; } @@ -3971,106 +4075,129 @@ EncodeDateTime(struct pg_tm * tm, fsec_t fsec, bool print_tz, int tz, const char case USE_ISO_DATES: case USE_XSD_DATES: /* Compatible with ISO-8601 date formats */ - - if (style == USE_ISO_DATES) - sprintf(str, "%04d-%02d-%02d %02d:%02d:", - (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), - tm->tm_mon, tm->tm_mday, tm->tm_hour, tm->tm_min); - else - sprintf(str, "%04d-%02d-%02dT%02d:%02d:", - (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), - tm->tm_mon, tm->tm_mday, tm->tm_hour, tm->tm_min); - - AppendTimestampSeconds(str + strlen(str), tm, fsec); - + str = pg_ltostr_zeropad(str, + (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4); + *str++ = '-'; + str = pg_ltostr_zeropad(str, tm->tm_mon, 2); + *str++ = '-'; + str = pg_ltostr_zeropad(str, tm->tm_mday, 2); + *str++ = (style == USE_ISO_DATES) ? 
' ' : 'T'; + str = pg_ltostr_zeropad(str, tm->tm_hour, 2); + *str++ = ':'; + str = pg_ltostr_zeropad(str, tm->tm_min, 2); + *str++ = ':'; + str = AppendTimestampSeconds(str, tm, fsec); if (print_tz) - EncodeTimezone(str, tz, style); - - if (tm->tm_year <= 0) - sprintf(str + strlen(str), " BC"); + str = EncodeTimezone(str, tz, style); break; case USE_SQL_DATES: /* Compatible with Oracle/Ingres date formats */ - if (DateOrder == DATEORDER_DMY) - sprintf(str, "%02d/%02d", tm->tm_mday, tm->tm_mon); + { + str = pg_ltostr_zeropad(str, tm->tm_mday, 2); + *str++ = '/'; + str = pg_ltostr_zeropad(str, tm->tm_mon, 2); + } else - sprintf(str, "%02d/%02d", tm->tm_mon, tm->tm_mday); - - sprintf(str + 5, "/%04d %02d:%02d:", - (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), - tm->tm_hour, tm->tm_min); - - AppendTimestampSeconds(str + strlen(str), tm, fsec); + { + str = pg_ltostr_zeropad(str, tm->tm_mon, 2); + *str++ = '/'; + str = pg_ltostr_zeropad(str, tm->tm_mday, 2); + } + *str++ = '/'; + str = pg_ltostr_zeropad(str, + (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4); + *str++ = ' '; + str = pg_ltostr_zeropad(str, tm->tm_hour, 2); + *str++ = ':'; + str = pg_ltostr_zeropad(str, tm->tm_min, 2); + *str++ = ':'; + str = AppendTimestampSeconds(str, tm, fsec); /* * Note: the uses of %.*s in this function would be risky if the * timezone names ever contain non-ASCII characters. However, all - * TZ abbreviations in the Olson database are plain ASCII. + * TZ abbreviations in the IANA database are plain ASCII. 
*/ - if (print_tz) { if (tzn) - sprintf(str + strlen(str), " %.*s", MAXTZLEN, tzn); + { + sprintf(str, " %.*s", MAXTZLEN, tzn); + str += strlen(str); + } else - EncodeTimezone(str, tz, style); + str = EncodeTimezone(str, tz, style); } - - if (tm->tm_year <= 0) - sprintf(str + strlen(str), " BC"); break; case USE_GERMAN_DATES: /* German variant on European style */ - - sprintf(str, "%02d.%02d", tm->tm_mday, tm->tm_mon); - - sprintf(str + 5, ".%04d %02d:%02d:", - (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), - tm->tm_hour, tm->tm_min); - - AppendTimestampSeconds(str + strlen(str), tm, fsec); + str = pg_ltostr_zeropad(str, tm->tm_mday, 2); + *str++ = '.'; + str = pg_ltostr_zeropad(str, tm->tm_mon, 2); + *str++ = '.'; + str = pg_ltostr_zeropad(str, + (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4); + *str++ = ' '; + str = pg_ltostr_zeropad(str, tm->tm_hour, 2); + *str++ = ':'; + str = pg_ltostr_zeropad(str, tm->tm_min, 2); + *str++ = ':'; + str = AppendTimestampSeconds(str, tm, fsec); if (print_tz) { if (tzn) - sprintf(str + strlen(str), " %.*s", MAXTZLEN, tzn); + { + sprintf(str, " %.*s", MAXTZLEN, tzn); + str += strlen(str); + } else - EncodeTimezone(str, tz, style); + str = EncodeTimezone(str, tz, style); } - - if (tm->tm_year <= 0) - sprintf(str + strlen(str), " BC"); break; case USE_POSTGRES_DATES: default: /* Backward-compatible with traditional Postgres abstime dates */ - day = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday); tm->tm_wday = j2day(day); - memcpy(str, days[tm->tm_wday], 3); - strcpy(str + 3, " "); - + str += 3; + *str++ = ' '; if (DateOrder == DATEORDER_DMY) - sprintf(str + 4, "%02d %3s", tm->tm_mday, months[tm->tm_mon - 1]); + { + str = pg_ltostr_zeropad(str, tm->tm_mday, 2); + *str++ = ' '; + memcpy(str, months[tm->tm_mon - 1], 3); + str += 3; + } else - sprintf(str + 4, "%3s %02d", months[tm->tm_mon - 1], tm->tm_mday); - - sprintf(str + 10, " %02d:%02d:", tm->tm_hour, tm->tm_min); - - AppendTimestampSeconds(str + strlen(str), tm, 
fsec); - - sprintf(str + strlen(str), " %04d", - (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1)); + { + memcpy(str, months[tm->tm_mon - 1], 3); + str += 3; + *str++ = ' '; + str = pg_ltostr_zeropad(str, tm->tm_mday, 2); + } + *str++ = ' '; + str = pg_ltostr_zeropad(str, tm->tm_hour, 2); + *str++ = ':'; + str = pg_ltostr_zeropad(str, tm->tm_min, 2); + *str++ = ':'; + str = AppendTimestampSeconds(str, tm, fsec); + *str++ = ' '; + str = pg_ltostr_zeropad(str, + (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4); if (print_tz) { if (tzn) - sprintf(str + strlen(str), " %.*s", MAXTZLEN, tzn); + { + sprintf(str, " %.*s", MAXTZLEN, tzn); + str += strlen(str); + } else { /* @@ -4079,15 +4206,19 @@ EncodeDateTime(struct pg_tm * tm, fsec_t fsec, bool print_tz, int tz, const char * avoid formatting something which would be rejected by * the date/time parser later. - thomas 2001-10-19 */ - sprintf(str + strlen(str), " "); - EncodeTimezone(str, tz, style); + *str++ = ' '; + str = EncodeTimezone(str, tz, style); } } - - if (tm->tm_year <= 0) - sprintf(str + strlen(str), " BC"); break; } + + if (tm->tm_year <= 0) + { + memcpy(str, " BC", 3); /* Don't copy NUL */ + str += 3; + } + *str = '\0'; } @@ -4242,7 +4373,8 @@ EncodeInterval(struct pg_tm * tm, fsec_t fsec, int style, char *str) day_sign, abs(mday), sec_sign, abs(hour), abs(min)); cp += strlen(cp); - AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, true); + cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, true); + *cp = '\0'; } else if (has_year_month) { @@ -4252,13 +4384,15 @@ EncodeInterval(struct pg_tm * tm, fsec_t fsec, int style, char *str) { sprintf(cp, "%d %d:%02d:", mday, hour, min); cp += strlen(cp); - AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, true); + cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, true); + *cp = '\0'; } else { sprintf(cp, "%d:%02d:", hour, min); cp += strlen(cp); - AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, true); + cp = AppendSeconds(cp, sec, 
fsec, MAX_INTERVAL_PRECISION, true); + *cp = '\0'; } } break; @@ -4284,8 +4418,7 @@ EncodeInterval(struct pg_tm * tm, fsec_t fsec, int style, char *str) { if (sec < 0 || fsec < 0) *cp++ = '-'; - AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, false); - cp += strlen(cp); + cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, false); *cp++ = 'S'; *cp++ = '\0'; } @@ -4311,7 +4444,8 @@ EncodeInterval(struct pg_tm * tm, fsec_t fsec, int style, char *str) (minus ? "-" : (is_before ? "+" : "")), abs(hour), abs(min)); cp += strlen(cp); - AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, true); + cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, true); + *cp = '\0'; } break; @@ -4337,8 +4471,7 @@ EncodeInterval(struct pg_tm * tm, fsec_t fsec, int style, char *str) } else if (is_before) *cp++ = '-'; - AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, false); - cp += strlen(cp); + cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, false); sprintf(cp, " sec%s", (abs(sec) != 1 || fsec != 0) ? "s" : ""); is_zero = FALSE; diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index 880d304..6b10596 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -227,3 +227,164 @@ pg_lltoa(int64 value, char *a) *a-- = swap; } } + + +/* + * pg_ltostr_zeropad + * Converts 'value' into a decimal string representation stored at 'str'. + * 'minwidth' specifies the minimum width of the result; any extra space + * is filled up by prefixing the number with zeros. + * + * Returns the ending address of the string result (the last character written + * plus 1). Note that no NUL terminator is written. 
+ * + * The intended use-case for this function is to build strings that contain + * multiple individual numbers, for example: + * + * str = pg_ltostr_zeropad(str, hours, 2); + * *str++ = ':'; + * str = pg_ltostr_zeropad(str, mins, 2); + * *str++ = ':'; + * str = pg_ltostr_zeropad(str, secs, 2); + * *str = '\0'; + * + * Note: Caller must ensure that 'str' points to enough memory to hold the + * result. + */ +char * +pg_ltostr_zeropad(char *str, int32 value, int32 minwidth) +{ + char *start = str; + char *end = &str[minwidth]; + int32 num = value; + + Assert(minwidth > 0); + + /* + * Handle negative numbers in a special way. We can't just write a '-' + * prefix and reverse the sign as that would overflow for INT32_MIN. + */ + if (num < 0) + { + *start++ = '-'; + minwidth--; + + /* + * Build the number starting at the last digit. Here remainder will + * be a negative number, so we must reverse the sign before adding '0' + * in order to get the correct ASCII digit. + */ + while (minwidth--) + { + int32 oldval = num; + int32 remainder; + + num /= 10; + remainder = oldval - num * 10; + start[minwidth] = '0' - remainder; + } + } + else + { + /* Build the number starting at the last digit */ + while (minwidth--) + { + int32 oldval = num; + int32 remainder; + + num /= 10; + remainder = oldval - num * 10; + start[minwidth] = '0' + remainder; + } + } + + /* + * If minwidth was not high enough to fit the number then num won't have + * been divided down to zero. We punt the problem to pg_ltostr(), which + * will generate a correct answer in the minimum valid width. + */ + if (num != 0) + return pg_ltostr(str, value); + + /* Otherwise, return last output character + 1 */ + return end; +} + +/* + * pg_ltostr + * Converts 'value' into a decimal string representation stored at 'str'. + * + * Returns the ending address of the string result (the last character written + * plus 1). Note that no NUL terminator is written. 
+ * + * The intended use-case for this function is to build strings that contain + * multiple individual numbers, for example: + * + * str = pg_ltostr(str, a); + * *str++ = ' '; + * str = pg_ltostr(str, b); + * *str = '\0'; + * + * Note: Caller must ensure that 'str' points to enough memory to hold the + * result. + */ +char * +pg_ltostr(char *str, int32 value) +{ + char *start; + char *end; + + /* + * Handle negative numbers in a special way. We can't just write a '-' + * prefix and reverse the sign as that would overflow for INT32_MIN. + */ + if (value < 0) + { + *str++ = '-'; + + /* Mark the position we must reverse the string from. */ + start = str; + + /* Compute the result string backwards. */ + do + { + int32 oldval = value; + int32 remainder; + + value /= 10; + remainder = oldval - value * 10; + /* As above, we expect remainder to be negative. */ + *str++ = '0' - remainder; + } while (value != 0); + } + else + { + /* Mark the position we must reverse the string from. */ + start = str; + + /* Compute the result string backwards. */ + do + { + int32 oldval = value; + int32 remainder; + + value /= 10; + remainder = oldval - value * 10; + *str++ = '0' + remainder; + } while (value != 0); + } + + /* Remember the end+1 and back up 'str' to the last character. */ + end = str--; + + /* Reverse string. */ + while (start < str) + { + char swap = *start; + + *start++ = *str; + *str-- = swap; + } + + return end; +} diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 4efd298..490a090 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -5532,9 +5532,21 @@ get_insert_query_def(Query *query, deparse_context *context) /* Add a WHERE clause (for partial indexes) if given */ if (confl->arbiterWhere != NULL) { + bool save_varprefix; + + /* + * Force non-prefixing of Vars, since parser assumes that they + * belong to target relation. 
WHERE clause does not use + * InferenceElem, so this is separately required. + */ + save_varprefix = context->varprefix; + context->varprefix = false; + appendContextKeyword(context, " WHERE ", -PRETTYINDENT_STD, PRETTYINDENT_STD, 1); get_rule_expr(confl->arbiterWhere, context, false); + + context->varprefix = save_varprefix; } } else if (confl->constraint != InvalidOid) @@ -7956,13 +7968,14 @@ get_rule_expr(Node *node, deparse_context *context, case T_InferenceElem: { InferenceElem *iexpr = (InferenceElem *) node; - bool varprefix = context->varprefix; + bool save_varprefix; bool need_parens; /* * InferenceElem can only refer to target relation, so a - * prefix is never useful. + * prefix is not useful, and indeed would cause parse errors. */ + save_varprefix = context->varprefix; context->varprefix = false; /* @@ -7982,7 +7995,7 @@ get_rule_expr(Node *node, deparse_context *context, if (need_parens) appendStringInfoChar(buf, ')'); - context->varprefix = varprefix; + context->varprefix = save_varprefix; if (iexpr->infercollid) appendStringInfo(buf, " COLLATE %s", diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 66c4791..31a69ca 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -379,6 +379,19 @@ static const struct config_enum_entry huge_pages_options[] = { {NULL, 0, false} }; +static const struct config_enum_entry force_parallel_mode_options[] = { + {"off", FORCE_PARALLEL_OFF, false}, + {"on", FORCE_PARALLEL_ON, false}, + {"regress", FORCE_PARALLEL_REGRESS, false}, + {"true", FORCE_PARALLEL_ON, true}, + {"false", FORCE_PARALLEL_OFF, true}, + {"yes", FORCE_PARALLEL_ON, true}, + {"no", FORCE_PARALLEL_OFF, true}, + {"1", FORCE_PARALLEL_ON, true}, + {"0", FORCE_PARALLEL_OFF, true}, + {NULL, 0, false} +}; + /* * Options for enum values stored in other modules */ @@ -863,6 +876,7 @@ static struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"geqo", PGC_USERSET, QUERY_TUNING_GEQO, 
gettext_noop("Enables genetic query optimization."), @@ -3672,6 +3686,16 @@ static struct config_enum ConfigureNamesEnum[] = NULL, NULL, NULL }, + { + {"force_parallel_mode", PGC_USERSET, QUERY_TUNING_OTHER, + gettext_noop("Forces use of parallel query facilities."), + gettext_noop("If possible, run query using a parallel worker and with parallel restrictions.") + }, + &force_parallel_mode, + FORCE_PARALLEL_OFF, force_parallel_mode_options, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, NULL, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 029114f..09b2003 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -313,6 +313,7 @@ #from_collapse_limit = 8 #join_collapse_limit = 8 # 1 disables collapsing of explicit # JOIN clauses +#force_parallel_mode = off #------------------------------------------------------------------------------ diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 64c2673..d09c6ce 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -6901,7 +6901,35 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) resetPQExpBuffer(q); - if (fout->remoteVersion >= 90200) + if (fout->remoteVersion >= 90600) + { + /* + * In 9.6, NOT NULL constraints are in pg_constraint and will be + * dumped as table constraints, so it's unnecessary to dump them + * here. 
+ */ + appendPQExpBuffer(q, "SELECT a.attnum, a.attname, a.atttypmod, " + "a.attstattarget, a.attstorage, t.typstorage, " + "false as attnotnull, a.atthasdef, a.attisdropped, " + "a.attlen, a.attalign, a.attislocal, " + "pg_catalog.format_type(t.oid,a.atttypmod) AS atttypname, " + "array_to_string(a.attoptions, ', ') AS attoptions, " + "CASE WHEN a.attcollation <> t.typcollation " + "THEN a.attcollation ELSE 0 END AS attcollation, " + "pg_catalog.array_to_string(ARRAY(" + "SELECT pg_catalog.quote_ident(option_name) || " + "' ' || pg_catalog.quote_literal(option_value) " + "FROM pg_catalog.pg_options_to_table(attfdwoptions) " + "ORDER BY option_name" + "), E',\n ') AS attfdwoptions " + "FROM pg_catalog.pg_attribute a LEFT JOIN pg_catalog.pg_type t " + "ON a.atttypid = t.oid " + "WHERE a.attrelid = '%u'::pg_catalog.oid " + "AND a.attnum > 0::pg_catalog.int2 " + "ORDER BY a.attrelid, a.attnum", + tbinfo->dobj.catId.oid); + } + else if (fout->remoteVersion >= 90200) { /* * attfdwoptions is new in 9.2. 
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 90992f2..378c40f 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201602041 +#define CATALOG_VERSION_NO 201602071 #endif diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index c6b4916..1c0ef9a 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -5146,23 +5146,23 @@ DATA(insert OID = 3993 ( dense_rank_final PGNSP PGUID 12 1 0 2276 0 f f f f f f DESCR("aggregate final function"); /* pg_upgrade support */ -DATA(insert OID = 3582 ( binary_upgrade_set_next_pg_type_oid PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ binary_upgrade_set_next_pg_type_oid _null_ _null_ _null_ )); +DATA(insert OID = 3582 ( binary_upgrade_set_next_pg_type_oid PGNSP PGUID 12 1 0 0 0 f f f f t f v r 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ binary_upgrade_set_next_pg_type_oid _null_ _null_ _null_ )); DESCR("for use by pg_upgrade"); -DATA(insert OID = 3584 ( binary_upgrade_set_next_array_pg_type_oid PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ binary_upgrade_set_next_array_pg_type_oid _null_ _null_ _null_ )); +DATA(insert OID = 3584 ( binary_upgrade_set_next_array_pg_type_oid PGNSP PGUID 12 1 0 0 0 f f f f t f v r 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ binary_upgrade_set_next_array_pg_type_oid _null_ _null_ _null_ )); DESCR("for use by pg_upgrade"); -DATA(insert OID = 3585 ( binary_upgrade_set_next_toast_pg_type_oid PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ binary_upgrade_set_next_toast_pg_type_oid _null_ _null_ _null_ )); +DATA(insert OID = 3585 ( binary_upgrade_set_next_toast_pg_type_oid PGNSP PGUID 12 1 0 0 0 f f f f t f v r 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ 
binary_upgrade_set_next_toast_pg_type_oid _null_ _null_ _null_ )); DESCR("for use by pg_upgrade"); -DATA(insert OID = 3586 ( binary_upgrade_set_next_heap_pg_class_oid PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ binary_upgrade_set_next_heap_pg_class_oid _null_ _null_ _null_ )); +DATA(insert OID = 3586 ( binary_upgrade_set_next_heap_pg_class_oid PGNSP PGUID 12 1 0 0 0 f f f f t f v r 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ binary_upgrade_set_next_heap_pg_class_oid _null_ _null_ _null_ )); DESCR("for use by pg_upgrade"); -DATA(insert OID = 3587 ( binary_upgrade_set_next_index_pg_class_oid PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ binary_upgrade_set_next_index_pg_class_oid _null_ _null_ _null_ )); +DATA(insert OID = 3587 ( binary_upgrade_set_next_index_pg_class_oid PGNSP PGUID 12 1 0 0 0 f f f f t f v r 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ binary_upgrade_set_next_index_pg_class_oid _null_ _null_ _null_ )); DESCR("for use by pg_upgrade"); -DATA(insert OID = 3588 ( binary_upgrade_set_next_toast_pg_class_oid PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ binary_upgrade_set_next_toast_pg_class_oid _null_ _null_ _null_ )); +DATA(insert OID = 3588 ( binary_upgrade_set_next_toast_pg_class_oid PGNSP PGUID 12 1 0 0 0 f f f f t f v r 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ binary_upgrade_set_next_toast_pg_class_oid _null_ _null_ _null_ )); DESCR("for use by pg_upgrade"); -DATA(insert OID = 3589 ( binary_upgrade_set_next_pg_enum_oid PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ binary_upgrade_set_next_pg_enum_oid _null_ _null_ _null_ )); +DATA(insert OID = 3589 ( binary_upgrade_set_next_pg_enum_oid PGNSP PGUID 12 1 0 0 0 f f f f t f v r 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ binary_upgrade_set_next_pg_enum_oid _null_ _null_ _null_ )); DESCR("for use by pg_upgrade"); 
-DATA(insert OID = 3590 ( binary_upgrade_set_next_pg_authid_oid PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ binary_upgrade_set_next_pg_authid_oid _null_ _null_ _null_ )); +DATA(insert OID = 3590 ( binary_upgrade_set_next_pg_authid_oid PGNSP PGUID 12 1 0 0 0 f f f f t f v r 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ binary_upgrade_set_next_pg_authid_oid _null_ _null_ _null_ )); DESCR("for use by pg_upgrade"); -DATA(insert OID = 3591 ( binary_upgrade_create_empty_extension PGNSP PGUID 12 1 0 0 0 f f f f f f v s 7 0 2278 "25 25 16 25 1028 1009 1009" _null_ _null_ _null_ _null_ _null_ binary_upgrade_create_empty_extension _null_ _null_ _null_ )); +DATA(insert OID = 3591 ( binary_upgrade_create_empty_extension PGNSP PGUID 12 1 0 0 0 f f f f f f v r 7 0 2278 "25 25 16 25 1028 1009 1009" _null_ _null_ _null_ _null_ _null_ binary_upgrade_create_empty_extension _null_ _null_ _null_ )); DESCR("for use by pg_upgrade"); /* replication/origin.h */ diff --git a/src/include/commands/constraint.h b/src/include/commands/constraint.h new file mode 100644 index 0000000..99799b4 --- /dev/null +++ b/src/include/commands/constraint.h @@ -0,0 +1,29 @@ +/*------------------------------------------------------------------------- + * + * constraint.h + * PostgreSQL CONSTRAINT support declarations + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/commands/constraint.h + * + *------------------------------------------------------------------------- + */ +#ifndef CONSTRAINT_H +#define CONSTRAINT_H + +#include "nodes/parsenodes.h" +#include "utils/relcache.h" + +extern Constraint *createCheckNotNullConstraint(Oid nspid, + char *constraint_name, const char *relname, + const char *colname); + +extern char *tryExtractNotNullFromCheckConstr(Constraint *constr); + +extern char 
*tryExtractNotNullFromCatalog(HeapTuple constrTup, + TupleDesc tupdesc, Relation rel); + +#endif /* CONSTRAINT_H */ diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 2fd0629..4ab39fd 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1735,10 +1735,11 @@ typedef struct VariableShowStmt * Create Table Statement * * NOTE: in the raw gram.y output, ColumnDef and Constraint nodes are - * intermixed in tableElts, and constraints is NIL. After parse analysis, - * tableElts contains just ColumnDefs, and constraints contains just - * Constraint nodes (in fact, only CONSTR_CHECK nodes, in the present - * implementation). + * intermixed in tableElts, and constraints and notnullcols are NIL. After + * parse analysis, tableElts contains just ColumnDefs, notnullcols has been + * filled with not-nullable column names from various sources, and constraints + * contains just Constraint nodes (in fact, only CONSTR_CHECK nodes, in the + * present implementation). * ---------------------- */ @@ -1751,6 +1752,7 @@ typedef struct CreateStmt * inhRelation) */ TypeName *ofTypename; /* OF typename */ List *constraints; /* constraints (list of Constraint nodes) */ + List *notnullcols; /* list of column names with NOT NULL */ List *options; /* options from WITH clause */ OnCommitAction oncommit; /* what do we do at COMMIT? */ char *tablespacename; /* table space to use, or NULL */ diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 55d6bbe..ae224cf 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -775,6 +775,7 @@ typedef struct Gather Plan plan; int num_workers; bool single_copy; + bool invisible; /* suppress EXPLAIN display (for testing)? 
*/ } Gather; /* ---------------- diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 595438c..96198ae 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -108,6 +108,9 @@ typedef struct PlannerGlobal bool parallelModeOK; /* parallel mode potentially OK? */ bool parallelModeNeeded; /* parallel mode actually required? */ + + bool wholePlanParallelSafe; /* is the entire plan parallel safe? */ + bool hasForeignJoin; /* does have a pushed down foreign join */ } PlannerGlobal; diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 7ae7367..eaa642b 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -17,9 +17,18 @@ #include "nodes/plannodes.h" #include "nodes/relation.h" +/* possible values for force_parallel_mode */ +typedef enum +{ + FORCE_PARALLEL_OFF, + FORCE_PARALLEL_ON, + FORCE_PARALLEL_REGRESS +} ForceParallelMode; + /* GUC parameters */ #define DEFAULT_CURSOR_TUPLE_FRACTION 0.1 extern double cursor_tuple_fraction; +extern int force_parallel_mode; /* query_planner callback to compute query_pathkeys */ typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 43eca86..6b4e365 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -346,6 +346,7 @@ typedef struct PROCLOCK PROCLOCKTAG tag; /* unique identifier of proclock object */ /* data */ + PGPROC *groupLeader; /* group leader, or NULL if no lock group */ LOCKMASK holdMask; /* bitmask for lock types currently held */ LOCKMASK releaseMask; /* bitmask for lock types to be released */ SHM_QUEUE lockLink; /* list link in LOCK's list of proclocks */ @@ -457,7 +458,6 @@ typedef enum * worker */ } DeadLockState; - /* * The lockmgr's shared hash tables are partitioned to reduce contention. 
* To determine which partition a given locktag belongs to, compute the tag's @@ -473,6 +473,17 @@ typedef enum (&MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + (i)].lock) /* + * The deadlock detector needs to be able to access lockGroupLeader and + * related fields in the PGPROC, so we arrange for those fields to be protected + * by one of the lock hash partition locks. Since the deadlock detector + * acquires all such locks anyway, this makes it safe for it to access these + * fields without doing anything extra. To avoid contention as much as + * possible, we map different PGPROCs to different partition locks. + */ +#define LockHashPartitionLockByProc(p) \ + LockHashPartitionLock((p)->pgprocno) + +/* * function prototypes */ extern void InitLocks(void); diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 3441288..66ab255 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -155,6 +155,15 @@ struct PGPROC bool fpVXIDLock; /* are we holding a fast-path VXID lock? */ LocalTransactionId fpLocalTransactionId; /* lxid for fast-path VXID * lock */ + + /* + * Support for lock groups. Use LockHashPartitionLockByProc to get the + * LWLock protecting these fields. + */ + int lockGroupLeaderIdentifier; /* MyProcPid, if I'm a leader */ + PGPROC *lockGroupLeader; /* lock group leader, if I'm a follower */ + dlist_head lockGroupMembers; /* list of members, if I'm a leader */ + dlist_node lockGroupLink; /* my member link, if I'm a member */ }; /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. 
*/ @@ -272,4 +281,7 @@ extern void LockErrorCleanup(void); extern void ProcWaitForSignal(void); extern void ProcSendSignal(int pid); +extern void BecomeLockGroupLeader(void); +extern bool BecomeLockGroupMember(PGPROC *leader, int pid); + #endif /* PROC_H */ diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index c9be32e..affcc01 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -290,6 +290,8 @@ extern int32 pg_atoi(const char *s, int size, int c); extern void pg_itoa(int16 i, char *a); extern void pg_ltoa(int32 l, char *a); extern void pg_lltoa(int64 ll, char *a); +extern char *pg_ltostr_zeropad(char *str, int32 value, int32 minwidth); +extern char *pg_ltostr(char *str, int32 value); /* * Per-opclass comparison functions for new btrees. These are diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 64f046e..0ac21bb 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2331,6 +2331,34 @@ select tt1.*, tt2.* from tt2 right join tt1 on tt1.joincol = tt2.joincol; reset enable_hashjoin; reset enable_nestloop; -- +-- regression test for bug #13908 (hash join with skew tuples & nbatch increase) +-- +set work_mem to '64kB'; +set enable_mergejoin to off; +explain (costs off) +select count(*) from tenk1 a, tenk1 b + where a.hundred = b.thousand and (b.fivethous % 10) < 10; + QUERY PLAN +------------------------------------------------------------ + Aggregate + -> Hash Join + Hash Cond: (a.hundred = b.thousand) + -> Index Only Scan using tenk1_hundred on tenk1 a + -> Hash + -> Seq Scan on tenk1 b + Filter: ((fivethous % 10) < 10) +(7 rows) + +select count(*) from tenk1 a, tenk1 b + where a.hundred = b.thousand and (b.fivethous % 10) < 10; + count +-------- + 100000 +(1 row) + +reset work_mem; +reset enable_mergejoin; +-- -- regression test for 8.2 bug with improper re-ordering of left joins -- create temp table tt3(f1 int, f2 text); diff --git 
a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 28b061f..2bdba2d 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2846,7 +2846,7 @@ SELECT definition FROM pg_rules WHERE tablename = 'hats' ORDER BY rulename; CREATE RULE hat_nosert AS + ON INSERT TO hats DO INSTEAD INSERT INTO hat_data (hat_name, hat_color) + VALUES (new.hat_name, new.hat_color) ON CONFLICT(hat_name COLLATE "C" bpchar_pattern_ops)+ - WHERE (hat_data.hat_color = 'green'::bpchar) DO NOTHING + + WHERE (hat_color = 'green'::bpchar) DO NOTHING + RETURNING hat_data.hat_name, + hat_data.hat_color; (1 row) @@ -2871,7 +2871,7 @@ SELECT tablename, rulename, definition FROM pg_rules hats | hat_nosert | CREATE RULE hat_nosert AS + | | ON INSERT TO hats DO INSTEAD INSERT INTO hat_data (hat_name, hat_color) + | | VALUES (new.hat_name, new.hat_color) ON CONFLICT(hat_name COLLATE "C" bpchar_pattern_ops)+ - | | WHERE (hat_data.hat_color = 'green'::bpchar) DO NOTHING + + | | WHERE (hat_color = 'green'::bpchar) DO NOTHING + | | RETURNING hat_data.hat_name, + | | hat_data.hat_color; (1 row) diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 0358d00..fafbb3f 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -464,6 +464,22 @@ reset enable_hashjoin; reset enable_nestloop; -- +-- regression test for bug #13908 (hash join with skew tuples & nbatch increase) +-- + +set work_mem to '64kB'; +set enable_mergejoin to off; + +explain (costs off) +select count(*) from tenk1 a, tenk1 b + where a.hundred = b.thousand and (b.fivethous % 10) < 10; +select count(*) from tenk1 a, tenk1 b + where a.hundred = b.thousand and (b.fivethous % 10) < 10; + +reset work_mem; +reset enable_mergejoin; + +-- -- regression test for 8.2 bug with improper re-ordering of left joins --