Bug in 8.0.0rc3 query planner: constant column in view changes execution plan

Started by Jörg Hoppeover 21 years ago2 messagesbugs
Jump to latest
#1Jörg Hoppe
hoppe@geoinformationsdienst.de
--- This script will demonstrate a bug-like misbehaviour found in the query planner
--- of postgresql 8.0.0.rc3 server.
--- Run it as an user with administrative rights with psql
--- query planner behaviour as demonstrated:
--------------------------------------
--- Adding a constant expression column to a view definition leads to different
--- (and very bad) execution plans.
--- The constant expression column can be reduced to "1 AS constval".
---
--- querey planner behaviour as expected:
---------------------------------------------
--- SELECTing expressions, which do not access any table data,
--- should not influence the execution plan.
---
--- Platform:
-------------
--- * postgresql 8.0.0.rc3 server
--- * compiled with #define FUNC_MAX_ARGS = INDEX_MAX_KEYS = 250
---   in   .\src\include\pg_config_manual.h
--- * System: Windows 2000, Service Pack 4
---   AMD Athlon XP 2600+, 1.5 GB RAM.
--- * file "postgresql.conf" was not modified.
---
--- What this script executes:
-------------------------------
--- * three tables t_a, t_b and t_c are defined and populated with data (total ca. 5.000.000  records,
---   this may take half an hour on Athlon 2500.
--- * the tables are linked with each other via CONSTRAINT..REFERENCES,
---   indexes on foreign keys are generated.
--- * three very similar views v_test_good, v_test_strange and v_test_bad are defined,
---   which join all three tables.
--- * an identical SELECT is executed on each view, it uses LEFT JOIN .
---   SELECTing the view which contains column "1 AS constval" runs forever.
---   SELECTing the view which contains column "table.col/table.col AS constval" runs fine.
---
--- feed back email:
-------------------
--- hoppe@geoinformationsdienst.de

----------------------- BEGIN OF SQL CODE --------------------------------------
-- DELETE objects from previous test run
drop view v_test_good cascade ;
drop view v_test_strange cascade ;
drop view v_test_bad cascade ;

drop table t_c cascade ;
drop table t_b cascade ;
drop table t_a cascade ;

-- create tables
-- t_a is master, t_b is detail of t_a , t_c is detail of t_a

-- dummy columns col1..col6 will be filled with dummy data,
-- this is needed to reproduce the error!
CREATE TABLE t_a (a_id integer, info varchar,
col1 varchar,
col2 varchar,
col3 varchar,
col4 varchar,
col5 varchar,
col6 varchar,
CONSTRAINT pk_a PRIMARY KEY (a_id)
) ;

CREATE TABLE t_b (b_id integer, a_id integer, info varchar,
col1 varchar,
col2 varchar,
col3 varchar,
col4 varchar,
col5 varchar,
col6 varchar,
CONSTRAINT pk_b PRIMARY KEY (b_id)
) ;

CREATE TABLE t_c (c_id integer, a_id integer , info varchar,
col1 varchar,
col2 varchar,
col3 varchar,
col4 varchar,
col5 varchar,
col6 varchar,
CONSTRAINT pk_c PRIMARY KEY (c_id)
) ;

-- function to append detail data to a master table.
-- fills a master, if master_table IS NULL.
-- detail data is generated for master records with pk BETWWEN min_master_pk_val AND max_master_pk_val
CREATE OR REPLACE FUNCTION generate_detail_data(varchar, varchar,varchar,varchar,varchar,integer,integer,integer)
RETURNS varchar AS
$BODY$
BEGIN
DECLARE
master_table ALIAS FOR $1 ;
detail_table ALIAS FOR $2 ;
master_pk ALIAS FOR $3 ;
detail_pk ALIAS FOR $4 ;
detail_fk ALIAS FOR $5 ;
min_master_pk_val ALIAS FOR $6 ;
max_master_pk_val ALIAS FOR $7 ;
n ALIAS FOR $8 ;

maxrecord RECORD ;
masterrecord RECORD ;
v_detail_pk integer ;
v_detail_fk integer ;
i integer ;
s VARCHAR ;
BEGIN
FOR maxrecord IN EXECUTE 'SELECT MAX(' || detail_pk ||') AS pk FROM ' || detail_table LOOP
v_detail_pk := maxrecord.pk ; -- just 1 row!
END LOOP ;

IF v_detail_pk IS NULL THEN
v_detail_pk := 0 ; -- no records yet
END IF ;

if (master_table IS NULL) OR (master_pk IS NULL) THEN
-- Detail has no master
FOR i IN 1 .. n LOOP
v_detail_pk := v_detail_pk + 1 ;
-- some data ....
s := 'INSERTED detail #' || i
|| ' with id = '|| to_char(v_detail_pk)
|| ' at ' || TO_CHAR(current_timestamp) ;
EXECUTE ' INSERT INTO '
|| detail_table
|| '(' || detail_pk || ', info,col1,col2,col3,col4,col5,col6) '
|| ' values(' || v_detail_pk
|| ', ''' || s || ''''
|| ', ''' || s || ''''
|| ', ''' || s || ''''
|| ', ''' || s || ''''
|| ', ''' || s || ''''
|| ', ''' || s || ''''
|| ', ''' || s || ''''
|| ') ' ;
END LOOP ;
ELSE
-- link detail records with master
FOR masterrecord IN EXECUTE 'SELECT ' || master_pk ||' AS pk FROM ' || master_table
|| ' WHERE ' || master_pk || ' BETWEEN ' || min_master_pk_val || ' AND ' || max_master_pk_val
LOOP
v_detail_fk := masterrecord.pk ;
FOR i IN 1 .. n LOOP
v_detail_pk := v_detail_pk + 1 ;
-- some data ....
s := 'INSERTED detail #' || i
|| ' for master ' || masterrecord.pk
|| ' with id = ' || v_detail_pk
|| ' at ' || TO_CHAR(current_timestamp) ;
EXECUTE ' INSERT INTO '
|| detail_table
|| '(' || detail_pk || ',' || detail_fk || ', info,col1,col2,col3,col4,col5,col6) '
|| ' values(' || v_detail_pk || ',' || v_detail_fk
|| ', ''' || s || ''''
|| ', ''' || s || ''''
|| ', ''' || s || ''''
|| ', ''' || s || ''''
|| ', ''' || s || ''''
|| ', ''' || s || ''''
|| ', ''' || s || ''''
|| ') ' ;

END LOOP ;
END LOOP ;
END IF ;

return v_detail_pk ;
END ;
END ;
$BODY$
LANGUAGE 'plpgsql' ;

-- Fill data into structure

-- a = master = objekte
select generate_detail_data(null,'t_a', null, 'a_id', null, null, null, 2400000) ;
-- select count(*) from t_a ;

-- b = detail = raumelemente
select generate_detail_data('t_a','t_b', 'a_id', 'b_id', 'a_id', 0,1700000, 1) ;
-- select count(*) from t_b ;

-- c = detail = flurstuecke
select generate_detail_data('t_a','t_c', 'a_id', 'c_id', 'a_id', 0,1500000, 1) ;
-- select count(*) from t_c ;

-- now there exist 1.5000.000 entries in t_c, who have also corresponding entries in t_b

--- now set fk-constraints (and indexes) ;
ALTER TABLE t_b ADD CONSTRAINT fk_b_1 FOREIGN KEY (a_id) REFERENCES t_a (a_id) ON UPDATE NO ACTION ON DELETE NO ACTION DEFERRABLE 
INITIALLY IMMEDIATE ;
ALTER TABLE t_c ADD CONSTRAINT fk_c_1 FOREIGN KEY (a_id) REFERENCES t_a (a_id) ON UPDATE NO ACTION ON DELETE NO ACTION DEFERRABLE 
INITIALLY IMMEDIATE;

CREATE INDEX idx_b_fk1 ON t_b(a_id) ;
CREATE INDEX idx_c_fk1 ON t_c(a_id) ;

VACUUM FULL ANALYZE t_a ;
VACUUM FULL ANALYZE t_b ;
VACUUM FULL ANALYZE t_c ;

-- this view contains a constant column,
-- this leads to bad execution plans
CREATE OR REPLACE VIEW v_test_bad AS SELECT
t_a.a_id,
t_b.b_id,
t_c.c_id,
1 AS constcol
FROM t_a, t_b, t_c
WHERE t_a.a_id = t_b.a_id
AND t_a.a_id = t_c.a_id ;

-- this view contains no constant columns, ... execution plan is good
CREATE OR REPLACE VIEW v_test_good AS SELECT
t_a.a_id,
t_b.b_id,
t_c.c_id
FROM t_a, t_b, t_c
WHERE t_a.a_id = t_b.a_id
AND t_a.a_id = t_c.a_id ;

-- this view contains a expression columns, which yield also a cosntant "1",
-- ... execution plan is good!?!
CREATE OR REPLACE VIEW v_test_strange AS SELECT
t_a.a_id,
t_b.b_id,
t_c.c_id,
t_a.a_id / t_a.a_id AS constcol
FROM t_a, t_b, t_c
WHERE t_a.a_id = t_b.a_id
AND t_a.a_id = t_c.a_id ;

-- Now test the 3 views with code from a real application ...

-- This one runs well
SELECT R.a_ID, R.b_ID
FROM t_b R LEFT JOIN v_test_good V on R.a_id = V.a_id
WHERE r.b_id between 900000 and 900999
AND v.a_id = v.a_id -- this cheat is necessary to make it runnable ...
;

-- This one runs well, too
SELECT R.a_ID, R.b_ID, v.constcol
FROM t_b R LEFT JOIN v_test_strange V on R.a_id = V.a_id
WHERE r.b_id between 900000 and 900999
AND v.a_id = v.a_id -- this cheat is necessary to make it runnable ...
;

-- This one runs forever ...
SELECT R.a_ID, R.b_ID, v.constcol
FROM t_b R LEFT JOIN v_test_bad V on R.a_id = V.a_id
WHERE r.b_id between 900000 and 900999
AND v.a_id = v.a_id
;

#2Tom Lane
tgl@sss.pgh.pa.us
In reply to: Jörg Hoppe (#1)
Re: Bug in 8.0.0rc3 query planner: constant column in view changes execution plan

=?ISO-8859-15?Q?J=F6rg_Hoppe?= <hoppe@geoinformationsdienst.de> writes:

--- SELECTing expressions, which do not access any table data,
--- should not influence the execution plan.

Unfortunately, that assertion is dead wrong.

SELECT R.a_ID, R.b_ID, v.constcol
FROM t_b R LEFT JOIN v_test_bad V on R.a_id = V.a_id
WHERE r.b_id between 900000 and 900999
AND v.a_id = v.a_id
;

The reason this behaves differently from the others is that a
constant-one column from v_test_bad won't automatically go to NULL
when the underlying table row is expanded to NULLs by the left join.
That prevents flattening of the view. See has_nullable_targetlist()
in prepjointree.c.

has_nullable_targetlist could be smarter than it is, but no improvement
in its intelligence would change the behavior in the case you give.
The only way this could be made to work is a fairly fundamental change
in the handling of variables in an execution tree, such that expressions
emitted by a view get evaluated below the point of the outer join rather
than above it. I've looked at this a bit and concluded that it probably
would not be a win overall ... indeed, it arguably might cause runtime
failures that do not occur now (eg, division by zero in a row that would
never have been evaluated otherwise).

regards, tom lane