BUG #14398: Order of Joins results in different results returned
The following bug has been logged on the website:
Bug reference: 14398
Logged by: Vijay Chemburkar
Email address: vjchem@gmail.com
PostgreSQL version: 9.5.2
Operating system: OS X 10.11.6
Description:
This appears to ultimately be a casting issue, but it was surprising to us
and thus I thought I'd file a bug report. I have a query that has two JOINs,
where each JOIN uses parameters $1 and $2. The order of the JOIN changes the
results returned. I can get the results I expect by explicitly casting $1
and $2 or by changing the order of the JOINs. Is this expected behavior? It
feels like I should get an error if the casting is ambiguous or that the
order of the JOINs should not change the results.
Here's a simplified version that repros this. get_articles_2 returns the
results I'd expect. I can make get_articles_1 return the same results if I
cast the parameters as dates or make them dates when I call execute.
CREATE TABLE article(
id SERIAL NOT NULL UNIQUE,
PRIMARY KEY (id)
);
CREATE TABLE article_metric (
id SERIAL NOT NULL UNIQUE,
article_id INTEGER NOT NULL,
data_date DATE NOT NULL,
metric_name CHARACTER VARYING(100) NOT NULL,
value JSONB NOT NULL,
PRIMARY KEY (id),
CONSTRAINT article_metric_key UNIQUE (article_id, data_date,
metric_name),
CONSTRAINT article_metric_fkey FOREIGN KEY (article_id)
REFERENCES article(id) ON UPDATE CASCADE ON DELETE CASCADE
);
INSERT INTO article(id) VALUES(1), (2);
INSERT INTO article_metric(article_id, data_date, metric_name, value)
VALUES
(1,'2016-09-24','all_view_durations_average','{"value": 1000}'),
(1,'2016-10-01','all_view_durations_average','{"value": 37}'),
(1,'2016-10-08','all_view_durations_average','{"value": 17}');
INSERT INTO article_metric(article_id, data_date, metric_name, value)
VALUES
(1,'2016-10-1','all_views', '{"value":1}'),(1,'2016-10-2','all_views',
'{"value":2}'),(1,'2016-10-3','all_views',
'{"value":3}'),(1,'2016-10-4','all_views',
'{"value":4}'),(1,'2016-10-5','all_views',
'{"value":5}'),(1,'2016-10-6','all_views',
'{"value":6}'),(1,'2016-10-7','all_views',
'{"value":7}'),(1,'2016-10-8','all_views',
'{"value":8}'),(1,'2016-10-9','all_views',
'{"value":9}'),(1,'2016-10-10','all_views',
'{"value":10}'),(1,'2016-10-11','all_views',
'{"value":11}'),(1,'2016-10-12','all_views',
'{"value":12}'),(1,'2016-10-13','all_views',
'{"value":13}'),(1,'2016-10-14','all_views',
'{"value":14}'),(1,'2016-10-15','all_views',
'{"value":15}'),(1,'2016-10-16','all_views',
'{"value":16}'),(1,'2016-10-17','all_views',
'{"value":17}'),(1,'2016-10-18','all_views',
'{"value":18}'),(1,'2016-10-19','all_views', '{"value":19}');
INSERT INTO article_metric(article_id, data_date, metric_name, value)
VALUES
(2,'2016-10-03','all_view_durations_average','{"value": 1000}'),
(2,'2016-10-10','all_view_durations_average','{"value": 119}');
INSERT INTO article_metric(article_id, data_date, metric_name, value)
VALUES
(2,'2016-10-5','all_views','{"value":4}'),(2,'2016-10-6','all_views','{"value":5}'),(2,'2016-10-7','all_views','{"value":6}'),(2,'2016-10-8','all_views','{"value":7}'),(2,'2016-10-9','all_views','{"value":8}'),(2,'2016-10-10','all_views','{"value":9}'),(2,'2016-10-11','all_views','{"value":10}'),(2,'2016-10-12','all_views','{"value":11}'),(2,'2016-10-13','all_views','{"value":12}'),(2,'2016-10-14','all_views','{"value":13}'),(2,'2016-10-15','all_views','{"value":14}'),(2,'2016-10-16','all_views','{"value":15}'),(2,'2016-10-17','all_views','{"value":16}'),(2,'2016-10-18','all_views','{"value":17}'),(2,'2016-10-19','all_views','{"value":18}'),(2,'2016-10-20','all_views','{"value":19}');
PREPARE get_articles_1 AS
SELECT
fia.id,
all_views.total AS views_in_period
FROM
article fia
JOIN (
SELECT
daily_views.article_id
FROM (SELECT
mp.date AS generated_date
FROM generate_series($1, $2, '1 day'::interval) mp)
dates
JOIN article_metric daily_views
ON daily_views.data_date = dates.generated_date
WHERE
daily_views.metric_name = 'all_views'
GROUP BY daily_views.article_id)
average_parts
ON average_parts.article_id = fia.id
JOIN
(SELECT
article_id,
SUM((value->>'value')::INT) AS total
FROM
article_metric
WHERE metric_name = 'all_views'
AND data_date BETWEEN $1 AND $2
GROUP BY article_id) all_views
ON all_views.article_id = fia.id;
PREPARE get_articles_2 AS
SELECT
fia.id,
all_views.total AS views_in_period
FROM
article fia
JOIN
(SELECT
article_id,
SUM((value->>'value')::INT) AS total
FROM
article_metric
WHERE metric_name = 'all_views'
AND data_date BETWEEN $1 AND $2
GROUP BY article_id) all_views
ON all_views.article_id = fia.id
JOIN (
SELECT
daily_views.article_id
FROM (SELECT
mp.date AS generated_date
FROM generate_series($1, $2, '1 day'::interval) mp)
dates
JOIN article_metric daily_views
ON daily_views.data_date = dates.generated_date
WHERE
daily_views.metric_name = 'all_views'
GROUP BY daily_views.article_id)
average_parts
ON average_parts.article_id = fia.id;
EXECUTE get_articles_1('2016-09-28T00:00:00.000+00:00',
'2016-10-16T00:00:00.000+00:00');
EXECUTE get_articles_2('2016-09-28T00:00:00.000+00:00',
'2016-10-16T00:00:00.000+00:00');
--
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs
vjchem@gmail.com writes:
This appears to ultimately be a casting issue, but it was surprising to us
and thus I thought I'd file a bug report. I have a query that has two JOINs,
where each JOIN uses parameters $1 and $2. The order of the JOIN changes the
results returned. I can get the results I expect by explicitly casting $1
and $2 or by changing the order of the JOINs. Is this expected behavior? It
feels like I should get an error if the casting is ambiguous or that the
order of the JOINs should not change the results.
I think the point is that you've got two separate uses of $1 and $2
in contexts that will lead to different conclusions about what their
types are (date or timestamptz, respectively). Whichever one the
parser comes to first will determine its choice, and the other context
is not so incompatible as to result in an error; nor is the provided
input string. But you'll get different results depending on that
choice.
Yes, it's ambiguous, but throwing an error would probably not make
more people happy than it makes unhappy.
regards, tom lane
--
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs