BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

Started by PG Bug reporting formover 5 years ago28 messageshackersbugs
Jump to latest
#1PG Bug reporting form
noreply@postgresql.org
hackersbugs

The following bug has been logged on the website:

Bug reference: 16583
Logged by: Jiří Fejfar
Email address: jurafejfar@gmail.com
PostgreSQL version: 12.4
Operating system: debian 10.5
Description:

Joining two identical tables placed on separate DBs with different collation
accessed through postgres_fdw failed when joined with merge join. Some
records are missing (7 vs. 16 rows in example) in output. See this snippet
https://gitlab.com/-/snippets/2004522 (or code pasted below) for psql script
reproducing error also with expected output (working fine on alpine linux).
The same behavior is also observed on postgres v13.

Regards, Jiří Fejfar.

--------------------------------system---------------------
debian
cat /etc/debian_version
10.5

ldd --version
ldd (Debian GLIBC 2.28-10) 2.28
Copyright © 2018 Free Software Foundation, Inc.

--------
alpine
cat /etc/alpine-release
3.12.0

ldd --version
musl libc (x86_64)
Version 1.1.24
Dynamic Program Loader
Usage: /lib/ld-musl-x86_64.so.1 [options] [--] pathname

------------------------psql script--------------------
DROP DATABASE IF EXISTS db_en; DROP DATABASE IF EXISTS db_cz; DROP DATABASE
IF EXISTS db_join;
DROP USER IF EXISTS fdw_user_en; DROP USER IF EXISTS fdw_user_cz;

CREATE DATABASE db_en encoding UTF8 LC_COLLATE 'en_US.UTF-8' LC_CTYPE
'en_US.UTF-8' TEMPLATE template0;
CREATE DATABASE db_cz encoding UTF8 LC_COLLATE 'cs_CZ.UTF-8' LC_CTYPE
'cs_CZ.UTF-8' TEMPLATE template0;
CREATE DATABASE db_join encoding UTF8 LC_COLLATE 'en_US.UTF-8' LC_CTYPE
'en_US.UTF-8' TEMPLATE template0;

\c db_en

CREATE TABLE t_nuts (
id INT PRIMARY KEY,
label text
);

WITH w_labels AS (
VALUES ('CZ0100'), ('CZ0201'), ('CZ0202'), ('CZ0203'), ('CZ0204'),
('CZ0205'),
('CZ0206'), ('CZ0207'), ('CZ0208'), ('CZ0209'), ('CZ020A'), ('CZ020B'),
('CZ020C'),
('CZ0311'), ('CZ0312'), ('CZ0313')
)
INSERT INTO t_nuts (id, label)
SELECT
row_number() OVER() AS id,
w_labels.column1 as label FROM w_labels--, generate_series(1, 500)
;

VACUUM (FULL, ANALYZE) t_nuts;

SELECT label, count(*) from t_nuts GROUP BY label ORDER BY label;

\c db_cz

CREATE TABLE t_nuts (
id INT PRIMARY KEY,
label text
);

WITH w_labels AS (
VALUES ('CZ0100'), ('CZ0201'), ('CZ0202'), ('CZ0203'), ('CZ0204'),
('CZ0205'),
('CZ0206'), ('CZ0207'), ('CZ0208'), ('CZ0209'), ('CZ020A'), ('CZ020B'),
('CZ020C'),
('CZ0311'), ('CZ0312'), ('CZ0313')
)
INSERT INTO t_nuts (id, label)
SELECT
row_number() OVER() AS id,
w_labels.column1 as label FROM w_labels--, generate_series(1, 1000)
;

VACUUM (FULL, ANALYZE) t_nuts;

SELECT label, count(*) from t_nuts GROUP BY label ORDER BY label;

\c db_en
CREATE USER fdw_user_en WITH PASSWORD 'fdw_pass_en';
GRANT SELECT ON TABLE t_nuts TO fdw_user_en;

\c db_join

CREATE EXTENSION postgres_fdw ;
CREATE SERVER db_en_serv FOREIGN DATA WRAPPER postgres_fdw OPTIONS ( host
'localhost', port '5432', dbname 'db_en', use_remote_estimate 'True');
CREATE USER MAPPING FOR CURRENT_USER SERVER db_en_serv OPTIONS ( user
'fdw_user_en', password 'fdw_pass_en');
CREATE SCHEMA en;
IMPORT FOREIGN SCHEMA public LIMIT TO (t_nuts) FROM SERVER db_en_serv INTO
en;

SELECT label, count(*) FROM en.t_nuts GROUP BY label ORDER BY label;

\c db_cz
CREATE USER fdw_user_cz WITH PASSWORD 'fdw_pass_cz';
GRANT SELECT ON TABLE t_nuts TO fdw_user_cz;

\c db_join

CREATE SERVER db_cz_serv FOREIGN DATA WRAPPER postgres_fdw OPTIONS ( host
'localhost', port '5432', dbname 'db_cz', use_remote_estimate 'True');
CREATE USER MAPPING FOR CURRENT_USER SERVER db_cz_serv OPTIONS ( user
'fdw_user_cz', password 'fdw_pass_cz');
CREATE SCHEMA cz;
IMPORT FOREIGN SCHEMA public LIMIT TO (t_nuts) FROM SERVER db_cz_serv INTO
cz;

SELECT label, count(*) FROM cz.t_nuts GROUP BY label ORDER BY label;

EXPLAIN (VERBOSE)
SELECT cz__t_nuts.label, count(*)
FROM cz.t_nuts AS cz__t_nuts
INNER JOIN en.t_nuts AS en__t_nuts ON (cz__t_nuts.label =
en__t_nuts.label)
GROUP BY cz__t_nuts.label;

SELECT cz__t_nuts.label, count(*)
FROM cz.t_nuts AS cz__t_nuts
INNER JOIN en.t_nuts AS en__t_nuts ON (cz__t_nuts.label =
en__t_nuts.label)
GROUP BY cz__t_nuts.label;

select version();

------------------------wrong output (Debian, GLIBC 2.28)----
DROP DATABASE
DROP DATABASE
DROP DATABASE
DROP ROLE
DROP ROLE
CREATE DATABASE
CREATE DATABASE
CREATE DATABASE
Nyní jste připojeni k databázi "db_en" jako uživatel "postgres".
CREATE TABLE
INSERT 0 16
VACUUM
label | count
--------+-------
CZ0100 | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 řádek)

Nyní jste připojeni k databázi "db_cz" jako uživatel "postgres".
CREATE TABLE
INSERT 0 16
VACUUM
label | count
--------+-------
CZ0100 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 řádek)

Nyní jste připojeni k databázi "db_en" jako uživatel "postgres".
CREATE ROLE
GRANT
Nyní jste připojeni k databázi "db_join" jako uživatel "postgres".
CREATE EXTENSION
CREATE SERVER
CREATE USER MAPPING
CREATE SCHEMA
IMPORT FOREIGN SCHEMA
label | count
--------+-------
CZ0100 | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 řádek)

Nyní jste připojeni k databázi "db_cz" jako uživatel "postgres".
CREATE ROLE
GRANT
Nyní jste připojeni k databázi "db_join" jako uživatel "postgres".
CREATE SERVER
CREATE USER MAPPING
CREATE SCHEMA
IMPORT FOREIGN SCHEMA
label | count
--------+-------
CZ0100 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 řádek)

QUERY PLAN

-----------------------------------------------------------------------------------------------
GroupAggregate (cost=203.28..204.16 rows=16 width=40)
Output: cz__t_nuts.label, count(*)
Group Key: cz__t_nuts.label
-> Merge Join (cost=203.28..203.92 rows=16 width=32)
Output: cz__t_nuts.label
Merge Cond: (cz__t_nuts.label = en__t_nuts.label)
-> Foreign Scan on cz.t_nuts cz__t_nuts (cost=101.48..101.84
rows=16 width=7)
Output: cz__t_nuts.id, cz__t_nuts.label
Remote SQL: SELECT label FROM public.t_nuts ORDER BY label
ASC NULLS LAST
-> Sort (cost=101.80..101.84 rows=16 width=7)
Output: en__t_nuts.label
Sort Key: en__t_nuts.label
-> Foreign Scan on en.t_nuts en__t_nuts
(cost=100.00..101.48 rows=16 width=7)
Output: en__t_nuts.label
Remote SQL: SELECT label FROM public.t_nuts
(15 řádek)

label | count
--------+-------
CZ0100 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(7 řádek)

version

------------------------------------------------------------------------------------------------------------------
PostgreSQL 12.4 (Debian 12.4-1.pgdg100+1) on x86_64-pc-linux-gnu, compiled
by gcc (Debian 8.3.0-6) 8.3.0, 64-bit
(1 řádka)

------------------------correct output (Alpine, musl libc)----

DROP DATABASE
DROP DATABASE
DROP DATABASE
DROP ROLE
DROP ROLE
CREATE DATABASE
CREATE DATABASE
CREATE DATABASE
You are now connected to database "db_en" as user "postgres".
CREATE TABLE
INSERT 0 16
VACUUM
label | count
--------+-------
CZ0100 | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 rows)

You are now connected to database "db_cz" as user "postgres".
CREATE TABLE
INSERT 0 16
VACUUM
label | count
--------+-------
CZ0100 | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 rows)

You are now connected to database "db_en" as user "postgres".
CREATE ROLE
GRANT
You are now connected to database "db_join" as user "postgres".
CREATE EXTENSION
CREATE SERVER
CREATE USER MAPPING
CREATE SCHEMA
IMPORT FOREIGN SCHEMA
label | count
--------+-------
CZ0100 | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 rows)

You are now connected to database "db_cz" as user "postgres".
CREATE ROLE
GRANT
You are now connected to database "db_join" as user "postgres".
CREATE SERVER
CREATE USER MAPPING
CREATE SCHEMA
IMPORT FOREIGN SCHEMA
label | count
--------+-------
CZ0100 | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 rows)

QUERY PLAN

-----------------------------------------------------------------------------------------------
GroupAggregate (cost=203.28..204.16 rows=16 width=40)
Output: cz__t_nuts.label, count(*)
Group Key: cz__t_nuts.label
-> Merge Join (cost=203.28..203.92 rows=16 width=32)
Output: cz__t_nuts.label
Merge Cond: (cz__t_nuts.label = en__t_nuts.label)
-> Foreign Scan on cz.t_nuts cz__t_nuts (cost=101.48..101.84
rows=16 width=7)
Output: cz__t_nuts.id, cz__t_nuts.label
Remote SQL: SELECT label FROM public.t_nuts ORDER BY label
ASC NULLS LAST
-> Sort (cost=101.80..101.84 rows=16 width=7)
Output: en__t_nuts.label
Sort Key: en__t_nuts.label
-> Foreign Scan on en.t_nuts en__t_nuts
(cost=100.00..101.48 rows=16 width=7)
Output: en__t_nuts.label
Remote SQL: SELECT label FROM public.t_nuts
(15 rows)

label | count
--------+-------
CZ0100 | 1
CZ0201 | 1
CZ0202 | 1
CZ0203 | 1
CZ0204 | 1
CZ0205 | 1
CZ0206 | 1
CZ0207 | 1
CZ0208 | 1
CZ0209 | 1
CZ020A | 1
CZ020B | 1
CZ020C | 1
CZ0311 | 1
CZ0312 | 1
CZ0313 | 1
(16 rows)

version

---------------------------------------------------------------------------------------
PostgreSQL 12.4 on x86_64-pc-linux-musl, compiled by gcc (Alpine 9.3.0)
9.3.0, 64-bit
(1 row)

#2Tom Lane
tgl@sss.pgh.pa.us
In reply to: PG Bug reporting form (#1)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

PG Bug reporting form <noreply@postgresql.org> writes:

Joining two identical tables placed on separate DBs with different collation
accessed through postgres_fdw failed when joined with merge join. Some
records are missing (7 vs. 16 rows in example) in output. See this snippet
https://gitlab.com/-/snippets/2004522 (or code pasted below) for psql script
reproducing error also with expected output (working fine on alpine linux).

So I think what is happening here is that postgres_fdw's version of
IMPORT FOREIGN SCHEMA translates "COLLATE default" on the remote
server to "COLLATE default" on the local one, which of course is
a big fail if the defaults don't match. That allows the local
planner to believe that remote ORDER BYs on the two foreign tables
will give compatible results, causing the merge join to not work
very well at all.

We probably need to figure out some way of substituting the remote
database's actual lc_collate setting when we see "COLLATE default".

I'm also thinking that the documentation is way too cavalier about
dismissing non-matching collation names by just saying that you
can turn off import_collate. The fact is that doing so is likely
to be disastrous, the more so the more optimization intelligence
we add to postgres_fdw.

I wonder if we could do something like this:

* Change postgresImportForeignSchema() as above, so that it will never
apply "COLLATE default" to an imported column, except in the case
where you turn off import_collate.

* In postgres_fdw planning, treat "COLLATE default" on a foreign table
column as meaning "we don't know the collation"; never believe that
that column can be ordered in a way that matches any local collation.
(It'd be better perhaps if there were an explicit way to say "COLLATE
unknown", but I hesitate to invent such a concept in general.)

* Document that in manual creation of a postgres_fdw foreign table
with a text column, you need to explicitly write the correct collation
if you want the best query plans to be generated.

This seems like too big a behavioral change to consider back-patching,
unfortunately.

regards, tom lane

#3Tom Lane
tgl@sss.pgh.pa.us
In reply to: Tom Lane (#2)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

I wrote:

So I think what is happening here is that postgres_fdw's version of
IMPORT FOREIGN SCHEMA translates "COLLATE default" on the remote
server to "COLLATE default" on the local one, which of course is
a big fail if the defaults don't match. That allows the local
planner to believe that remote ORDER BYs on the two foreign tables
will give compatible results, causing the merge join to not work
very well at all.

We probably need to figure out some way of substituting the remote
database's actual lc_collate setting when we see "COLLATE default".

Here's a draft patch for that part. There's a few things to quibble
about:

* It tests for "COLLATE default" by checking whether pg_collation.oid
is DEFAULT_COLLATION_OID, thus assuming that that OID will never change.
I think this is safer than checking the collation name, but maybe
somebody else would have a different opinion? Another idea is to check
whether collprovider is 'd', but that only works with v10 and up.

* It might not be able to find a remote collation matching the database's
datcollate/datctype. As coded, we'll end up creating the local column
with "COLLATE default", putting us back in the same hurt we're in now.
I think this is okay given the other planned change to interpret "COLLATE
default" as "we don't know what collation this is". In any case it's hard
to see what else we could do, other than fail entirely.

* Alternatively, it might find more than one such remote collation;
indeed that's the norm, eg we'd typically find both "en_US" and
"en_US.utf8", or the like. I made it choose the shortest collation
name in such cases, but maybe there is a case for the longest?
I don't much want it to pick "ucs_basic" over "C", though.

* The whole thing is certain to fall over whenever we find a way to
allow ICU collations as database defaults. While we can presumably
fix the query when we make that change, existing postgres_fdw releases
would not work against a newer server. Probably there's little to be
done about this, either.

* As shown by the expected-output changes, there are some test cases
that expose that we're not picking the default collation anymore.
That creates a testing problem: this can't be committed as-is because
it'll fail with any other locale environment than what the expected
file was made with. We could lobotomize the test cases to not print
the column collation, but then we're not really verifying that this
code does what it's supposed to. Not sure what the best compromise is.

Comments?

regards, tom lane

Attachments:

dont-import-collations-as-default-1.patchtext/x-diff; charset=us-ascii; name=dont-import-collations-as-default-1.patchDownload+51-40
#4Tom Lane
tgl@sss.pgh.pa.us
In reply to: Tom Lane (#3)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

I wrote:

So I think what is happening here is that postgres_fdw's version of
IMPORT FOREIGN SCHEMA translates "COLLATE default" on the remote
server to "COLLATE default" on the local one, which of course is
a big fail if the defaults don't match. That allows the local
planner to believe that remote ORDER BYs on the two foreign tables
will give compatible results, causing the merge join to not work
very well at all.

Here's a full patch addressing this issue. I decided that the best
way to address the test-instability problem is to explicitly give
collations to all the foreign-table columns for which it matters
in the postgres_fdw test. (For portability's sake, that has to be
"C" or "POSIX"; I mostly used "C".) Aside from ensuring that the
test still passes with some other prevailing locale, this seems like
a good idea since we'll then be testing the case we are encouraging
users to use.

And indeed, it immediately turned up a new problem: if we explicitly
assign a collation to a foreign-table column c, the system won't
ship WHERE clauses as simple as "c = 'foo'" to the remote. This
surprised me, but the reason turned out to be that what postgres_fdw
is actually seeing is something like

{OPEXPR
:opno 98
:opfuncid 67
:opresulttype 16
:opretset false
:opcollid 0
:inputcollid 950
:args (
{VAR
:varno 6
:varattno 4
:vartype 25
:vartypmod -1
:varcollid 950
:varlevelsup 0
:varnosyn 6
:varattnosyn 4
:location 171
}
{RELABELTYPE
:arg
{CONST
:consttype 25
:consttypmod -1
:constcollid 100
:constlen -1
:constbyval false
:constisnull false
:location 341
:constvalue 9 [ 36 0 0 0 48 48 48 48 49 ]
}
:resulttype 25
:resulttypmod -1
:resultcollid 950
:relabelformat 2
:location -1
}
)
:location -1
}

that is, the constant is being explicitly relabeled with the correct
collation, and thus is_foreign_expr() thinks the collation shown by
the RelabelType node is an unsafely-introduced collation.

What I did about this was to change the recursion rule in
foreign_expr_walker() so that merging a safely-derived collation with
the same collation unsafely derived is considered safe. I think this
is all right, and it allows us to accept some cases that previously
were rejected as unsafe. But I might be missing something.

(BTW, there's an independent bug here, which is that we're getting
a tree of the above shape rather than a simple Const with the
appropriate collation; that is, this tree isn't fully const-folded.
This is a bug in canonicalize_ec_expression, which I'll go fix
separately. But it won't affect the problem at hand.)

This seems like a sufficiently large change in postgres_fdw's
behavior to require review, so I'll go add this to the next CF.

regards, tom lane

Attachments:

fix-postgres-fdw-collation-handling-1.patchtext/x-diff; charset=us-ascii; name=fix-postgres-fdw-collation-handling-1.patchDownload+205-146
#5Jiří Fejfar
jurafejfar@gmail.com
In reply to: Tom Lane (#2)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

On 17.08.2020 17:26, Tom Lane wrote:

PG Bug reporting form <noreply@postgresql.org> writes:

Joining two identical tables placed on separate DBs with different collation
accessed through postgres_fdw failed when joined with merge join. Some
records are missing (7 vs. 16 rows in example) in output. See this snippet
https://gitlab.com/-/snippets/2004522 (or code pasted below) for psql script
reproducing error also with expected output (working fine on alpine linux).

So I think what is happening here is that postgres_fdw's version of
IMPORT FOREIGN SCHEMA translates "COLLATE default" on the remote
server to "COLLATE default" on the local one, which of course is
a big fail if the defaults don't match. That allows the local
planner to believe that remote ORDER BYs on the two foreign tables
will give compatible results, causing the merge join to not work
very well at all.

I am just wondering: if it is bug in IMPORT FOREIGN SCHEMA, how it is
possible the bug is not present [1]https://gitlab.com/-/snippets/2004522#note_396751634 when provided psql script [2]https://gitlab.com/-/snippets/2004522 is run
on Alpine Linux? I suppose, both Debian and Alpine has the same IMPORT
FOREIGN SCHEMA behavior (both has PG12.4). But differs in glibc vs. musl
libc. Is it possible, there is also something differing in those
libraries with respect to cs.CZ-UTF8?

Best regards, Jiří.

[1]: https://gitlab.com/-/snippets/2004522#note_396751634

[2]: https://gitlab.com/-/snippets/2004522

#6Tom Lane
tgl@sss.pgh.pa.us
In reply to: Jiří Fejfar (#5)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

=?UTF-8?B?SmnFmcOtIEZlamZhcg==?= <jurafejfar@gmail.com> writes:

I am just wondering: if it is bug in IMPORT FOREIGN SCHEMA, how it is
possible the bug is not present [1] when provided psql script [2] is run
on Alpine Linux?

[ shrug ] Could easy be that Alpine distributes dumbed-down locale
definitions in which the sort order isn't actually any different
between those two locales. Did you check what the sort order of
your test data looks like in each case?

regards, tom lane

#7Jiří Fejfar
jurafejfar@gmail.com
In reply to: Tom Lane (#6)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

On Wed, 19 Aug 2020 at 07:53, Tom Lane <tgl@sss.pgh.pa.us> wrote:

=?UTF-8?B?SmnFmcOtIEZlamZhcg==?= <jurafejfar@gmail.com> writes:

I am just wondering: if it is bug in IMPORT FOREIGN SCHEMA, how it is
possible the bug is not present [1] when provided psql script [2] is run
on Alpine Linux?

[ shrug ] Could easy be that Alpine distributes dumbed-down locale
definitions in which the sort order isn't actually any different
between those two locales. Did you check what the sort order of
your test data looks like in each case?

regards, tom lane

Oh, I can see on Alpine that even local tables are ordered like with
en.US-UTF8 even if DB has default cs.CZ-UTF8.

postgres=# \l
List of databases
Name | Owner | Encoding | Collate | Ctype |
Access privileges
-----------+----------+----------+-------------+-------------+-----------------------
db_cz | postgres | UTF8 | cs_CZ.UTF-8 | cs_CZ.UTF-8 |
db_en | postgres | UTF8 | en_US.UTF-8 | en_US.UTF-8 |
...
postgres=# \c db_cz ;
You are now connected to database "db_cz" as user "postgres".
db_cz=# select * from t_nuts order by label;
id | label
----+--------
1 | CZ0100
2 | CZ0201
...

11 | CZ020A
12 | CZ020B
13 | CZ020C

...

It is mentioned in Alpine docker docs [1]https://hub.docker.com/_/postgres that "Alpine-based variants
do not support locales;".

Thanks, J.

[1]: https://hub.docker.com/_/postgres

#8Peter Eisentraut
peter_e@gmx.net
In reply to: Tom Lane (#4)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

On 2020-08-18 22:09, Tom Lane wrote:

Here's a full patch addressing this issue. I decided that the best
way to address the test-instability problem is to explicitly give
collations to all the foreign-table columns for which it matters
in the postgres_fdw test. (For portability's sake, that has to be
"C" or "POSIX"; I mostly used "C".) Aside from ensuring that the
test still passes with some other prevailing locale, this seems like
a good idea since we'll then be testing the case we are encouraging
users to use.

I have studied this patch and this functionality. I don't think
collation differences between remote and local instances are handled
sufficiently. This bug report and patch addresses one particular case,
where the database-wide collation of the remote and local instance are
different. But it doesn't handle cases like the same collation name
doing different things, having different versions, or different
attributes. This probably works currently because the libc collations
don't have much functionality like that, but there is a variety of work
conceived (or, in the case of version tracking, already done since the
bug was first discussed) that would break that.

Taking a step back, I think there are only two ways this could really
work: Either, the admin makes a promise that all the collations match on
all the instances; then the planner can take advantage of that. Or,
there is no such promise, and then the planner can't. I don't
understand what the currently implemented approach is. It appears to be
something in the middle, where certain representations are made that
certain things might match, and then there is some nontrivial code that
analyzes expressions whether they conform to those rules. As you said,
the description of the import_collate option is kind of hand-wavy about
all this.

--
Peter Eisentraut
2ndQuadrant, an EDB company
https://www.2ndquadrant.com/

#9Tom Lane
tgl@sss.pgh.pa.us
In reply to: Peter Eisentraut (#8)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

Peter Eisentraut <peter.eisentraut@2ndquadrant.com> writes:

I have studied this patch and this functionality. I don't think
collation differences between remote and local instances are handled
sufficiently. This bug report and patch addresses one particular case,
where the database-wide collation of the remote and local instance are
different. But it doesn't handle cases like the same collation name
doing different things, having different versions, or different
attributes.

Yeah, agreed. I don't think it's practical to have a 100% solution.
I'd make a couple of points:

* The design philosophy of postgres_fdw, to the extent it has one,
is that it's the user's responsibility to make sure that the local
declaration of a foreign table is a faithful model of the actual
remote object. There are certain variances you can get away with,
but in general, if it breaks it's your fault. (Admittedly, if the
local declaration was created via IMPORT FOREIGN SCHEMA, we would
like to be sure that it's right without help. But there's only
so much we can do there. There are already plenty of ways to
fool IMPORT FOREIGN SCHEMA anyway, for example if the same type
name refers to something different on the two systems.)

* Not being able to ship any qual conditions involving collatable
datatypes seems like an absolutely unacceptable outcome. Thus,
I don't buy your alternative of not letting the planner make
any assumptions at all about compatibility of remote collations.

I think that what this patch is basically doing is increasing the
visibility of collation compatibility as something that postgres_fdw
users need to take into account. Sure, it's not a 100% solution,
but it improves the situation, and it seems like we'd have to do
this anyway along the road to any better solution.

If you've got ideas about how to improve things further, by all
means let's discuss that ... but let's not make the perfect be
the enemy of the good.

regards, tom lane

#10Tom Lane
tgl@sss.pgh.pa.us
In reply to: Tom Lane (#9)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

Rebased over b663a4136 --- no substantive changes, just keeping
the cfbot happy.

regards, tom lane

Attachments:

fix-postgres-fdw-collation-handling-2.patchtext/x-diff; charset=us-ascii; name=fix-postgres-fdw-collation-handling-2.patchDownload+205-146
#11Neil Chen
carpenter.nail.cz@gmail.com
In reply to: Tom Lane (#10)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

The following review has been posted through the commitfest application:
make installcheck-world: tested, passed
Implements feature: tested, passed
Spec compliant: not tested
Documentation: tested, passed

Greetings, 
I learned about the patch and read your discussions. I'm not sure why this patch has not been discussed now. In short, I think it's beneficial to submit it as a temporary solution.
Another thing I want to know is whether these codes can be simplified:
-	if (state > outer_cxt->state)
+	if (collation == outer_cxt->collation &&
+		((state == FDW_COLLATE_UNSAFE &&
+		  outer_cxt->state == FDW_COLLATE_SAFE) ||
+		 (state == FDW_COLLATE_SAFE &&
+		  outer_cxt->state == FDW_COLLATE_UNSAFE)))
+	{
+		outer_cxt->state = FDW_COLLATE_SAFE;
+	}
+	else if (state > outer_cxt->state)

If the state is determined by the collation, when the collations are equal, do we just need to judge the state not equal to FDW_COLLATE_NONE?

#12Ibrar Ahmed
ibrar.ahmad@gmail.com
In reply to: Neil Chen (#11)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

On Wed, Mar 3, 2021 at 1:42 PM Neil Chen <carpenter.nail.cz@gmail.com>
wrote:

The following review has been posted through the commitfest application:
make installcheck-world: tested, passed
Implements feature: tested, passed
Spec compliant: not tested
Documentation: tested, passed

Greetings,
I learned about the patch and read your discussions. I'm not sure why this
patch has not been discussed now. In short, I think it's beneficial to
submit it as a temporary solution.
Another thing I want to know is whether these codes can be simplified:
-       if (state > outer_cxt->state)
+       if (collation == outer_cxt->collation &&
+               ((state == FDW_COLLATE_UNSAFE &&
+                 outer_cxt->state == FDW_COLLATE_SAFE) ||
+                (state == FDW_COLLATE_SAFE &&
+                 outer_cxt->state == FDW_COLLATE_UNSAFE)))
+       {
+               outer_cxt->state = FDW_COLLATE_SAFE;
+       }
+       else if (state > outer_cxt->state)

If the state is determined by the collation, when the collations are
equal, do we just need to judge the state not equal to FDW_COLLATE_NONE?

The patch is failing the regression, @Tom Lane <tgl@sss.pgh.pa.us> can you
please take a look at that.

https://cirrus-ci.com/task/4593497492684800

============== running regression test queries ==============
test postgres_fdw ... FAILED 2782 ms
============== shutting down postmaster ==============
======================
1 of 1 tests failed.
======================
The differences that caused some tests to fail can be viewed in the
file "/tmp/cirrus-ci-build/contrib/postgres_fdw/regression.diffs". A copy
of the test summary that you see
above is saved in the file
"/tmp/cirrus-ci-build/contrib/postgres_fdw/regression.out".

--
Ibrar Ahmed

#13Tom Lane
tgl@sss.pgh.pa.us
In reply to: Ibrar Ahmed (#12)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

Ibrar Ahmed <ibrar.ahmad@gmail.com> writes:

The patch is failing the regression, @Tom Lane <tgl@sss.pgh.pa.us> can you
please take a look at that.

Seems to just need an update of the expected-file to account for test
cases added recently. (I take no position on whether the new results
are desirable; some of these might be breaking the intent of the case.
But this should quiet the cfbot anyway.)

regards, tom lane

Attachments:

fix-postgres-fdw-collation-handling-3.patchtext/x-diff; charset=us-ascii; name=fix-postgres-fdw-collation-handling-3.patchDownload+221-155
#14Ibrar Ahmed
ibrar.ahmad@gmail.com
In reply to: Tom Lane (#13)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

On Wed, Jul 14, 2021 at 1:41 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Ibrar Ahmed <ibrar.ahmad@gmail.com> writes:

The patch is failing the regression, @Tom Lane <tgl@sss.pgh.pa.us> can

you

please take a look at that.

Seems to just need an update of the expected-file to account for test
cases added recently. (I take no position on whether the new results
are desirable; some of these might be breaking the intent of the case.
But this should quiet the cfbot anyway.)

regards, tom lane

Thanks for the update.

The test case was added by commit "Add support for asynchronous execution"
"27e1f14563cf982f1f4d71e21ef247866662a052" by Etsuro Fujita. He can comment
whether the new results are desirable or not.

--
Ibrar Ahmed

#15Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Ibrar Ahmed (#14)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

On Thu, Jul 15, 2021 at 4:17 AM Ibrar Ahmed <ibrar.ahmad@gmail.com> wrote:

On Wed, Jul 14, 2021 at 1:41 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Seems to just need an update of the expected-file to account for test
cases added recently. (I take no position on whether the new results
are desirable; some of these might be breaking the intent of the case.
But this should quiet the cfbot anyway.)

The test case was added by commit "Add support for asynchronous execution"
"27e1f14563cf982f1f4d71e21ef247866662a052" by Etsuro Fujita. He can comment
whether the new results are desirable or not.

The new results aren't what I intended. I'll update the patch to
avoid that by modifying the original test cases properly, if there are
no objections.

Best regards,
Etsuro Fujita

#16Ibrar Ahmed
ibrar.ahmad@gmail.com
In reply to: Etsuro Fujita (#15)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

On Thu, Jul 15, 2021 at 2:35 PM Etsuro Fujita <etsuro.fujita@gmail.com>
wrote:

On Thu, Jul 15, 2021 at 4:17 AM Ibrar Ahmed <ibrar.ahmad@gmail.com> wrote:

On Wed, Jul 14, 2021 at 1:41 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Seems to just need an update of the expected-file to account for test
cases added recently. (I take no position on whether the new results
are desirable; some of these might be breaking the intent of the case.
But this should quiet the cfbot anyway.)

The test case was added by commit "Add support for asynchronous

execution"

"27e1f14563cf982f1f4d71e21ef247866662a052" by Etsuro Fujita. He can

comment

whether the new results are desirable or not.

The new results aren't what I intended. I'll update the patch to
avoid that by modifying the original test cases properly, if there are
no objections.

Best regards,
Etsuro Fujita

Thanks Etsuro,

I have changed the status to "Waiting On Author", because patch need
changes.
Etsuro, can you make yourself a reviewer/co-author to keep track of that?

--
Ibrar Ahmed

#17Tom Lane
tgl@sss.pgh.pa.us
In reply to: Etsuro Fujita (#15)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

Etsuro Fujita <etsuro.fujita@gmail.com> writes:

On Thu, Jul 15, 2021 at 4:17 AM Ibrar Ahmed <ibrar.ahmad@gmail.com> wrote:

On Wed, Jul 14, 2021 at 1:41 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Seems to just need an update of the expected-file to account for test
cases added recently. (I take no position on whether the new results
are desirable; some of these might be breaking the intent of the case.
But this should quiet the cfbot anyway.)

The test case was added by commit "Add support for asynchronous execution"
"27e1f14563cf982f1f4d71e21ef247866662a052" by Etsuro Fujita. He can comment
whether the new results are desirable or not.

The new results aren't what I intended. I'll update the patch to
avoid that by modifying the original test cases properly, if there are
no objections.

Please follow up on that sometime? In the meantime, here is a rebase
over aa769f80e and 2dc53fe2a, to placate the cfbot.

The real reason that this hasn't gotten committed is that I remain
pretty uncomfortable about whether it's an acceptable solution to
the problem. Suddenly asking people to plaster COLLATE clauses
on all their textual remote columns seems like a big compatibility
gotcha. However, I lack any ideas about a less unpleasant solution.

regards, tom lane

Attachments:

fix-postgres-fdw-collation-handling-4.patchtext/x-diff; charset=us-ascii; name=fix-postgres-fdw-collation-handling-4.patchDownload+208-131
#18Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Tom Lane (#17)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

On Thu, Sep 2, 2021 at 5:42 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Etsuro Fujita <etsuro.fujita@gmail.com> writes:

On Thu, Jul 15, 2021 at 4:17 AM Ibrar Ahmed <ibrar.ahmad@gmail.com> wrote:

On Wed, Jul 14, 2021 at 1:41 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Seems to just need an update of the expected-file to account for test
cases added recently. (I take no position on whether the new results
are desirable; some of these might be breaking the intent of the case.
But this should quiet the cfbot anyway.)

The test case was added by commit "Add support for asynchronous execution"
"27e1f14563cf982f1f4d71e21ef247866662a052" by Etsuro Fujita. He can comment
whether the new results are desirable or not.

The new results aren't what I intended. I'll update the patch to
avoid that by modifying the original test cases properly, if there are
no objections.

Please follow up on that sometime?

Will do in this commitfest.

In the meantime, here is a rebase
over aa769f80e and 2dc53fe2a, to placate the cfbot.

Thanks for the rebase!

Best regards,
Etsuro Fujita

#19Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Tom Lane (#17)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

On Thu, Sep 2, 2021 at 5:42 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

The real reason that this hasn't gotten committed is that I remain
pretty uncomfortable about whether it's an acceptable solution to
the problem. Suddenly asking people to plaster COLLATE clauses
on all their textual remote columns seems like a big compatibility
gotcha.

I think so too. I reviewed the patch:

                /*
                 * If the Var is from the foreign table, we consider its
-                * collation (if any) safe to use.  If it is from another
+                * collation (if any) safe to use, *unless* it's
+                * DEFAULT_COLLATION_OID.  We treat that as meaning "we don't
+                * know which collation this is".  If it is from another
                 * table, we treat its collation the same way as we would a
                 * Param's collation, ie it's not safe for it to have a
                 * non-default collation.
@@ -350,7 +352,12 @@ foreign_expr_walker(Node *node,
                    /* Else check the collation */
                    collation = var->varcollid;
-                   state = OidIsValid(collation) ? FDW_COLLATE_SAFE :
FDW_COLLATE_NONE;
+                   if (collation == InvalidOid)
+                       state = FDW_COLLATE_NONE;
+                   else if (collation == DEFAULT_COLLATION_OID)
+                       state = FDW_COLLATE_UNSAFE;
+                   else
+                       state = FDW_COLLATE_SAFE;

One thing I noticed about this change is:

explain (verbose, costs off) select * from ft3 order by f2;
QUERY PLAN
---------------------------------------------------------
Sort
Output: f1, f2, f3
Sort Key: ft3.f2
-> Foreign Scan on public.ft3
Output: f1, f2, f3
Remote SQL: SELECT f1, f2, f3 FROM public.loct3
(6 rows)

where ft3 is defined as in the postgres_fdw regression test (see the
section “test handling of collations”). For this query, the sort is
done locally, but I think it should be done remotely, or an error
should be raised, as we don’t know the collation assigned to the
column “f2”. So I think we need to do something about this.

Having said that, I think another option for this would be to left the
code as-is; assume that 1) the foreign var has "COLLATE default”, not
an unknown collation, when labeled with "COLLATE default”, and 2)
"COLLATE default” on the local database matches "COLLATE default” on
the remote database. This would be the same as before, so we could
avoid the concern mentioned above. I agree with the
postgresImportForeignSchema() change, except creating a local column
with "COLLATE default" silently if that function can’t find a remote
collation matching the database's datcollate/datctype when seeing
"COLLATE default”, in which case I think an error should be raised to
prompt the user to check the settings for the remote server and/or
define foreign tables manually with collations that match the remote
side. Maybe I’m missing something, though.

Anyway, here is a patch created on top of your patch to modify
async-related test cases to work as intended. I’m also attaching your
patch to make the cfbot quiet.

Sorry for the delay.

Best regards,
Etsuro Fujita

Attachments:

0001-fix-postgres-fdw-collation-handling-4.patchapplication/octet-stream; name=0001-fix-postgres-fdw-collation-handling-4.patchDownload+208-131
0002-modify-postgres-fdw-async-test-cases.patchapplication/octet-stream; name=0002-modify-postgres-fdw-async-test-cases.patchDownload+22-27
#20Tom Lane
tgl@sss.pgh.pa.us
In reply to: Etsuro Fujita (#19)
hackersbugs
Re: BUG #16583: merge join on tables with different DB collation behind postgres_fdw fails

Etsuro Fujita <etsuro.fujita@gmail.com> writes:

On Thu, Sep 2, 2021 at 5:42 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

The real reason that this hasn't gotten committed is that I remain
pretty uncomfortable about whether it's an acceptable solution to
the problem. Suddenly asking people to plaster COLLATE clauses
on all their textual remote columns seems like a big compatibility
gotcha.

I think so too.

Yeah :-(. It seems like a very unpleasant change.

Having said that, I think another option for this would be to left the
code as-is; assume that 1) the foreign var has "COLLATE default”, not
an unknown collation, when labeled with "COLLATE default”, and 2)
"COLLATE default” on the local database matches "COLLATE default” on
the remote database.

The fundamental complaint that started this thread was exactly that
assumption (2) isn't safe. So it sounds to me like you're proposing
that we do nothing, which isn't a great answer either. I suppose
we could try documenting our way out of this, but people will
continue to get bit because they won't read or won't understand
the limitation.

I'd be happier if we had a way to check whether the local and remote
default collations are compatible. But it seems like that's a big ask,
especially in cross-operating-system situations.

regards, tom lane

#21Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Tom Lane (#20)
hackersbugs
#22Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Etsuro Fujita (#21)
hackersbugs
#23Tom Lane
tgl@sss.pgh.pa.us
In reply to: Etsuro Fujita (#22)
hackersbugs
#24Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Tom Lane (#23)
hackersbugs
#25Tom Lane
tgl@sss.pgh.pa.us
In reply to: Etsuro Fujita (#24)
hackersbugs
#26Tom Lane
tgl@sss.pgh.pa.us
In reply to: Etsuro Fujita (#24)
hackersbugs
#27Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Tom Lane (#25)
hackersbugs
#28Jeremy Schneider
schnjere@amazon.com
In reply to: Tom Lane (#25)
hackersbugs