From a3bf8c6819875330955cab8f68636c351db837ea Mon Sep 17 00:00:00 2001 From: "kuroda.hayato%40jp.fujitsu.com" Date: Wed, 18 May 2022 04:56:18 +0000 Subject: [PATCH v3 3/3] add documents --- doc/src/sgml/catalogs.sgml | 328 +++++++++++++++++++++++++++++ doc/src/sgml/filelist.sgml | 1 + doc/src/sgml/func.sgml | 113 ++++++++++ doc/src/sgml/lrg.sgml | 417 +++++++++++++++++++++++++++++++++++++ doc/src/sgml/postgres.sgml | 1 + 5 files changed, 860 insertions(+) create mode 100644 doc/src/sgml/lrg.sgml diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index a533a2153e..db45b339e8 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -200,6 +200,26 @@ metadata for large objects + + pg_lrg_info + logical replication group + + + + pg_lrg_nodes + logical replication group + + + + pg_lrg_pub + logical replication group + + + + pg_lrg_sub + logical replication group + + pg_namespace schemas @@ -4960,6 +4980,314 @@ SCRAM-SHA-256$<iteration count>:&l + + <structname>pg_lrg_info</structname> + + + pg_lrg_info + + + The catalog pg_lrg_info stores information about + node groups. A node group is a group of nodes that send and receive their + data changes, and it can be created by the lrg_create + function. See for more information about node groups. + + + + Each database in the same node group has a tuple with the same information + except for oid. + + + + Currently, a node can participate in only one node group. This means that + this system catalog has at most one tuple. + + + + <structname>pg_lrg_info</structname> Columns + + + + + Column Type + + + Description + + + + + + + + oid oid + + + Row identifier + + + + + + groupname name + + + Name of the group + + + + + + puballtables bool + + + The type of publication + + + + +
+
+ + + <structname>pg_lrg_nodes</structname> + + + pg_lrg_nodes + + + The catalog pg_lrg_nodes stores information about + nodes in node group that this node has attached. When lrg_node_attach() + is executed, information about other nodes in a group is copied from the + upstream node, and information about this node is copied to the other nodes. + Therefore, each database in the same node group has tuples with the same + information except for oid and dbid. + See for more information about LRG. + + + + <structname>pg_lrg_nodes</structname> Columns + + + + + Column Type + + + Description + + + + + + + + oid oid + + + Row identifier + + + + + + nodeid name + + + Identifier of this node + + + + + + groupid oid + (references pg_lrg_info.oid) + + + Attached group + + + + + + dbid oid + (references pg_database.oid) + + + Database + + + + + + status int + + + State code: + 0 = initialize, + 1 = a publication was created in this node + 2 = subscriptions were created in all nodes + 3 = ready + 4 = this node is now detaching from a group + + + + + + nodename name + + + Name of this node + + + + + + localconn name + + + Connection string for this node + + + + + + upstreamconn name + + + Connection string for upstream node + + + + + +
+
+ + + <structname>pg_lrg_pub</structname> + + + pg_lrg_pub + + + The catalog pg_lrg_pub contains the mapping between + node groups and a publication in the database. See for + more information about LRG. + + + + <structname>pg_lrg_pub</structname> Columns + + + + + Column Type + + + Description + + + + + + + + oid oid + + + Row identifier + + + + + + groupid oid + (references pg_lrg_info.oid) + + + Attached group + + + + + + pubid oid + (references pg_publication.oid) + + + Publication + + + + + + +
+
+ + + + <structname>pg_lrg_sub</structname> + + + pg_lrg_sub + + + The catalog pg_lrg_sub contains the mapping between + node groups and subscriptions in the database. See for + more information about LRG. + + + + <structname>pg_lrg_sub</structname> Columns + + + + + Column Type + + + Description + + + + + + + + oid oid + + + Row identifier + + + + + + groupid oid + (references pg_lrg_info.oid) + + + Attached group + + + + + + subid oid + (references pg_subscription.oid) + + + Subscription + + + + + + +
+
<structname>pg_namespace</structname> diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index 40ef5f7ffc..8be17a652e 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -49,6 +49,7 @@ + diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 85ecc639fd..0722ff8989 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -29233,6 +29233,119 @@ postgres=# SELECT * FROM pg_walfile_name_offset((pg_backup_stop()).lsn); + + Logical Replication Group Management Functions + + + The functions shown + in are for + controlling and interacting with node groups. + + + + LRG Management Functions + + + + + Function + + + Description + + + + + + + + + + + lrg_create ( group_name name, publication_type text, local_connstring text, node_name name ) + void + + + Creates a node group named group_name + that shares publication_type, and attach to it as + node_name. If this node is already in a logical + replication group, an error is raised. Note that this function returns + immediately. This function is just a trigger for starting complex + operations by the LRG worker process. If users want to wait until + finishing them, lrg_wait can be used. + + + + + + + + lrg_node_attach ( group_name name, local_connstring text, upstream_connstring text, node_name name ) + void + + + Attach to a node group named group_name + as node_name. The initial data and configuration + information of this group will be shared from the upstream_connstring. + node_name must be unique in the group. + If this node is already in a node group, an error is + raised. Note that this function returns immediately. This function is + just a trigger for starting complex operations by the LRG worker + process. If users want to wait until finishing them, lrg_wait + can be used. + + + + + + + + lrg_node_detach ( group_name name, node_name name ) + void + + + Detach node_name from group_name. + If this node is not in a node group, an error is raised. + Note that this function returns immediately. This function is just a + trigger for starting complex operations by the LRG worker process. If + users want to wait until finishing them, lrg_wait + can be used. + + + + + + + + lrg_drop ( group_name name ) + void + + + Detach this node group_name and remove it. If + this node is not in a node group, or there are still nodes + in the group, an error is raised. + + + + + + + + lrg_wait () + void + + + Wait until LRG related operations are done. This function returns + immediately if this node is not in a node group. + + + + + +
+ +
+ Database Object Management Functions diff --git a/doc/src/sgml/lrg.sgml b/doc/src/sgml/lrg.sgml new file mode 100644 index 0000000000..34843a3ef1 --- /dev/null +++ b/doc/src/sgml/lrg.sgml @@ -0,0 +1,417 @@ + + + Logical Replication Group (LRG) + + + logical replication group + + + + Logical Replication Group (LRG) is a way to create a node group that + replicates data objects and their changes to each other. All nodes in + the group can execute Read-Write queries, and its changes will + "eventually" send to other nodes. + + + + When a node is attached to a node group, data that nodes in the group have + will be copied to the attaching node. Also, data existing in the node will + be copied to others. + + + + Each node in a node group has one publication and same number of subscriptions + as other nodes. + + + + LRG is especially useful when working with "data-aware" applications. + It is recommended that users build a node group located acros + different geographical location, and each node inserts or updates different + tuples. + + + + Advantages of LRG are: + + + + + Allowing load balancing + + + + + Allowing rolling updates of nodes + + + + + Improving the availability of the system + + + + + + + Node group + + Node group is a group that participant nodes have same tables and + send/receive their data changes. This is done via logical replication. + Each node in a group must be connected to all other nodes. LRG make such a + group by creating publications and subscriptions on nodes. + + + + Node groups are created by lrg_create. + The node that executed this function automatically attaches to the created + group. This function requires a connection string that is used for + connecting to the node from other or connecting to itself. Therefore, it is + not recommended to specify the Unix-domain socket or loopback address as the + connection string unless testing purposes. + + + + When users add a node to a node group, the node name must be specified. This + can be used for managing node information, and it must be specified when + users want to detach a node from a node group. The name must be unique within + a node group. + + + + In order to attach to a node group, the function lrg_node_attach + is used. At that time, in addition to specifying connection string for this + node, a connection string for connecting to one of the node in the group must + be specified. The specified node is called as "upstream node". After + executing lrg_node_attach, this node connects to the + upstream node, and gets connection strings for other nodes and synchronizes + the initial data. + + + + All tables present in the node group must be set up to allow all changes + to be replicated. This requires either of a primary key, or a replica + identity with unique index and not-null constraint. LRG does not check them + or throw any warnings. Users must define tables properly. Moreover, users + must not change any configurations of the automatically defined publication. + + + + Currently, each node can attach to only one node group, and changes to all + tables are replicated to nodes in the group. This is implemented by + defining a publication with target list 'FOR ALL TABLES'. This is an + implementation restriction that might be lifted in a future release. + + + + + LRG SQL interface + + See for detailed documentation on + SQL-level APIs for interacting with LRG. + + + + + Locking over nodes + + This feature is under development. + + + + + + Restrictions + + LRG currently has the following restrictions or missing functionality. + + + + Each node can attach only one node set at a time. + + + + + All tables in the database must be replicated. That is, only + 'FOR ALL TABLES' can be specified for publication_type + in lrg_create. + + + + + All replicated objects must be defined on the all nodes. + + + + + LRG does not provide any mechanism for conflict handling (e.g. PK violations). + Avoiding conflicts is the user responsibility. + + + + + + + + Configuration + + LRG will create a publication and subscriptions on each node, + that number of subscribers is the number of participants minus one. Therefore LRG requires + several configuration options. + + + + + + wal_level must be logical. + + + + + max_replication_slots must be set to at least the number of participants. + + + + + max_wal_senders must be set to at least same as max_replication_slots. + + + + + max_logical_replication_workers must be larger than the number of participants. + + + + + max_worker_processes must be larger that max_logical_replication_workers. + + + + + + + Architecture + + + LRG is implemented by "LRG launcher" and "LRG worker" processes. The launcher + process is registered when the server is not in the recovery mode, and it has + a responsibility for starting LRG worker processes. The actual operations for + creating or attaching to a node group are done by the LRG + worker process. There can be one LRG worker per database. + + + + More specifically: when SQL functions related with LRG are called by users, + tuples will be inserted to catalogs and backend process will notify to the + launcher. After receiving the notification, the launcher seek + and launches the LRG worker process. This seeking and launching will be + also done when server boots. + + + + + Example + + The following example demonstrates constructing a node group. + Assuming that there are three nodes, called Node1, Node2, Node3, and they can connect each other. + In the example all nodes are in the same server. + + + + Create a group + + + At first, a node group must be created. Here it will be created on Node1. + + +postgres=# -- Create a node group 'testgroup', and attach to it as 'testnode1' +postgres=# SELECT lrg_create('testgroup', 'FOR ALL TABLES', 'port=5431 dbname=postgres', 'testnode1'); + lrg_create +------------ + +(1 row) + +postgres=# -- Wait until operations by LRG worker are done +postgres=# SELECT lrg_wait(); + lrg_wait +---------- + +(1 row) + + + + + Information of the group and nodes can check via system catalogs, + like pg_lrg_nodes. + + +postgres=# SELECT * FROM pg_lrg_info ; + oid | groupname | puballtables +-------+-----------+-------------- + 16384 | testgroup | t +(1 row) + +postgres=# SELECT * FROM pg_lrg_nodes; + oid | nodeid | groupid | dbid | status | nodename | localconn | upstreamconn +-------+----------------------+---------+------+--------+-----------+---------------------------+-------------- + 16385 | 70988980716274892555 | 16384 | 5 | 3 | testnode1 | port=5431 dbname=postgres | +(1 row) + + + + + + Attach to a group + + + Next Node2 can attach the created group. Note again that the connection string for Node1 must be specified. + + + +postgres=# -- Attach to 'testgroup' as 'testnode2' +postgres=# SELECT lrg_node_attach('testgroup', 'port=5432 dbname=postgres', 'port=5431 dbname=postgres', 'testnode2'); + lrg_node_attach +----------------- + +(1 row) + +postgres=# -- Wait until operations by LRG worker are done +postgres=# SELECT lrg_wait(); + lrg_wait +---------- + +(1 row) + + + + The status of all nodes can check via pg_lrg_nodes. Following tuples will be found on both nodes. + + + +postgres=# SELECT * FROM pg_lrg_info ; + oid | groupname | puballtables +-------+-----------+-------------- + 16384 | testgroup | t +(1 row) + +postgres=# SELECT * FROM pg_lrg_nodes; + oid | nodeid | groupid | dbid | status | nodename | localconn | upstreamconn +-------+----------------------+---------+------+--------+-----------+---------------------------+--------------------------- + 16385 | 70989229890284027935 | 16384 | 5 | 3 | testnode2 | port=5432 dbname=postgres | port=5431 dbname=postgres + 16386 | 70988980716274892555 | 16385 | 5 | 3 | testnode1 | port=5431 dbname=postgres | +(2 rows) + + + + Now Node1 and Node2 has same contents about LRG system catalogs, + so ether of them can be specified as an upstream node. + In below example Node2 is used as upstream for attaching a new node. + + + +postgres=# -- Attach to 'testgroup' as 'testode3', and data will be copied from Node2 +postgres=# SELECT lrg_node_attach('testgroup', 'port=5433 dbname=postgres', 'port=5432 dbname=postgres', 'testnode3'); + lrg_node_attach +----------------- + +(1 row) + +postgres=# -- Wait until operations by LRG worker are done +postgres=# SELECT lrg_wait(); + lrg_wait +---------- + +(1 row) + + + + Finally pg_lrg_info and pg_lrg_nodes will be like: + + + +postgres=# SELECT * FROM pg_lrg_info ; + oid | groupname | puballtables +-------+-----------+-------------- + 16384 | testgroup | t +(1 row) + +postgres=# SELECT * FROM pg_lrg_nodes; + oid | nodeid | groupid | dbid | status | nodename | localconn | upstreamconn +-------+----------------------+---------+------+--------+-----------+---------------------------+--------------------------- + 16385 | 70989243367269230745 | 16384 | 5 | 3 | testnode3 | port=5433 dbname=postgres | port=5432 dbname=postgres + 16386 | 70989229890284027935 | 16385 | 5 | 3 | testnode2 | port=5432 dbname=postgres | port=5431 dbname=postgres + 16387 | 70988980716274892555 | 16385 | 5 | 3 | testnode1 | port=5431 dbname=postgres | +(3 rows) + + + + Now all nodes publish their changes, and they subscribe them. If a tuple inserted on Node1, + data will be also found on Node2 and Node3. + + + + + + Detach from a group + + User can detach attached nodes by executing lrg_node_detach + at any time. This function must be called from a node that is a member of a + group. + + + +postgres=# -- Detach Node3 from 'testgroup'. This can be done from the arbitrary member of the group +postgres=# select lrg_node_detach('testgroup', 'testnode3'); + lrg_node_detach +----------------- + +(1 row) + +postgres=# -- Also detach Node2 from 'testgroup'. This can be done from Node1 or Node2 +postgres=# select lrg_node_detach('testgroup', 'testnode2'); + lrg_node_detach +----------------- + +(1 row) + + + + The user-defined data will be not removed even if nodes are detached, but tuples in LRG related catalogs will be deleted. + + + +postgres=# -- On Node2, there are no tuples in pg_lrg_nodes +postgres=# select * from pg_lrg_nodes; + oid | nodeid | groupid | dbid | status | nodename | localconn | upstreamconn +-----+--------+---------+------+--------+----------+-----------+-------------- +(0 rows) + + + + + Drop a group + + For dropping a group, the API lrg_drop can be used. + The function must be callled from a node that is a member of a group, and + it will throw ERROR if there are other members in the group. + + + +postgres=# -- Drop 'testgroup.' Node1 will detach from it automatically. +postgres=# select lrg_drop('testgroup'); + lrg_drop +---------- + +(1 row) + +postgres=# -- There are no tuples in pg_lrg_info +postgres=# select * from pg_lrg_info; + oid | groupname | puballtables +-----+-----------+-------------- +(0 rows) + + + + + + \ No newline at end of file diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml index 0b60e46d69..bcea47fdc9 100644 --- a/doc/src/sgml/postgres.sgml +++ b/doc/src/sgml/postgres.sgml @@ -172,6 +172,7 @@ break is not needed in a wider output rendering. &logical-replication; &jit; ®ress; + &lrg; -- 2.27.0