Improve the performance of the standby server when dropping tables on the primary server

Started by Tokuda, Takashiover 8 years ago3 messages
#1Tokuda, Takashi
tokuda.takashi@jp.fujitsu.com
1 attachment(s)

Hi,

The attached patch changes data structure storing unowned SMgrRelation objects
from list structure to hash structure.
The reason why I change it is that list structure very slowly removes a node.
And list structure takes longer time to remove a node than hash structure.

The problem was reported in BUG #14575.
/messages/by-id/20170303023246.25054.66379@wrigleys.postgresql.org

In my customer's case, the standby server was delayed more than 20 minites
when dropping many table at once.

- Performance check

I confirmed the performance of dropping tables by the following method.

1. Set up a synchronous streaming replication environment.
And set synchronous_commit = remote_apply in postgresql.conf.

2. Create 100,000 tables (tbl1, tbl2, ... , tbl100000).
And insert one row in each table.

3. Measure the time to drop 50 tables by psql

$ time psql -d ${DATABSE} -p ${PORT} -f drop.sql

drop.sql
--
begin;
drop table tbl1;
drop table tbl2;
...
drop table tbl50;
commit;
--

Result:
without this patch
real 0m3.734s
user 0m0.003s
sys 0m0.005s

with this patch
real 0m1.292s
user 0m0.005s
sys 0m0.003s

Even in this case, we have improved considerably,
so I suggest you might approve it.

Regards,
Takashi Tokuda

Attachments:

change_data_structure_storing_unowned_SMgrRelation.patchapplication/octet-stream; name=change_data_structure_storing_unowned_SMgrRelation.patchDownload
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 0ca095c..b47374d 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -76,18 +76,25 @@ static const f_smgr smgrsw[] = {
 static const int NSmgr = lengthof(smgrsw);
 
 
+typedef struct SMgrRelationHashEntry
+{
+	/* rnode is the hashtable lookup key, so it must be first! */
+	RelFileNodeBackend smgr_rnode;  /* relation physical identifier */
+
+	SMgrRelation reln;
+} SMgrRelationHashEntry;
+
+
 /*
- * Each backend has a hashtable that stores all extant SMgrRelation objects.
- * In addition, "unowned" SMgrRelation objects are chained together in a list.
+ * Each backend has a hashtable that stores all extant SMgrRelation objects
+ * and a hashtable that stores "unowned" SMgrRelation objects.
  */
 static HTAB *SMgrRelationHash = NULL;
 
-static SMgrRelation first_unowned_reln = NULL;
+static HTAB *UnownedSMgrRelationHash = NULL;
 
 /* local function prototypes */
 static void smgrshutdown(int code, Datum arg);
-static void add_to_unowned_list(SMgrRelation reln);
-static void remove_from_unowned_list(SMgrRelation reln);
 
 
 /*
@@ -143,14 +150,19 @@ smgropen(RelFileNode rnode, BackendId backend)
 	if (SMgrRelationHash == NULL)
 	{
 		/* First time through: initialize the hash table */
-		HASHCTL		ctl;
+		HASHCTL		ctl, unownctl;
 
 		MemSet(&ctl, 0, sizeof(ctl));
 		ctl.keysize = sizeof(RelFileNodeBackend);
 		ctl.entrysize = sizeof(SMgrRelationData);
 		SMgrRelationHash = hash_create("smgr relation table", 400,
 									   &ctl, HASH_ELEM | HASH_BLOBS);
-		first_unowned_reln = NULL;
+
+		MemSet(&unownctl, 0, sizeof(unownctl));
+		unownctl.keysize = sizeof(RelFileNodeBackend);
+		unownctl.entrysize = sizeof(SMgrRelationHashEntry);
+		UnownedSMgrRelationHash = hash_create("unowned smgr relation table", 
+										400, &unownctl, HASH_ELEM | HASH_BLOBS);
 	}
 
 	/* Look up or create an entry */
@@ -164,6 +176,7 @@ smgropen(RelFileNode rnode, BackendId backend)
 	if (!found)
 	{
 		int			forknum;
+		SMgrRelationHashEntry *unownreln;
 
 		/* hash_search already filled in the lookup key */
 		reln->smgr_owner = NULL;
@@ -177,7 +190,13 @@ smgropen(RelFileNode rnode, BackendId backend)
 			reln->md_num_open_segs[forknum] = 0;
 
 		/* it has no owner yet */
-		add_to_unowned_list(reln);
+		unownreln = (SMgrRelationHashEntry *) hash_search(UnownedSMgrRelationHash,
+										  (void *) &(reln->smgr_rnode),
+										  HASH_ENTER, &found);
+
+		Assert(!found);
+		/* hash_search already filled in the lookup key */
+		unownreln->reln = reln;
 	}
 
 	return reln;
@@ -207,7 +226,10 @@ smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
 	if (reln->smgr_owner)
 		*(reln->smgr_owner) = NULL;
 	else
-		remove_from_unowned_list(reln);
+		if (hash_search(UnownedSMgrRelationHash,
+					(void *) &(reln->smgr_rnode),
+					HASH_REMOVE, NULL) == NULL)
+			elog(ERROR, "Unowned SMgrRelation hashtable corrupted");
 
 	/* Now establish the ownership relationship. */
 	reln->smgr_owner = owner;
@@ -221,6 +243,9 @@ smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
 void
 smgrclearowner(SMgrRelation *owner, SMgrRelation reln)
 {
+	bool         found;
+	SMgrRelationHashEntry *unownreln;
+
 	/* Do nothing if the SMgrRelation object is not owned by the owner */
 	if (reln->smgr_owner != owner)
 		return;
@@ -231,53 +256,12 @@ smgrclearowner(SMgrRelation *owner, SMgrRelation reln)
 	/* unset our reference to the owner */
 	reln->smgr_owner = NULL;
 
-	add_to_unowned_list(reln);
-}
-
-/*
- * add_to_unowned_list -- link an SMgrRelation onto the unowned list
- *
- * Check remove_from_unowned_list()'s comments for performance
- * considerations.
- */
-static void
-add_to_unowned_list(SMgrRelation reln)
-{
-	/* place it at head of the list (to make smgrsetowner cheap) */
-	reln->next_unowned_reln = first_unowned_reln;
-	first_unowned_reln = reln;
-}
-
-/*
- * remove_from_unowned_list -- unlink an SMgrRelation from the unowned list
- *
- * If the reln is not present in the list, nothing happens.  Typically this
- * would be caller error, but there seems no reason to throw an error.
- *
- * In the worst case this could be rather slow; but in all the cases that seem
- * likely to be performance-critical, the reln being sought will actually be
- * first in the list.  Furthermore, the number of unowned relns touched in any
- * one transaction shouldn't be all that high typically.  So it doesn't seem
- * worth expending the additional space and management logic needed for a
- * doubly-linked list.
- */
-static void
-remove_from_unowned_list(SMgrRelation reln)
-{
-	SMgrRelation *link;
-	SMgrRelation cur;
-
-	for (link = &first_unowned_reln, cur = *link;
-		 cur != NULL;
-		 link = &cur->next_unowned_reln, cur = *link)
-	{
-		if (cur == reln)
-		{
-			*link = cur->next_unowned_reln;
-			cur->next_unowned_reln = NULL;
-			break;
-		}
-	}
+	unownreln = (SMgrRelationHashEntry *) hash_search(UnownedSMgrRelationHash,
+									  (void *) &(reln->smgr_rnode),
+									  HASH_ENTER, &found);
+	Assert(!found);
+	/* hash_search already filled in the lookup key */
+	unownreln->reln = reln;
 }
 
 /*
@@ -304,7 +288,10 @@ smgrclose(SMgrRelation reln)
 	owner = reln->smgr_owner;
 
 	if (!owner)
-		remove_from_unowned_list(reln);
+		if (hash_search(UnownedSMgrRelationHash,
+					(void *) &(reln->smgr_rnode),
+					HASH_REMOVE, NULL) == NULL)
+			elog(ERROR, "Unowned SMgrRelation hashtable corrupted");
 
 	if (hash_search(SMgrRelationHash,
 					(void *) &(reln->smgr_rnode),
@@ -797,13 +784,17 @@ smgrpostckpt(void)
 void
 AtEOXact_SMgr(void)
 {
+	HASH_SEQ_STATUS status;
+	SMgrRelationHashEntry *unownreln;
+
+	if (UnownedSMgrRelationHash == NULL)
+		return;
+
 	/*
 	 * Zap all unowned SMgrRelations.  We rely on smgrclose() to remove each
-	 * one from the list.
+	 * one from the hash.
 	 */
-	while (first_unowned_reln != NULL)
-	{
-		Assert(first_unowned_reln->smgr_owner == NULL);
-		smgrclose(first_unowned_reln);
-	}
+	hash_seq_init(&status, UnownedSMgrRelationHash);
+	while ((unownreln = (SMgrRelationHashEntry *) hash_seq_search(&status)) != NULL)
+		smgrclose(unownreln->reln);
 }
#2Simon Riggs
simon@2ndquadrant.com
In reply to: Tokuda, Takashi (#1)
Re: Improve the performance of the standby server when dropping tables on the primary server

On 1 August 2017 at 05:45, Tokuda, Takashi
<tokuda.takashi@jp.fujitsu.com> wrote:

Hi,

The attached patch changes data structure storing unowned SMgrRelation objects
from list structure to hash structure.
The reason why I change it is that list structure very slowly removes a node.
And list structure takes longer time to remove a node than hash structure.

The problem was reported in BUG #14575.
/messages/by-id/20170303023246.25054.66379@wrigleys.postgresql.org

In my customer's case, the standby server was delayed more than 20 minites
when dropping many table at once.

- Performance check

Interesting, thanks for the patch.

Couple of points regarding performance...

* The previous coding allowed for a fast path to access the last
unowned relation, which this patch removes. It seems a good idea to
cache the last unowned relation, or if not explain why the comment
that says why it worked that way is no longer true.

* We should only create the hash table when needed, i.e. on or after
when we add an unowned relation, since that is not a typical case.

* The hash table is sized at 400 elements and will grow from there.
The comments in dynahash say "An overly large nelem will penalize
hash_seq_search speed without buying much." so this makes your patch
suitable for the bulk case but likely to perform worse for fewer
elements. So I'm guessing that you picked 400 because that's what the
parameter is set to for the smgr relation table rather than because
this has had good consideration.

I'll take your word for now that it improves the main case but I'd
suggest you consider the performance effects of the patch on other
cases that use this code.

Without looking deeper: does this code also run for temp objects? Can
it be optimized for that case a little better?

--
Simon Riggs http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#3Tokuda, Takashi
tokuda.takashi@jp.fujitsu.com
In reply to: Simon Riggs (#2)
Re: Improve the performance of the standby server when dropping tables on the primary server

Hi,

* The previous coding allowed for a fast path to access the last
unowned relation, which this patch removes. It seems a good idea to
cache the last unowned relation, or if not explain why the comment
that says why it worked that way is no longer true.

* We should only create the hash table when needed, i.e. on or after
when we add an unowned relation, since that is not a typical case.

* The hash table is sized at 400 elements and will grow from there.
The comments in dynahash say "An overly large nelem will penalize
hash_seq_search speed without buying much." so this makes your patch
suitable for the bulk case but likely to perform worse for fewer
elements. So I'm guessing that you picked 400 because that's what the
parameter is set to for the smgr relation table rather than because
this has had good consideration.

I thought abount improving the above problems.
But I have no good ideas to improve every case.
Do you have any good ideas?

I suggest to apply this modification only for the startup process.
This is because the startup process has many unowned SMgrRelation objects.
In XLOG replay, statup process create fake relcaches.
Fake relcaches create unowned SMgrRelation objects.
So startup process has more unowned SMgrRelation objects than any other process.

Of cource, it is necessary to think about the problems such as hash size.
Do you think about it.

Regards, Takashi Tokuda

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers