From 6fcd9be88bd85df4af0508f082dfdad0f8bbf518 Mon Sep 17 00:00:00 2001
From: Dilip Kumar <dilipkumar@localhost.localdomain>
Date: Tue, 27 Apr 2021 13:57:21 +0530
Subject: [PATCH v3] Assorted bug fix while selecting the largest top
 transaction for streaming

There were mainly 2 problems, 1) Ideally, if we haven't selected any transaction
we should take next available transaction without comparing the size but the
condition was wrong and it was selecting the next available transaction without
comparing the size if we had already selected a transaction which was wrong.
2) Another probelm was we were selecting the transaction without checking their
base snapshot, so if the base snapshot is NULL then we can not stream any change
so it was hitting the assert that after streaming txn->size should be 0.  So the
solution is we should never select the transaction for streaming which doesn't
have a base snapshot as we can not stream that transaction.
---
 src/backend/replication/logical/reorderbuffer.c | 31 ++++++++++++++-----------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 5cb484f..ea217ef 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -3362,16 +3362,17 @@ ReorderBufferLargestTXN(ReorderBuffer *rb)
  * iterate over the limited number of toplevel transactions.
  *
  * Note that, we skip transactions that contains incomplete changes. There
- * is a scope of optimization here such that we can select the largest transaction
- * which has complete changes.  But that will make the code and design quite complex
- * and that might not be worth the benefit.  If we plan to stream the transactions
- * that contains incomplete changes then we need to find a way to partially
- * stream/truncate the transaction changes in-memory and build a mechanism to
- * partially truncate the spilled files.  Additionally, whenever we partially
- * stream the transaction we need to maintain the last streamed lsn and next time
- * we need to restore from that segment and the offset in WAL.  As we stream the
- * changes from the top transaction and restore them subtransaction wise, we need
- * to even remember the subxact from where we streamed the last change.
+ * is a scope of optimization here such that we can select the largest
+ * transaction which has incomplete changes.  But that will make the code and
+ * design quite complex and that might not be worth the benefit.  If we plan to
+ * stream the transactions that contains incomplete changes then we need to
+ * find a way to partially stream/truncate the transaction changes in-memory
+ * and build a mechanism to partially truncate the spilled files.
+ * Additionally, whenever we partially stream the transaction we need to
+ * maintain the last streamed lsn and next time we need to restore from that
+ * segment and the offset in WAL.  As we stream the changes from the top
+ * transaction and restore them subtransaction wise, we need to even remember
+ * the subxact from where we streamed the last change.
  */
 static ReorderBufferTXN *
 ReorderBufferLargestTopTXN(ReorderBuffer *rb)
@@ -3381,13 +3382,17 @@ ReorderBufferLargestTopTXN(ReorderBuffer *rb)
 	ReorderBufferTXN *largest = NULL;
 
 	/* Find the largest top-level transaction. */
-	dlist_foreach(iter, &rb->toplevel_by_lsn)
+	dlist_foreach(iter, &rb->txns_by_base_snapshot_lsn)
 	{
 		ReorderBufferTXN *txn;
 
-		txn = dlist_container(ReorderBufferTXN, node, iter.cur);
+		txn = dlist_container(ReorderBufferTXN, base_snapshot_node, iter.cur);
+
+		/* known-as-subtxn txns must not be listed */
+		Assert(!rbtxn_is_known_subxact(txn));
+		Assert(txn->base_snapshot != NULL);
 
-		if ((largest != NULL || txn->total_size > largest_size) &&
+		if ((largest == NULL || txn->total_size > largest_size) &&
 			(txn->total_size > 0) && !(rbtxn_has_incomplete_tuple(txn)))
 		{
 			largest = txn;
-- 
1.8.3.1

