diff --git a/src/backend/storage/aio/method_worker.c b/src/backend/storage/aio/method_worker.c
index b5ac073a910..25afa382932 100644
--- a/src/backend/storage/aio/method_worker.c
+++ b/src/backend/storage/aio/method_worker.c
@@ -48,10 +48,6 @@
 #include "utils/wait_event.h"
 
 
-/* How many workers should each worker wake up if needed? */
-#define IO_WORKER_WAKEUP_FANOUT 2
-
-
 typedef struct PgAioWorkerSubmissionQueue
 {
 	uint32		size;
@@ -69,10 +65,11 @@ typedef struct PgAioWorkerSlot
 
 typedef struct PgAioWorkerControl
 {
-	uint64		idle_worker_mask;
+	int dummy;					/* placeholder: C requires another named member besides a flexible array member */
 	PgAioWorkerSlot workers[FLEXIBLE_ARRAY_MEMBER];
 } PgAioWorkerControl;
 
+static int	nextWakeupWorker = 0;	/* round-robin cursor for pgaio_worker_choose(); a plain static, not part of the shared control struct */
 
 static size_t pgaio_worker_shmem_size(void);
 static void pgaio_worker_shmem_init(bool first_time);
@@ -94,7 +91,7 @@ const IoMethodOps pgaio_worker_ops = {
 int			io_workers = 3;
 
 
-static int	io_worker_queue_size = 64;
+static int	io_worker_queue_size = 512;
 static int	MyIoWorkerId;
 static PgAioWorkerSubmissionQueue *io_worker_submission_queue;
 static PgAioWorkerControl *io_worker_control;
@@ -152,7 +149,6 @@ pgaio_worker_shmem_init(bool first_time)
 						&found);
 	if (!found)
 	{
-		io_worker_control->idle_worker_mask = 0;
 		for (int i = 0; i < MAX_IO_WORKERS; ++i)
 		{
 			io_worker_control->workers[i].latch = NULL;
@@ -162,17 +158,31 @@ pgaio_worker_shmem_init(bool first_time)
 }
 
+/*
+ * Pick the next I/O worker to wake up, cycling round-robin through worker
+ * ids 0 .. io_workers - 1 using the nextWakeupWorker cursor.
+ *
+ * An I/O worker never picks itself.  Returns the chosen worker id, or -1 if
+ * the only candidate id is the caller's own.  The chosen slot is not checked
+ * for a registered worker; callers must cope with a NULL latch.
+ */
 static int
-pgaio_worker_choose_idle(void)
+pgaio_worker_choose(void)
 {
 	int			worker;
 
-	if (io_worker_control->idle_worker_mask == 0)
-		return -1;
+	/* Bring the cursor back into range in case io_workers was lowered. */
+	worker = nextWakeupWorker % io_workers;
 
-	/* Find the lowest bit position, and clear it. */
-	worker = pg_rightmost_one_pos64(io_worker_control->idle_worker_mask);
-	io_worker_control->idle_worker_mask &= ~(UINT64_C(1) << worker);
-	Assert(io_worker_control->workers[worker].in_use);
+	/* Test the candidate itself, not its successor, against our own id. */
+	if (MyBackendType == B_IO_WORKER && worker == MyIoWorkerId)
+	{
+		if (io_workers <= 1)
+			return -1;
+		worker = (worker + 1) % io_workers;
+	}
+
+	/* Resume after the chosen worker on the next call. */
+	nextWakeupWorker = (worker + 1) % io_workers;
 
 	return worker;
 }
@@ -245,8 +244,7 @@ pgaio_worker_submit_internal(int num_staged_ios, PgAioHandle **staged_ios)
 {
 	PgAioHandle *synchronous_ios[PGAIO_SUBMIT_BATCH_SIZE];
 	int			nsync = 0;
 	Latch	   *wakeup = NULL;
-	int			worker;
 
 	Assert(num_staged_ios <= PGAIO_SUBMIT_BATCH_SIZE);
 
@@ -261,25 +259,26 @@ pgaio_worker_submit_internal(int num_staged_ios, PgAioHandle **staged_ios)
 			 * we can to workers, to maximize concurrency.
 			 */
 			synchronous_ios[nsync++] = staged_ios[i];
-			continue;
 		}
+	}
 
-		if (wakeup == NULL)
-		{
-			/* Choose an idle worker to wake up if we haven't already. */
-			worker = pgaio_worker_choose_idle();
-			if (worker >= 0)
-				wakeup = io_worker_control->workers[worker].latch;
+	/*
+	 * If anything was queued, choose a worker to wake.  Fetch its latch
+	 * while still holding the lock: round-robin selection can land on a
+	 * slot with no registered worker, in which case the latch is NULL and
+	 * there is nobody to wake.
+	 */
+	if (num_staged_ios > nsync)
+	{
+		int			worker = pgaio_worker_choose();
 
-			pgaio_debug_io(DEBUG4, staged_ios[i],
-						   "choosing worker %d",
-						   worker);
-		}
+		if (worker >= 0)
+			wakeup = io_worker_control->workers[worker].latch;
 	}
 	LWLockRelease(AioWorkerSubmissionQueueLock);
 
 	if (wakeup)
 		SetLatch(wakeup);
 
 	/* Run whatever is left synchronously. */
 	if (nsync > 0)
@@ -317,7 +309,6 @@ pgaio_worker_die(int code, Datum arg)
 	Assert(io_worker_control->workers[MyIoWorkerId].in_use);
 	Assert(io_worker_control->workers[MyIoWorkerId].latch == MyLatch);
 
-	io_worker_control->idle_worker_mask &= ~(UINT64_C(1) << MyIoWorkerId);
 	io_worker_control->workers[MyIoWorkerId].in_use = false;
 	io_worker_control->workers[MyIoWorkerId].latch = NULL;
 	LWLockRelease(AioWorkerSubmissionQueueLock);
@@ -354,7 +345,6 @@ pgaio_worker_register(void)
 	if (MyIoWorkerId == -1)
 		elog(ERROR, "couldn't find a free worker slot");
 
-	io_worker_control->idle_worker_mask |= (UINT64_C(1) << MyIoWorkerId);
 	io_worker_control->workers[MyIoWorkerId].latch = MyLatch;
 	LWLockRelease(AioWorkerSubmissionQueueLock);
 
@@ -458,10 +448,6 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
 	while (!ShutdownRequestPending)
 	{
 		uint32		io_index;
-		Latch	   *latches[IO_WORKER_WAKEUP_FANOUT];
-		int			nlatches = 0;
-		int			nwakeups = 0;
-		int			worker;
 
 		/*
 		 * Try to get a job to do.
@@ -470,36 +456,9 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
 		 * to ensure that we don't see an outdated data in the handle.
 		 */
 		LWLockAcquire(AioWorkerSubmissionQueueLock, LW_EXCLUSIVE);
-		if ((io_index = pgaio_worker_submission_queue_consume()) == -1)
-		{
-			/*
-			 * Nothing to do.  Mark self idle.
-			 *
-			 * XXX: Invent some kind of back pressure to reduce useless
-			 * wakeups?
-			 */
-			io_worker_control->idle_worker_mask |= (UINT64_C(1) << MyIoWorkerId);
-		}
-		else
-		{
-			/* Got one.  Clear idle flag. */
-			io_worker_control->idle_worker_mask &= ~(UINT64_C(1) << MyIoWorkerId);
-
-			/* See if we can wake up some peers. */
-			nwakeups = Min(pgaio_worker_submission_queue_depth(),
-						   IO_WORKER_WAKEUP_FANOUT);
-			for (int i = 0; i < nwakeups; ++i)
-			{
-				if ((worker = pgaio_worker_choose_idle()) < 0)
-					break;
-				latches[nlatches++] = io_worker_control->workers[worker].latch;
-			}
-		}
+		io_index = pgaio_worker_submission_queue_consume();
 		LWLockRelease(AioWorkerSubmissionQueueLock);
 
-		for (int i = 0; i < nlatches; ++i)
-			SetLatch(latches[i]);
-
 		if (io_index != -1)
 		{
 			PgAioHandle *ioh = NULL;
