From 1a0c463d317d0e63885e7442b53875e9a1fd64e3 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Thu, 5 Mar 2020 06:04:35 -0600
Subject: [PATCH v6 4/4] Read each dir incrementally during each SRF call.

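This requires keeping a stack of opened dirs, along with the per-call state,
in the function's context data structure.  It also requires switching memory
contexts so that stack entries added during one call survive until later
calls.  The SRF implementation seems to preclude use of recursion, since each
call returns to the caller after producing one row.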
---
 src/backend/utils/adt/genfile.c | 107 ++++++++++++++------------------
 1 file changed, 47 insertions(+), 60 deletions(-)

diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c
index bc473e79ef..32d5b6f8b9 100644
--- a/src/backend/utils/adt/genfile.c
+++ b/src/backend/utils/adt/genfile.c
@@ -37,15 +37,15 @@
 
 typedef struct
 {
-	/* List of opened dirs */
+	/* Stack of opened dirs */
 	List		*location;
 	List		*dirdesc;
 	bool		include_dot_dirs;
 
 	/* Used in ls_dir_files: */
-	int			npaths;
-	char		**path;
-	struct stat	*stat;
+	bool		dir_ok;
+	char		*path;
+	struct stat	stat;
 } directory_fctx;
 
 
@@ -530,27 +530,37 @@ pg_ls_dir_1arg(PG_FUNCTION_ARGS)
 }
 
 /*
- * Populate fctx with list of pathnames and stat structues.
- *
- * Generate a full list of the stat structures all at once.  That gets as close
- * as we can to a consistent view of the filesystem, from which files might be
- * removed or renamed or reordered.
- *
- * recurse is the recursion level into a stack of dirs, or -1 to not decend
- * into directories.
+ * Update fctx->path and fctx->stat with the next file name; return 1 if one
+ * was found, or 0 at the end of the top-level dir.  If fctx->dir_ok, dirs are
+ * descended into; the dir being read is the last element of fctx->dirdesc.
  */
-static void
-populate_paths(directory_fctx *fctx, int recurse)
+static int
+populate_paths(directory_fctx *fctx, FuncCallContext *funcctx)
 {
 	struct dirent *de;
-	struct stat	attrib;
-
-	DIR *dirdesc = llast(fctx->dirdesc);
-	char *location = llast(fctx->location);
 
-	while ((de = ReadDir(dirdesc, location)) != NULL)
+	for (;;)
 	{
 		char		path[MAXPGPATH];
+		DIR			*dirdesc = llast(fctx->dirdesc);
+		char		*location = llast(fctx->location);
+
+		Assert(list_length(fctx->dirdesc) == list_length(fctx->location));
+
+		if ((de = ReadDir(dirdesc, location)) == NULL)
+		{
+			/*
+			 * Reached the end of the dir on top of the stack: finish if it is
+			 * the top-level dir, else pop it and resume reading its parent.
+			 */
+			if (list_length(fctx->dirdesc) == 1)
+				return 0;
+			FreeDir(llast(fctx->dirdesc));
+			fctx->dirdesc = list_delete_last(fctx->dirdesc);
+			pfree(location);
+			fctx->location = list_delete_last(fctx->location);
+			continue;
+		}
 
 		/* Skip hidden files */
 		if (de->d_name[0] == '.')
@@ -558,36 +568,26 @@ populate_paths(directory_fctx *fctx, int recurse)
 
 		/* Get the file info */
 		snprintf(path, sizeof(path), "%s/%s", location, de->d_name);
-		if (stat(path, &attrib) < 0)
+		if (stat(path, &fctx->stat) < 0)
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not stat file \"%s\": %m", path)));
 
 		/* Ignore anything but regular files, or dirs, if requested */
-		if (recurse == -1)
+		if (!fctx->dir_ok)
 			; /* Do nothing, fall through */
-		else if (S_ISDIR(attrib.st_mode))
+		else if (S_ISDIR(fctx->stat.st_mode))
 		{
-			/* Reallocate location and dirdesc whenever recursing */
-			fctx->location = lappend(fctx->location, path);
+			/* Push the subdir, allocating in the multi-call memory context */
+			MemoryContext oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+			fctx->location = lappend(fctx->location, pstrdup(path));
 			fctx->dirdesc = lappend(fctx->dirdesc, AllocateDir(path));
-			populate_paths(fctx, 1+recurse);
-			FreeDir(llast(fctx->dirdesc));
-			list_delete_last(fctx->dirdesc);
-		} else if (!S_ISREG(attrib.st_mode))
+			MemoryContextSwitchTo(oldcontext);
+		} else if (!S_ISREG(fctx->stat.st_mode))
 			continue;
 
-		/* Reallocate path and stat if npaths is a power of two */
-		if (!(fctx->npaths & (fctx->npaths-1)))
-		{
-			int nalloc = fctx->npaths ? 2 * fctx->npaths : 1;
-			fctx->path = repalloc(fctx->path, nalloc*sizeof(*fctx->path));
-			fctx->stat = repalloc(fctx->stat, nalloc*sizeof(*fctx->stat));
-		}
-
-		fctx->path[fctx->npaths] = pstrdup(path);
-		fctx->stat[fctx->npaths] = attrib;
-		fctx->npaths++;
+		fctx->path = pstrdup(path);
+		return 1;
 	}
 }
 
@@ -623,9 +623,7 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok, bool
 
 		fctx->location = lappend(NIL, pstrdup(dir));
 		fctx->dirdesc = lappend(NIL, AllocateDir(dir));
-		fctx->npaths = 0;
-		fctx->path = palloc(sizeof(*fctx->path));
-		fctx->stat = palloc(sizeof(*fctx->stat));
+		fctx->dir_ok = dir_ok;
 
 		if (!fctx->dirdesc)
 		{
@@ -641,8 +639,6 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok, bool
 								dir)));
 		}
 
-		populate_paths(fctx, dir_ok ? 0 : -1);
-		funcctx->max_calls = fctx->npaths;
 		funcctx->user_fctx = fctx;
 		MemoryContextSwitchTo(oldcontext);
 	}
@@ -650,35 +646,26 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok, bool
 	funcctx = SRF_PERCALL_SETUP();
 	fctx = (directory_fctx *) funcctx->user_fctx;
 
-	/*
-	 * Having already stat()ed all files, each call now just returns the
-	 * nth element.
-	 */
-	if (funcctx->call_cntr < funcctx->max_calls)
+	if (populate_paths(fctx, funcctx))
 	{
-		struct stat	*stat = &fctx->stat[funcctx->call_cntr];
 		Datum		values[4];
 		bool		nulls[4] = {0};
 		HeapTuple	tuple;
 
-		values[0] = CStringGetTextDatum(fctx->path[funcctx->call_cntr]);
-		values[1] = Int64GetDatum((int64) stat->st_size);
-		values[2] = TimestampTzGetDatum(time_t_to_timestamptz(stat->st_mtime));
+		values[0] = CStringGetTextDatum(fctx->path);
+		values[1] = Int64GetDatum((int64) fctx->stat.st_size);
+		values[2] = TimestampTzGetDatum(time_t_to_timestamptz(fctx->stat.st_mtime));
 		if (dir_ok)
-			values[3] = BoolGetDatum(S_ISDIR(stat->st_mode));
+			values[3] = BoolGetDatum(S_ISDIR(fctx->stat.st_mode));
 
 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+		pfree(fctx->path);
 		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
 	}
 
-	for (int i = 0; i < funcctx->max_calls; ++i)
-		pfree(fctx->path[i]);
-
-	pfree(fctx->path);
-	pfree(fctx->stat);
 	FreeDir(linitial(fctx->dirdesc));
 	list_free(fctx->dirdesc);
-	list_free(fctx->location);
+	list_free_deep(fctx->location);
 	SRF_RETURN_DONE(funcctx);
 }
 
-- 
2.17.0

