From 2437276b9c7525981d4a70b804c81021b2f5fa1f Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sat, 14 Dec 2019 16:22:15 -0600
Subject: [PATCH v6 2/4] pg_ls_tmpdir to show directories

See also 9cd92d1a33699f86aa53d44ab04cc3eb50c18d11
---
 doc/src/sgml/func.sgml           |  13 +--
 src/backend/utils/adt/genfile.c  | 133 +++++++++++++++++++++++--------
 src/include/catalog/catversion.h |   2 +-
 src/include/catalog/pg_proc.dat  |   8 +-
 4 files changed, 114 insertions(+), 42 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 323366feb6..35abff16c9 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -21382,8 +21382,9 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
        </entry>
        <entry><type>setof record</type></entry>
        <entry>
-        List the name, size, and last modification time of files in the
-        temporary directory for <parameter>tablespace</parameter>.  If
+        For the temporary directory for <parameter>tablespace</parameter>,
+        list each file's name, size, last modification time, and a boolean
+        indicating whether it is a directory.  Directories are shown recursively.  If
         <parameter>tablespace</parameter> is not provided, the
         <literal>pg_default</literal> tablespace is used.  Access is granted
         to members of the <literal>pg_monitor</literal> role and may be
@@ -21479,9 +21480,11 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
     <primary>pg_ls_tmpdir</primary>
    </indexterm>
    <para>
-    <function>pg_ls_tmpdir</function> returns the name, size, and last modified
-    time (mtime) of each file in the temporary file directory for the specified
-    <parameter>tablespace</parameter>.  If <parameter>tablespace</parameter> is
+    <function>pg_ls_tmpdir</function> lists each file in the temporary file
+    directory for the specified <parameter>tablespace</parameter>, along with
+    its size, last modified time (mtime), and a boolean indicating whether it
+    is a directory.  Directories hold temporary files used by parallel
+    processes, and are shown recursively.  If <parameter>tablespace</parameter> is
     not provided, the <literal>pg_default</literal> tablespace is used.  By
     default only superusers and members of the <literal>pg_monitor</literal>
     role can use this function.  Access may be granted to others using
diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c
index 897b11a77d..c5148f547b 100644
--- a/src/backend/utils/adt/genfile.c
+++ b/src/backend/utils/adt/genfile.c
@@ -40,6 +40,11 @@ typedef struct
 	char	   *location;
 	DIR		   *dirdesc;
 	bool		include_dot_dirs;
+
+	/* Used in ls_dir_files: */
+	int			npaths;
+	char		**path;
+	struct stat	*stat;
 } directory_fctx;
 
 
@@ -522,12 +527,73 @@ pg_ls_dir_1arg(PG_FUNCTION_ARGS)
 	return pg_ls_dir(fcinfo);
 }
 
-/* Generic function to return a directory listing of files */
+/*
+ * Populate fctx with the pathnames and stat structures of the entries under
+ * fctx->location.  If "recurse" is true, directories are descended into and
+ * are themselves listed; otherwise only regular files are listed.
+ *
+ * Everything is stat()ed up front, since re-stat()ing later would have to cope
+ * with a 2nd stat failing or disagreeing about whether an entry is a dir.  This
+ * gets as close as we can to a consistent view of a changing filesystem.
+ */
+static void
+populate_paths(directory_fctx *fctx, bool recurse)
+{
+	struct dirent *de;
+	struct stat	attrib;
+	while ((de = ReadDir(fctx->dirdesc, fctx->location)) != NULL)
+	{
+		char		path[MAXPGPATH];
+
+		/* Skip hidden files */
+		if (de->d_name[0] == '.')
+			continue;
+
+		/* Get the file info */
+		snprintf(path, sizeof(path), "%s/%s", fctx->location, de->d_name);
+		if (stat(path, &attrib) < 0)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not stat file \"%s\": %m", path)));
+
+		/* Ignore anything but regular files, and directories if recursing */
+		if (S_ISDIR(attrib.st_mode))
+		{
+			/* Saved so the parent dir can be resumed after recursing */
+			directory_fctx oldfctx = *fctx;
+
+			if (!recurse)
+				continue;		/* caller did not ask for directories */
+
+			/* Descend; children are appended before the dir's own entry */
+			fctx->location = path;
+			fctx->dirdesc = AllocateDir(path);
+			populate_paths(fctx, recurse);
+			FreeDir(fctx->dirdesc);
+			fctx->dirdesc = oldfctx.dirdesc;
+			fctx->location = oldfctx.location;
+		} else if (!S_ISREG(attrib.st_mode))
+			continue;
+
+		/* Double both arrays whenever npaths is zero or a power of two */
+		if (!(fctx->npaths & (fctx->npaths-1)))
+		{
+			int nalloc = fctx->npaths ? 2 * fctx->npaths : 1;
+			fctx->path = repalloc(fctx->path, nalloc*sizeof(*fctx->path));
+			fctx->stat = repalloc(fctx->stat, nalloc*sizeof(*fctx->stat));
+		}
+
+		fctx->path[fctx->npaths] = pstrdup(path);
+		fctx->stat[fctx->npaths] = attrib;
+		fctx->npaths++;
+	}
+}
+
+/* Generic function to return a directory listing of files (and optionally dirs) */
 static Datum
-pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
+pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok, bool dir_ok)
 {
 	FuncCallContext *funcctx;
-	struct dirent *de;
 	directory_fctx *fctx;
 
 	if (SRF_IS_FIRSTCALL())
@@ -540,17 +606,24 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
 
 		fctx = palloc(sizeof(directory_fctx));
 
-		tupdesc = CreateTemplateTupleDesc(3);
+		tupdesc = CreateTemplateTupleDesc(dir_ok ? 4:3);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
 						   TEXTOID, -1, 0);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "size",
 						   INT8OID, -1, 0);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "modification",
 						   TIMESTAMPTZOID, -1, 0);
+		if (dir_ok)
+			TupleDescInitEntry(tupdesc, (AttrNumber) 4, "isdir",
+						   BOOLOID, -1, 0);
+
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
 		fctx->location = pstrdup(dir);
 		fctx->dirdesc = AllocateDir(fctx->location);
+		fctx->npaths = 0;
+		fctx->path = palloc(sizeof(*fctx->path));
+		fctx->stat = palloc(sizeof(*fctx->stat));
 
 		if (!fctx->dirdesc)
 		{
@@ -566,6 +639,8 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
 								fctx->location)));
 		}
 
+		populate_paths(fctx, dir_ok);
+		funcctx->max_calls = fctx->npaths;
 		funcctx->user_fctx = fctx;
 		MemoryContextSwitchTo(oldcontext);
 	}
@@ -573,38 +648,32 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
 	funcctx = SRF_PERCALL_SETUP();
 	fctx = (directory_fctx *) funcctx->user_fctx;
 
-	while ((de = ReadDir(fctx->dirdesc, fctx->location)) != NULL)
+	/*
+	 * Having already stat()ed all files, each call now just returns the
+	 * nth element.
+	 */
+	if (funcctx->call_cntr < funcctx->max_calls)
 	{
-		Datum		values[3];
-		bool		nulls[3];
-		char		path[MAXPGPATH * 2];
-		struct stat attrib;
+		struct stat	*stat = &fctx->stat[funcctx->call_cntr];
+		Datum		values[4];
+		bool		nulls[4] = {0};
 		HeapTuple	tuple;
 
-		/* Skip hidden files */
-		if (de->d_name[0] == '.')
-			continue;
-
-		/* Get the file info */
-		snprintf(path, sizeof(path), "%s/%s", fctx->location, de->d_name);
-		if (stat(path, &attrib) < 0)
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not stat file \"%s\": %m", path)));
-
-		/* Ignore anything but regular files */
-		if (!S_ISREG(attrib.st_mode))
-			continue;
-
-		values[0] = CStringGetTextDatum(de->d_name);
-		values[1] = Int64GetDatum((int64) attrib.st_size);
-		values[2] = TimestampTzGetDatum(time_t_to_timestamptz(attrib.st_mtime));
-		memset(nulls, 0, sizeof(nulls));
+		values[0] = CStringGetTextDatum(fctx->path[funcctx->call_cntr]);
+		values[1] = Int64GetDatum((int64) stat->st_size);
+		values[2] = TimestampTzGetDatum(time_t_to_timestamptz(stat->st_mtime));
+		if (dir_ok)
+			values[3] = BoolGetDatum(S_ISDIR(stat->st_mode));
 
 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
 		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
 	}
 
+	for (int i = 0; i < funcctx->max_calls; ++i)
+		pfree(fctx->path[i]);
+
+	pfree(fctx->path);
+	pfree(fctx->stat);
 	FreeDir(fctx->dirdesc);
 	SRF_RETURN_DONE(funcctx);
 }
@@ -613,14 +682,14 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
 Datum
 pg_ls_logdir(PG_FUNCTION_ARGS)
 {
-	return pg_ls_dir_files(fcinfo, Log_directory, false);
+	return pg_ls_dir_files(fcinfo, Log_directory, false, false);
 }
 
 /* Function to return the list of files in the WAL directory */
 Datum
 pg_ls_waldir(PG_FUNCTION_ARGS)
 {
-	return pg_ls_dir_files(fcinfo, XLOGDIR, false);
+	return pg_ls_dir_files(fcinfo, XLOGDIR, false, false);
 }
 
 /*
@@ -638,7 +707,7 @@ pg_ls_tmpdir(FunctionCallInfo fcinfo, Oid tblspc)
 						tblspc)));
 
 	TempTablespacePath(path, tblspc);
-	return pg_ls_dir_files(fcinfo, path, true);
+	return pg_ls_dir_files(fcinfo, path, true, true);
 }
 
 /*
@@ -667,5 +736,5 @@ pg_ls_tmpdir_1arg(PG_FUNCTION_ARGS)
 Datum
 pg_ls_archive_statusdir(PG_FUNCTION_ARGS)
 {
-	return pg_ls_dir_files(fcinfo, XLOGDIR "/archive_status", true);
+	return pg_ls_dir_files(fcinfo, XLOGDIR "/archive_status", true, false);
 }
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index d4fe84a037..6e1e2b961b 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	202003031
+#define CATALOG_VERSION_NO	202003032
 
 #endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 07a86c7b7b..0dbce4ba09 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -10739,13 +10739,13 @@
 { oid => '5029', descr => 'list files in the pgsql_tmp directory',
   proname => 'pg_ls_tmpdir', procost => '10', prorows => '20', proretset => 't',
   provolatile => 'v', prorettype => 'record', proargtypes => '',
-  proallargtypes => '{text,int8,timestamptz}', proargmodes => '{o,o,o}',
-  proargnames => '{name,size,modification}', prosrc => 'pg_ls_tmpdir_noargs' },
+  proallargtypes => '{text,int8,timestamptz,bool}', proargmodes => '{o,o,o,o}',
+  proargnames => '{name,size,modification,isdir}', prosrc => 'pg_ls_tmpdir_noargs' },
 { oid => '5030', descr => 'list files in the pgsql_tmp directory',
   proname => 'pg_ls_tmpdir', procost => '10', prorows => '20', proretset => 't',
   provolatile => 'v', prorettype => 'record', proargtypes => 'oid',
-  proallargtypes => '{oid,text,int8,timestamptz}', proargmodes => '{i,o,o,o}',
-  proargnames => '{tablespace,name,size,modification}',
+  proallargtypes => '{oid,text,int8,timestamptz,bool}', proargmodes => '{i,o,o,o,o}',
+  proargnames => '{tablespace,name,size,modification,isdir}',
   prosrc => 'pg_ls_tmpdir_1arg' },
 
 # hash partitioning constraint function
-- 
2.17.0

