From b8d31613acfc9d554640b4e292f366d79084bb8e Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Wed, 18 Oct 2023 16:18:58 -0700
Subject: [PATCH v3 2/3] heavily wip: update typedefs

---
 meson.build                           | 14 +++++-
 src/backend/jit/llvm/meson.build      |  2 +-
 src/backend/snowball/meson.build      |  2 +-
 src/timezone/meson.build              |  2 +-
 src/tools/pgindent/merge_typedefs     | 24 +++++++++
 src/tools/pgindent/meson.build        | 70 +++++++++++++++++++++++++++
 src/tools/pgindent/typedefs_from_objs | 42 ++++++++++++++++
 7 files changed, 152 insertions(+), 4 deletions(-)
 create mode 100755 src/tools/pgindent/merge_typedefs
 create mode 100644 src/tools/pgindent/meson.build
 create mode 100755 src/tools/pgindent/typedefs_from_objs

diff --git a/meson.build b/meson.build
index 2cfeb60eb07..70f6b680c2c 100644
--- a/meson.build
+++ b/meson.build
@@ -2579,6 +2579,7 @@ add_project_link_arguments(ldflags, language: ['c', 'cpp'])
 
 # list of targets for various alias targets
 backend_targets = []
+data_targets = []
 bin_targets = []
 pl_targets = []
 contrib_targets = []
@@ -2894,6 +2895,8 @@ subdir('src/interfaces/ecpg/test')
 
 subdir('doc/src/sgml')
 
+subdir('src/tools/pgindent')
+
 generated_sources_ac += {'': ['GNUmakefile']}
 
 # After processing src/test, add test_install_libs to the testprep_targets
@@ -2982,6 +2985,7 @@ endif
 all_built = [
   backend_targets,
   bin_targets,
+  data_targets,
   libpq_st,
   pl_targets,
   contrib_targets,
@@ -3017,12 +3021,20 @@ run_target('install-test-files',
 # Indentation and similar targets
 ###############################################################
 
+# If the dependencies for generating a local typedefs.list are fulfilled, we
+# use a combination of a locally built and the source tree's typedefs.list
+# file (see src/tools/pgindent/meson.build) for reindenting. That ensures
+# newly added typedefs are indented correctly.
 indent_base_cmd = [perl, files('src/tools/pgindent/pgindent'),
     '--indent', pg_bsd_indent.full_path(),
     '--sourcetree=@SOURCE_ROOT@']
 indent_depend = [pg_bsd_indent]
 
-# reindent the entire tree
+if typedefs_supported
+  indent_base_cmd += ['--typedefs', typedefs_merged.full_path()]
+  indent_depend += typedefs_merged
+endif
+
 # Reindent the entire tree
 run_target('indent-tree',
   command: indent_base_cmd + ['.'],
diff --git a/src/backend/jit/llvm/meson.build b/src/backend/jit/llvm/meson.build
index 8ffaf414609..7e2f6d5bd5c 100644
--- a/src/backend/jit/llvm/meson.build
+++ b/src/backend/jit/llvm/meson.build
@@ -82,4 +82,4 @@ llvmjit_types = custom_target('llvmjit_types.bc',
   install_dir: dir_lib_pkg,
   depfile: '@BASENAME@.c.bc.d',
 )
-backend_targets += llvmjit_types
+data_targets += llvmjit_types
diff --git a/src/backend/snowball/meson.build b/src/backend/snowball/meson.build
index 0f669c0bf3c..69e777399fd 100644
--- a/src/backend/snowball/meson.build
+++ b/src/backend/snowball/meson.build
@@ -94,4 +94,4 @@ install_subdir('stopwords',
 )
 
 backend_targets += dict_snowball
-backend_targets += snowball_create
+data_targets += snowball_create
diff --git a/src/timezone/meson.build b/src/timezone/meson.build
index 7b85a01c6bd..779a51d85a4 100644
--- a/src/timezone/meson.build
+++ b/src/timezone/meson.build
@@ -50,7 +50,7 @@ if get_option('system_tzdata') == ''
     install_dir: dir_data,
   )
 
-  bin_targets += tzdata
+  data_targets += tzdata
 endif
 
 subdir('tznames')
diff --git a/src/tools/pgindent/merge_typedefs b/src/tools/pgindent/merge_typedefs
new file mode 100755
index 00000000000..c2e1f446c15
--- /dev/null
+++ b/src/tools/pgindent/merge_typedefs
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+
+# Merge several typedefs files into one sorted, de-duplicated list.
+
+import argparse
+import os
+import subprocess
+import sys
+
+parser = argparse.ArgumentParser(
+    description='merge multiple typedef files')
+
+parser.add_argument('--output', type=argparse.FileType('w'), required=True)
+parser.add_argument('input', type=argparse.FileType('r'), nargs='*')
+
+args = parser.parse_args()
+
+# One typedef per line; blank lines are dropped so they never reach the output.
+typedefs = set()
+for infile in args.input:
+    typedefs.update(name for name in map(str.strip, infile) if name)
+
+# Sort for a stable result regardless of the order the inputs are given in.
+print('\n'.join(sorted(typedefs)), file=args.output)
diff --git a/src/tools/pgindent/meson.build b/src/tools/pgindent/meson.build
new file mode 100644
index 00000000000..c0801a6c86c
--- /dev/null
+++ b/src/tools/pgindent/meson.build
@@ -0,0 +1,70 @@
+# Requires meson 0.63 or upwards (some older versions work, but only some) and
+# a host whose debug info typedefs_from_objs can parse (linux and darwin). The
+# meson requirement could likely be lifted, but for now it's not crucial.
+typedefs_supported = meson.version().version_compare('>=0.63') and host_system in ['linux', 'darwin']
+if not typedefs_supported
+  subdir_done()
+endif
+
+typedefs_from_objs = files('typedefs_from_objs')
+typedefs_from_objs_cmd = [typedefs_from_objs, '--host', host_system, '--output', '@OUTPUT@', '@INPUT@']
+merge_typedefs = files('merge_typedefs')
+merge_typedefs_cmd = [merge_typedefs, '--output', '@OUTPUT@', '@INPUT@']
+
+# XXX: This list of targets should likely not be maintained here
+typedef_src_tgts = [
+  backend_targets,
+  bin_targets,
+  [libpq_st],
+  pl_targets,
+  contrib_targets,
+  ecpg_targets,
+]
+
+# We generate partial typedefs files for each binary/library. That makes using
+# this during incremental development much faster.
+#
+# We process the object files rather than the linked executables, because
+# extracting typedefs from executables doesn't work on macos. There doesn't
+# seem to be a reason to target executables directly on other platforms.
+typedef_tgts = []
+foreach tgts : typedef_src_tgts
+  foreach tgt : tgts
+    # can't use tgt.name(), as we have targets that differ just in suffix
+    name = fs.name(tgt.full_path())
+    tdname = 'typedefs.list.local-@0@'.format(name)
+    objs = tgt.extract_all_objects(recursive: true)
+    typedef_tgts += custom_target(tdname,
+                                  output: tdname,
+                                  command: typedefs_from_objs_cmd,
+                                  input: objs,
+                                  build_by_default: false,
+                                  )
+
+  endforeach
+endforeach
+
+# A typedefs.list file for the local build tree. This may not contain typedefs
+# referenced e.g. by platform specific code for other platforms.
+typedefs_local = custom_target('typedefs.list.local', output: 'typedefs.list.local',
+  command: merge_typedefs_cmd,
+  input: typedef_tgts,
+  build_by_default: false,
+)
+
+# The locally generated typedefs.list, merged with the source tree version.
+# This is useful for reindenting code, particularly when the local tree has
+# new types added.
+typedefs_merged = custom_target('typedefs.list.merged', output: 'typedefs.list.merged',
+  command: merge_typedefs_cmd,
+  input: [typedefs_local, files('typedefs.list')],
+  build_by_default: false,
+)
+
+# FIXME: As-is this is rarely useful, as the local typedefs file will likely
+# contain too many changes.
+if cp.found()
+  run_target('update-typedefs',
+    command: [cp, typedefs_merged, '@SOURCE_ROOT@/src/tools/pgindent/typedefs.list'],
+  )
+endif
diff --git a/src/tools/pgindent/typedefs_from_objs b/src/tools/pgindent/typedefs_from_objs
new file mode 100755
index 00000000000..821c84cb3d8
--- /dev/null
+++ b/src/tools/pgindent/typedefs_from_objs
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+
+# Tool to extract typedef names from the debug info of object files.
+
+import argparse
+import re
+import shutil
+import subprocess
+import sys
+
+parser = argparse.ArgumentParser(
+    description='generate typedefs file for a set of object files')
+
+parser.add_argument('--output', type=argparse.FileType('w'), required=True)
+parser.add_argument('--host', type=str, required=True)
+parser.add_argument('input', nargs='*')
+
+args = parser.parse_args()
+
+if args.host == 'linux':
+    find_td_re = re.compile(r'^[^\n]+TAG_typedef\)\n[^\n]+DW_AT_name\s*:\s*\([^\)]+\): ([^\n]+)$', re.MULTILINE)
+    # FIXME: should probably be set by the caller? Except that the same binary
+    # name behaves very differently on different platforms :/
+    tool, tool_args = 'objdump', ['-Wi']
+elif args.host == 'darwin':
+    find_td_re = re.compile(r'^[^\n]+TAG_typedef\n\s*DW_AT_type[^\n]+\n\s+DW_AT_name\s*\(\"([^\n]+)\"\)$', re.MULTILINE)
+    tool, tool_args = 'dwarfdump', []
+else:
+    sys.exit(f'unsupported platform: {args.host}')
+
+tool_path = shutil.which(tool)
+if tool_path is None:
+    sys.exit(f'{tool} not found in PATH')
+# Dump the debug info of all inputs in one go and scan it for typedef entries.
+lcmd = [tool_path] + tool_args + args.input
+sp = subprocess.run(lcmd, stdout=subprocess.PIPE, universal_newlines=True)
+if sp.returncode != 0:
+    print(f'{lcmd} failed with return code {sp.returncode}', file=sys.stderr)
+    sys.exit(sp.returncode)
+
+for typedef in find_td_re.findall(sp.stdout):
+    print(typedef, file=args.output)
-- 
2.38.0

