Improve docs syntax checking and enable it in the meson build

Started by Nazir Bilal Yavuz3 months ago6 messages
#1Nazir Bilal Yavuz
byavuz81@gmail.com
1 attachment(s)

Hi,

The Meson build did not include tab and non-breaking space checks for
the docs. The attached patch adds these checks and includes a few
related improvements.

This topic was previously discussed towards end of the another thread
[1]: /messages/by-id/CACJufxFgAh1--EMwOjMuANe=VTmjkNaZjH+AzSe04-8ZCGiESA@mail.gmail.com
thread for it, so I am continuing the discussion here.

These checks were previously done in the Makefile:

```
# tabs are harmless, but it is best to avoid them in SGML files
check-tabs:
@( ! grep ' ' $(wildcard $(srcdir)/*.sgml $(srcdir)/func/*.sgml
$(srcdir)/ref/*.sgml $(srcdir)/*.xsl) ) || \
(echo "Tabs appear in SGML/XML files" 1>&2; exit 1)

# Non-breaking spaces are harmless, but it is best to avoid them in SGML files.
# Use perl command because non-GNU grep or sed could not have hex
escape sequence.
check-nbsp:
@ ( $(PERL) -ne '/\xC2\xA0/ and print("$$ARGV:$$_"),$$n++; END
{exit($$n>0)}' \
$(wildcard $(srcdir)/*.sgml $(srcdir)/func/*.sgml
$(srcdir)/ref/*.sgml $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \
(echo "Non-breaking spaces appear in SGML/XML files" 1>&2; exit 1)
```

I moved these checks to a new Perl script called sgml_syntax_check.pl.
This script can also perform xmllint validation (when possible).

Here is a summary of the changes:

1 - A new sgml_syntax_check.pl script was added to handle tab, nbsp,
and xmllint validation checks.
1.1 - It is registered as the sgml_syntax_check test in the Meson build.
1.2 - These checks are run when executing 'make check' or 'meson test
sgml_syntax_check' commands.
1.3 - During the creation of postgres-full.xml, the script performs
tab and nbsp checks. The xmllint check is skipped there, since
validation is already handled by the --valid option. So, we do not run
the same check twice.

2 - The sgml_syntax_check test runs by default in the Meson build.
2.1 - Tab and nbsp checks always run.
2.2 - The xmllint validation and the test are skipped if the DocBook
can not be found. I was not able to achieve the same behavior in the
autoconf build, so the test is not run by default there. The Make
build continues to work as before, you can run the checks manually via
make check in doc/src/sgml.

[1]: /messages/by-id/CACJufxFgAh1--EMwOjMuANe=VTmjkNaZjH+AzSe04-8ZCGiESA@mail.gmail.com

--
Regards,
Nazir Bilal Yavuz
Microsoft

Attachments:

v5-0001-Improve-docs-syntax-checking.patchtext/x-patch; charset=US-ASCII; name=v5-0001-Improve-docs-syntax-checking.patchDownload
From 75005289c720c5283b909a15d05ffc7c5bbf6bdc Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Tue, 7 Oct 2025 15:18:03 +0300
Subject: [PATCH v5] Improve docs syntax checking

Move the checks out of the Makefile into a perl script that can be
called from both the Makefile and meson.build. The set of files checked
is simplified, so it is just all the sgml and xsl files found in
docs/src/sgml directory tree.

Along the way make some adjustments to .cirrus.tasks.yml to support this
better in CI.

Also ensure that the checks are run while generating the
postgres-full.xml.

Author: Nazir Bilal Yavuz <byavuz81@gmail.com>
Co-Author: Andrew Dunstan <andrew@dunslane.net>
---
 doc/src/sgml/Makefile             |  17 +----
 doc/src/sgml/docbook-test.sgml    |   9 +++
 doc/src/sgml/meson.build          |  32 +++++++++-
 doc/src/sgml/sgml_syntax_check.pl | 100 ++++++++++++++++++++++++++++++
 .cirrus.tasks.yml                 |   3 +
 5 files changed, 146 insertions(+), 15 deletions(-)
 create mode 100644 doc/src/sgml/docbook-test.sgml
 create mode 100755 doc/src/sgml/sgml_syntax_check.pl

diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile
index b53b2694a6b..9b6f65ad215 100644
--- a/doc/src/sgml/Makefile
+++ b/doc/src/sgml/Makefile
@@ -69,6 +69,7 @@ ALL_IMAGES := $(wildcard $(srcdir)/images/*.svg)
 # files into one big file).  This helps tools that don't understand
 # vpath builds (such as dbtoepub).
 postgres-full.xml: postgres.sgml $(ALL_SGML)
+	$(PERL) $(srcdir)/sgml_syntax_check.pl --srcdir $(srcdir)
 	$(XMLLINT) $(XMLINCLUDE) --output $@ --noent --valid $<
 
 
@@ -200,8 +201,8 @@ MAKEINFO = makeinfo
 ##
 
 # Quick syntax check without style processing
-check: postgres.sgml $(ALL_SGML) check-tabs check-nbsp
-	$(XMLLINT) $(XMLINCLUDE) --noout --valid $<
+check: postgres.sgml $(ALL_SGML)
+	$(PERL) $(srcdir)/sgml_syntax_check.pl --xmllint "$(XMLLINT)" --xmllint_input $< --srcdir $(srcdir)
 
 
 ##
@@ -261,18 +262,6 @@ clean-man:
 
 endif # sqlmansectnum != 7
 
-# tabs are harmless, but it is best to avoid them in SGML files
-check-tabs:
-	@( ! grep '	' $(wildcard $(srcdir)/*.sgml $(srcdir)/func/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl) ) || \
-	(echo "Tabs appear in SGML/XML files" 1>&2;  exit 1)
-
-# Non-breaking spaces are harmless, but it is best to avoid them in SGML files.
-# Use perl command because non-GNU grep or sed could not have hex escape sequence.
-check-nbsp:
-	@ ( $(PERL) -ne '/\xC2\xA0/ and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \
-	  $(wildcard $(srcdir)/*.sgml $(srcdir)/func/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \
-	(echo "Non-breaking spaces appear in SGML/XML files" 1>&2;  exit 1)
-
 ##
 ## Clean
 ##
diff --git a/doc/src/sgml/docbook-test.sgml b/doc/src/sgml/docbook-test.sgml
new file mode 100644
index 00000000000..242a52676e0
--- /dev/null
+++ b/doc/src/sgml/docbook-test.sgml
@@ -0,0 +1,9 @@
+<!-- doc/src/sgml/docbook-test.sgml -->
+
+<!-- This file is used to check if the DocBook can be found -->
+
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+  "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<book>
+  <title>DocBook Test</title>
+</book>
diff --git a/doc/src/sgml/meson.build b/doc/src/sgml/meson.build
index 6ae192eac68..312f5bb9fea 100644
--- a/doc/src/sgml/meson.build
+++ b/doc/src/sgml/meson.build
@@ -96,6 +96,18 @@ xmllint = xmltools_wrapper + [
   '--tool', xmllint_bin, '--',
 ]
 
+sgml_syntax_check = files(
+  'sgml_syntax_check.pl'
+)
+
+sgml_syntax_check_command = [
+  sgml_syntax_check,
+  '--srcdir',
+    meson.current_source_dir(),
+  '--builddir',
+    meson.current_build_dir(),
+  ]
+
 # Run validation only once, common to all subsequent targets.  While
 # we're at it, also resolve all entities (that is, copy all included
 # files into one big file).  This helps tools that don't understand
@@ -104,7 +116,9 @@ postgres_full_xml = custom_target('postgres-full.xml',
   input: 'postgres.sgml',
   output: 'postgres-full.xml',
   depfile: 'postgres-full.xml.d',
-  command: [xmllint, '--nonet', '--noent', '--valid',
+  command: [perl, sgml_syntax_check_command] +
+           ['&&'] +
+           [xmllint, '--nonet', '--noent', '--valid',
             '--path', '@OUTDIR@', '-o', '@OUTPUT@', '@INPUT@'],
   depends: doc_generated,
   build_by_default: false,
@@ -306,3 +320,19 @@ endif
 if alldocs.length() != 0
   alias_target('alldocs', alldocs)
 endif
+
+test(
+  'sgml_syntax_check',
+  perl,
+  protocol: 'exitcode',
+  suite: 'doc',
+  args: sgml_syntax_check_command + [
+    '--xmllint',
+      '@0@ --nonet'.format(xmllint_bin.full_path()),
+    '--xmllint_input',
+      'postgres.sgml'
+  ],
+  depends: doc_generated
+)
+
+testprep_targets += doc_generated
diff --git a/doc/src/sgml/sgml_syntax_check.pl b/doc/src/sgml/sgml_syntax_check.pl
new file mode 100755
index 00000000000..473c6bcc033
--- /dev/null
+++ b/doc/src/sgml/sgml_syntax_check.pl
@@ -0,0 +1,100 @@
+# /usr/bin/perl
+
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+# doc/src/sgml/sgml_syntax_check.pl
+
+use strict;
+use warnings FATAL => 'all';
+use Getopt::Long;
+
+use File::Find;
+
+my $xmllint;
+my $srcdir;
+my $builddir;
+my $xmllint_input;
+
+GetOptions(
+	'srcdir:s' => \$srcdir,
+	'builddir:s' => \$builddir,
+	'xmllint:s' => \$xmllint,
+	'xmllint_input:s' => \$xmllint_input) or die "$0: wrong arguments";
+
+die "$0: --srcdir must be specified\n" unless defined $srcdir;
+
+my $xmlinclude = "--path . --path $srcdir";
+$xmlinclude .= " --path $builddir" if defined $builddir;
+
+# find files to process - all the sgml and xsl files (including in subdirectories)
+my @files_to_process;
+my @dirs_to_search = ($srcdir);
+push @dirs_to_search, $builddir if defined $builddir;
+find(
+	sub {
+		return unless -f $_;
+		return if $_ !~ /\.(sgml|xsl)$/;
+		push @files_to_process, $File::Find::name;
+	},
+	@dirs_to_search,);
+
+# tabs and non-breaking spaces are harmless, but it is best to avoid them in SGML files
+sub check_tabs_and_nbsp
+{
+	my $errors = 0;
+	for my $f (@files_to_process)
+	{
+		open my $fh, "<:encoding(UTF-8)", $f or die "Can't open $f: $!";
+		my $line_no = 0;
+		while (<$fh>)
+		{
+			$line_no++;
+			if (/\t/)
+			{
+				print STDERR "Tab found in $f:$line_no\n";
+				$errors++;
+			}
+			if (/\xC2\xA0/)
+			{
+				print STDERR "$f:$line_no: contains non-breaking space\n";
+				$errors++;
+			}
+		}
+		close($fh);
+	}
+
+	if ($errors)
+	{
+		die "Tabs and/or non-breaking spaces appear in SGML/XML files\n";
+	}
+}
+
+sub test_docbook
+{
+	my $cmd = "$xmllint $xmlinclude --noout --valid docbook-test.sgml";
+	if (system($cmd) != 0)
+	{
+		print STDERR "DocBook DTD file can not be found\n";
+		# Exit code 77 is used for skipping the test instead of failing it
+		exit(77);
+	}
+}
+
+sub run_xmllint
+{
+	# Check DocBook DTD file before running xmllint
+	test_docbook();
+
+	my $cmd = "$xmllint $xmlinclude --noout --valid $xmllint_input";
+	system($cmd) == 0 or die "xmllint validation failed\n";
+}
+
+check_tabs_and_nbsp();
+
+# Run xmllint check only if its arguments are passed. Because generation of
+# postgres-full.xml already does xmllint check, we don't want to do this check
+# twice.
+if ($xmllint && $xmllint_input)
+{
+	run_xmllint();
+}
diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml
index eca9d62fc22..1c937247a9a 100644
--- a/.cirrus.tasks.yml
+++ b/.cirrus.tasks.yml
@@ -627,6 +627,8 @@ task:
     TEST_JOBS: 8
     IMAGE: ghcr.io/cirruslabs/macos-runner:sonoma
 
+    XML_CATALOG_FILES: /opt/local/share/xml/docbook/4.5/catalog.xml
+
     CIRRUS_WORKING_DIR: ${HOME}/pgsql/
     CCACHE_DIR: ${HOME}/ccache
     MACPORTS_CACHE: ${HOME}/macports-cache
@@ -641,6 +643,7 @@ task:
 
     MACOS_PACKAGE_LIST: >-
       ccache
+      docbook-xml-4.5
       icu
       kerberos5
       lz4
-- 
2.51.0

#2Nazir Bilal Yavuz
byavuz81@gmail.com
In reply to: Nazir Bilal Yavuz (#1)
1 attachment(s)
Re: Improve docs syntax checking and enable it in the meson build

Hi,

On Tue, 7 Oct 2025 at 16:12, Nazir Bilal Yavuz <byavuz81@gmail.com> wrote:

The Meson build did not include tab and non-breaking space checks for
the docs. The attached patch adds these checks and includes a few
related improvements.

I have updated v6 to use a stamp file, as Andres suggested [1]/messages/by-id/tcjetkmnm4vtuyxakqvkqokvow6csjokdwwtplc5nl4zbpyjoo@jjfhsuqa6fno, to
ensure a dependency between the syntax check and the postgres-full.xml
file in the meson build.

[1]: /messages/by-id/tcjetkmnm4vtuyxakqvkqokvow6csjokdwwtplc5nl4zbpyjoo@jjfhsuqa6fno

--
Regards,
Nazir Bilal Yavuz
Microsoft

Attachments:

v6-0001-Improve-docs-syntax-checking.patchtext/x-patch; charset=US-ASCII; name=v6-0001-Improve-docs-syntax-checking.patchDownload
From 2549adca5facd392d68545c1a61b10e1cf8d4bde Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Tue, 7 Oct 2025 15:18:03 +0300
Subject: [PATCH v6] Improve docs syntax checking

Move the checks out of the Makefile into a perl script that can be
called from both the Makefile and meson.build. The set of files checked
is simplified, so it is just all the sgml and xsl files found in
docs/src/sgml directory tree.

Along the way make some adjustments to .cirrus.tasks.yml to support this
better in CI.

Also ensure that the checks are run while generating the
postgres-full.xml.

Author: Nazir Bilal Yavuz <byavuz81@gmail.com>
Co-Author: Andrew Dunstan <andrew@dunslane.net>
---
 doc/src/sgml/Makefile             |  17 +---
 doc/src/sgml/docbook-test.sgml    |   9 ++
 doc/src/sgml/meson.build          |  39 +++++++-
 doc/src/sgml/sgml_syntax_check.pl | 160 ++++++++++++++++++++++++++++++
 .cirrus.tasks.yml                 |   4 +
 5 files changed, 214 insertions(+), 15 deletions(-)
 create mode 100644 doc/src/sgml/docbook-test.sgml
 create mode 100755 doc/src/sgml/sgml_syntax_check.pl

diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile
index b53b2694a6b..9b6f65ad215 100644
--- a/doc/src/sgml/Makefile
+++ b/doc/src/sgml/Makefile
@@ -69,6 +69,7 @@ ALL_IMAGES := $(wildcard $(srcdir)/images/*.svg)
 # files into one big file).  This helps tools that don't understand
 # vpath builds (such as dbtoepub).
 postgres-full.xml: postgres.sgml $(ALL_SGML)
+	$(PERL) $(srcdir)/sgml_syntax_check.pl --srcdir $(srcdir)
 	$(XMLLINT) $(XMLINCLUDE) --output $@ --noent --valid $<
 
 
@@ -200,8 +201,8 @@ MAKEINFO = makeinfo
 ##
 
 # Quick syntax check without style processing
-check: postgres.sgml $(ALL_SGML) check-tabs check-nbsp
-	$(XMLLINT) $(XMLINCLUDE) --noout --valid $<
+check: postgres.sgml $(ALL_SGML)
+	$(PERL) $(srcdir)/sgml_syntax_check.pl --xmllint "$(XMLLINT)" --xmllint_input $< --srcdir $(srcdir)
 
 
 ##
@@ -261,18 +262,6 @@ clean-man:
 
 endif # sqlmansectnum != 7
 
-# tabs are harmless, but it is best to avoid them in SGML files
-check-tabs:
-	@( ! grep '	' $(wildcard $(srcdir)/*.sgml $(srcdir)/func/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl) ) || \
-	(echo "Tabs appear in SGML/XML files" 1>&2;  exit 1)
-
-# Non-breaking spaces are harmless, but it is best to avoid them in SGML files.
-# Use perl command because non-GNU grep or sed could not have hex escape sequence.
-check-nbsp:
-	@ ( $(PERL) -ne '/\xC2\xA0/ and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \
-	  $(wildcard $(srcdir)/*.sgml $(srcdir)/func/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \
-	(echo "Non-breaking spaces appear in SGML/XML files" 1>&2;  exit 1)
-
 ##
 ## Clean
 ##
diff --git a/doc/src/sgml/docbook-test.sgml b/doc/src/sgml/docbook-test.sgml
new file mode 100644
index 00000000000..242a52676e0
--- /dev/null
+++ b/doc/src/sgml/docbook-test.sgml
@@ -0,0 +1,9 @@
+<!-- doc/src/sgml/docbook-test.sgml -->
+
+<!-- This file is used to check if the DocBook can be found -->
+
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+  "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<book>
+  <title>DocBook Test</title>
+</book>
diff --git a/doc/src/sgml/meson.build b/doc/src/sgml/meson.build
index 6ae192eac68..e5b7e58f6ad 100644
--- a/doc/src/sgml/meson.build
+++ b/doc/src/sgml/meson.build
@@ -96,6 +96,26 @@ xmllint = xmltools_wrapper + [
   '--tool', xmllint_bin, '--',
 ]
 
+sgml_syntax_check = files(
+  'sgml_syntax_check.pl'
+)
+
+# Create a stamp file for sgml syntax check and make postgres_full_xml
+# depend on it.  So, this check will run before the generation of
+# postgres_full_xml.
+sgml_check_stamp = custom_target('sgml-check-stamp',
+  output: 'sgml-check-stamp',
+  depfile: 'sgml-check-stamp.d',
+  command: [
+    perl, sgml_syntax_check,
+    '--srcdir', meson.current_source_dir(),
+    '--builddir', meson.current_build_dir(),
+    '--dep_file', '@DEPFILE@',
+    '--stamp_file', '@OUTPUT@',
+  ],
+  build_by_default: false,
+)
+
 # Run validation only once, common to all subsequent targets.  While
 # we're at it, also resolve all entities (that is, copy all included
 # files into one big file).  This helps tools that don't understand
@@ -106,7 +126,7 @@ postgres_full_xml = custom_target('postgres-full.xml',
   depfile: 'postgres-full.xml.d',
   command: [xmllint, '--nonet', '--noent', '--valid',
             '--path', '@OUTDIR@', '-o', '@OUTPUT@', '@INPUT@'],
-  depends: doc_generated,
+  depends: [doc_generated, sgml_check_stamp],
   build_by_default: false,
 )
 docs += postgres_full_xml
@@ -306,3 +326,20 @@ endif
 if alldocs.length() != 0
   alias_target('alldocs', alldocs)
 endif
+
+test(
+  'sgml_syntax_check',
+  perl,
+  protocol: 'exitcode',
+  suite: 'doc',
+  args: [
+    sgml_syntax_check,
+    '--srcdir', meson.current_source_dir(),
+    '--builddir', meson.current_build_dir(),
+    '--xmllint', '@0@ --nonet'.format(xmllint_bin.full_path()),
+    '--xmllint_input', 'postgres.sgml'
+  ],
+  depends: doc_generated
+)
+
+testprep_targets += doc_generated
diff --git a/doc/src/sgml/sgml_syntax_check.pl b/doc/src/sgml/sgml_syntax_check.pl
new file mode 100755
index 00000000000..8748a42cc99
--- /dev/null
+++ b/doc/src/sgml/sgml_syntax_check.pl
@@ -0,0 +1,160 @@
+#!/usr/bin/perl
+#
+# Perl script that checks SGML/XSL syntax and formatting issues.
+#
+# doc/src/sgml/sgml_syntax_check.pl
+#
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+use strict;
+use warnings FATAL => 'all';
+use Getopt::Long;
+
+use File::Find;
+
+my $srcdir;
+my $builddir;
+my $xmllint;
+my $xmllint_input;
+my $dep_file;
+my $stamp_file;
+
+GetOptions(
+	'srcdir:s' => \$srcdir,
+	'builddir:s' => \$builddir,
+	'xmllint:s' => \$xmllint,
+	'xmllint_input:s' => \$xmllint_input,
+	'dep_file:s' => \$dep_file,
+	'stamp_file:s' => \$stamp_file) or die "$0: wrong arguments";
+
+die "$0: --srcdir must be specified\n" unless defined $srcdir;
+
+my $xmlinclude = "--path . --path $srcdir";
+$xmlinclude .= " --path $builddir" if defined $builddir;
+
+# Find files to process - all the sgml and xsl files (including in subdirectories)
+my @files_to_process;
+my @dirs_to_search = ($srcdir);
+push @dirs_to_search, $builddir if defined $builddir;
+find(
+	sub {
+		return unless -f $_;
+		return if $_ !~ /\.(sgml|xsl)$/;
+		push @files_to_process, $File::Find::name;
+	},
+	@dirs_to_search,);
+
+# tabs and non-breaking spaces are harmless, but it is best to avoid them in SGML files
+sub check_tabs_and_nbsp
+{
+	my $errors = 0;
+	for my $f (@files_to_process)
+	{
+		open my $fh, "<:encoding(UTF-8)", $f or die "Can't open $f: $!";
+		my $line_no = 0;
+		while (<$fh>)
+		{
+			$line_no++;
+			if (/\t/)
+			{
+				print STDERR "Tab found in $f:$line_no\n";
+				$errors++;
+			}
+			if (/\xC2\xA0/)
+			{
+				print STDERR "$f:$line_no: contains non-breaking space\n";
+				$errors++;
+			}
+		}
+		close($fh);
+	}
+
+	if ($errors)
+	{
+		remove_stamp_file();
+		die "Tabs and/or non-breaking spaces appear in SGML/XML files\n";
+	}
+}
+
+sub test_docbook
+{
+	my $cmd = "$xmllint $xmlinclude --noout --valid docbook-test.sgml";
+	if (system($cmd) != 0)
+	{
+		remove_stamp_file();
+
+		print STDERR "DocBook DTD file can not be found\n";
+		# Exit code 77 is used for skipping the test instead of failing it
+		exit(77);
+	}
+}
+
+sub run_xmllint
+{
+	# Check DocBook DTD file before running xmllint
+	test_docbook();
+
+	my $cmd = "$xmllint $xmlinclude --noout --valid $xmllint_input";
+	my $res = system($cmd);
+	if ($res != 0)
+	{
+		remove_stamp_file();
+		die "xmllint validation failed\n";
+	}
+}
+
+sub write_dep_file()
+{
+	if ($dep_file)
+	{
+		open my $fh, '>', $dep_file or die "can't open $dep_file: $!";
+
+		foreach my $file (@files_to_process)
+		{
+			print $fh "$dep_file : $file\n";
+		}
+
+		close $fh;
+	}
+}
+
+sub create_stamp_file
+{
+	# All checks are passed, we can create a stamp file now
+	if ($stamp_file)
+	{
+		# Avoid touching existing stamp file to prevent unnecessary rebuilds
+		if (!(-f $stamp_file))
+		{
+			open my $fh, '>', $stamp_file
+			  or die "can't open $stamp_file: $!";
+			close $fh;
+		}
+
+	}
+}
+
+sub remove_stamp_file
+{
+	if ($stamp_file)
+	{
+		unlink $stamp_file;
+	}
+}
+
+# Create a dependency file for meson build
+write_dep_file();
+
+### Do the checks
+check_tabs_and_nbsp();
+
+# Run xmllint check only if its arguments are passed. Because generation of
+# postgres-full.xml already does xmllint check, we don't want to do this check
+# twice.
+if ($xmllint && $xmllint_input)
+{
+	run_xmllint();
+}
+###
+
+create_stamp_file();
diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml
index eca9d62fc22..701a508e396 100644
--- a/.cirrus.tasks.yml
+++ b/.cirrus.tasks.yml
@@ -627,6 +627,9 @@ task:
     TEST_JOBS: 8
     IMAGE: ghcr.io/cirruslabs/macos-runner:sonoma
 
+    # Ensure DocBook 4.5 DTD is available for syntax checks
+    XML_CATALOG_FILES: /opt/local/share/xml/docbook/4.5/catalog.xml
+
     CIRRUS_WORKING_DIR: ${HOME}/pgsql/
     CCACHE_DIR: ${HOME}/ccache
     MACPORTS_CACHE: ${HOME}/macports-cache
@@ -641,6 +644,7 @@ task:
 
     MACOS_PACKAGE_LIST: >-
       ccache
+      docbook-xml-4.5
       icu
       kerberos5
       lz4
-- 
2.51.0

#3Peter Eisentraut
peter@eisentraut.org
In reply to: Nazir Bilal Yavuz (#1)
Re: Improve docs syntax checking and enable it in the meson build

On 07.10.25 15:12, Nazir Bilal Yavuz wrote:

1 - A new sgml_syntax_check.pl script was added to handle tab, nbsp,
and xmllint validation checks.
1.1 - It is registered as the sgml_syntax_check test in the Meson build.
1.2 - These checks are run when executing 'make check' or 'meson test
sgml_syntax_check' commands.
1.3 - During the creation of postgres-full.xml, the script performs
tab and nbsp checks. The xmllint check is skipped there, since
validation is already handled by the --valid option. So, we do not run
the same check twice.

I think including the xmllint support in the new sgml_syntax_check is
overkill, since the normal build already runs xmllint, or you could
alternatively just write it into the build description file (makefile or
meson.build). The build commands should be visible in the build
description file, not layered into some other script.

I suggest the following approach:

- Change sgml_syntax_check.pl into a smaller script that just checks for
tabs and nbsp. (Maybe a different name then.)

- Add a call of that script to the build of postgres-full.xml.

- Change the "check" target to just depend on postgres-full.xml, without
its own commands.

And then replicate that logic in meson.

#4Nazir Bilal Yavuz
byavuz81@gmail.com
In reply to: Peter Eisentraut (#3)
1 attachment(s)
Re: Improve docs syntax checking and enable it in the meson build

Hi,

Thank you for looking into this!

On Wed, 15 Oct 2025 at 21:32, Peter Eisentraut <peter@eisentraut.org> wrote:

On 07.10.25 15:12, Nazir Bilal Yavuz wrote:

1 - A new sgml_syntax_check.pl script was added to handle tab, nbsp,
and xmllint validation checks.
1.1 - It is registered as the sgml_syntax_check test in the Meson build.
1.2 - These checks are run when executing 'make check' or 'meson test
sgml_syntax_check' commands.
1.3 - During the creation of postgres-full.xml, the script performs
tab and nbsp checks. The xmllint check is skipped there, since
validation is already handled by the --valid option. So, we do not run
the same check twice.

I think including the xmllint support in the new sgml_syntax_check is
overkill, since the normal build already runs xmllint, or you could
alternatively just write it into the build description file (makefile or
meson.build). The build commands should be visible in the build
description file, not layered into some other script.

I suggest the following approach:

- Change sgml_syntax_check.pl into a smaller script that just checks for
tabs and nbsp. (Maybe a different name then.)

- Add a call of that script to the build of postgres-full.xml.

- Change the "check" target to just depend on postgres-full.xml, without
its own commands.

And then replicate that logic in meson.

All of these are addressed in v7. I think sgml_syntax_check is still a
suitable name but I am open to suggestions.

--
Regards,
Nazir Bilal Yavuz
Microsoft

Attachments:

v7-0001-Improve-docs-syntax-checking.patchtext/x-patch; charset=US-ASCII; name=v7-0001-Improve-docs-syntax-checking.patchDownload
From bab59770eee6ee348d20c8cb7eb1b7af5366f5df Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Tue, 21 Oct 2025 13:49:16 +0300
Subject: [PATCH v7] Improve docs syntax checking

Move the checks out of the Makefile into a perl script that can be
called from both the Makefile and meson.build. The set of files checked
is simplified, so it is just all the sgml and xsl files found in
docs/src/sgml directory tree.

Along the way make some adjustments to .cirrus.tasks.yml to support this
better in CI.

Also ensure that the checks are run while generating the
postgres-full.xml.

Author: Nazir Bilal Yavuz <byavuz81@gmail.com>
Co-Author: Andrew Dunstan <andrew@dunslane.net>
Discussion: https://postgr.es/m/CAN55FZ1qzoDcaKqsR3DwE%3DX6FL%2Bwpm%2B%3DKLvH6ahrRXNhjU53DQ%40mail.gmail.com
---
 doc/src/sgml/Makefile             |  16 +----
 doc/src/sgml/meson.build          |  34 ++++++++-
 doc/src/sgml/sgml_syntax_check.pl | 115 ++++++++++++++++++++++++++++++
 .cirrus.tasks.yml                 |   3 +
 4 files changed, 153 insertions(+), 15 deletions(-)
 create mode 100755 doc/src/sgml/sgml_syntax_check.pl

diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile
index b53b2694a6b..50de8fab931 100644
--- a/doc/src/sgml/Makefile
+++ b/doc/src/sgml/Makefile
@@ -69,6 +69,7 @@ ALL_IMAGES := $(wildcard $(srcdir)/images/*.svg)
 # files into one big file).  This helps tools that don't understand
 # vpath builds (such as dbtoepub).
 postgres-full.xml: postgres.sgml $(ALL_SGML)
+	$(PERL) $(srcdir)/sgml_syntax_check.pl --srcdir $(srcdir)
 	$(XMLLINT) $(XMLINCLUDE) --output $@ --noent --valid $<
 
 
@@ -200,8 +201,7 @@ MAKEINFO = makeinfo
 ##
 
 # Quick syntax check without style processing
-check: postgres.sgml $(ALL_SGML) check-tabs check-nbsp
-	$(XMLLINT) $(XMLINCLUDE) --noout --valid $<
+check: postgres-full.xml
 
 
 ##
@@ -261,18 +261,6 @@ clean-man:
 
 endif # sqlmansectnum != 7
 
-# tabs are harmless, but it is best to avoid them in SGML files
-check-tabs:
-	@( ! grep '	' $(wildcard $(srcdir)/*.sgml $(srcdir)/func/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl) ) || \
-	(echo "Tabs appear in SGML/XML files" 1>&2;  exit 1)
-
-# Non-breaking spaces are harmless, but it is best to avoid them in SGML files.
-# Use perl command because non-GNU grep or sed could not have hex escape sequence.
-check-nbsp:
-	@ ( $(PERL) -ne '/\xC2\xA0/ and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \
-	  $(wildcard $(srcdir)/*.sgml $(srcdir)/func/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \
-	(echo "Non-breaking spaces appear in SGML/XML files" 1>&2;  exit 1)
-
 ##
 ## Clean
 ##
diff --git a/doc/src/sgml/meson.build b/doc/src/sgml/meson.build
index 6ae192eac68..7895ae5e776 100644
--- a/doc/src/sgml/meson.build
+++ b/doc/src/sgml/meson.build
@@ -96,6 +96,26 @@ xmllint = xmltools_wrapper + [
   '--tool', xmllint_bin, '--',
 ]
 
+sgml_syntax_check = files(
+  'sgml_syntax_check.pl'
+)
+
+# Create a stamp file for sgml syntax check and make postgres_full_xml
+# depend on it.  So, this check will run before the generation of
+# postgres_full_xml.
+sgml_check_stamp = custom_target('sgml-check-stamp',
+  output: 'sgml-check-stamp',
+  depfile: 'sgml-check-stamp.d',
+  command: [
+    perl, sgml_syntax_check,
+    '--srcdir', meson.current_source_dir(),
+    '--builddir', meson.current_build_dir(),
+    '--dep_file', '@DEPFILE@',
+    '--stamp_file', '@OUTPUT@',
+  ],
+  build_by_default: false,
+)
+
 # Run validation only once, common to all subsequent targets.  While
 # we're at it, also resolve all entities (that is, copy all included
 # files into one big file).  This helps tools that don't understand
@@ -106,7 +126,7 @@ postgres_full_xml = custom_target('postgres-full.xml',
   depfile: 'postgres-full.xml.d',
   command: [xmllint, '--nonet', '--noent', '--valid',
             '--path', '@OUTDIR@', '-o', '@OUTPUT@', '@INPUT@'],
-  depends: doc_generated,
+  depends: [doc_generated, sgml_check_stamp],
   build_by_default: false,
 )
 docs += postgres_full_xml
@@ -306,3 +326,15 @@ endif
 if alldocs.length() != 0
   alias_target('alldocs', alldocs)
 endif
+
+# Test creation of 'postgres_full_xml' by depending on it.
+test(
+  'doc' / 'sgml_syntax_check',
+  # We actually don't need to set 'executable', as we run the test by depending
+  # on 'postgres_full_xml', but meson requires setting an executable.
+  perl,
+  suite: 'doc',
+  depends: postgres_full_xml
+)
+
+testprep_targets += doc_generated
diff --git a/doc/src/sgml/sgml_syntax_check.pl b/doc/src/sgml/sgml_syntax_check.pl
new file mode 100755
index 00000000000..55c212d9e5d
--- /dev/null
+++ b/doc/src/sgml/sgml_syntax_check.pl
@@ -0,0 +1,115 @@
+#!/usr/bin/perl
+#
+# Perl script that checks SGML/XSL syntax and formatting issues.
+#
+# doc/src/sgml/sgml_syntax_check.pl
+#
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+use strict;
+use warnings FATAL => 'all';
+use Getopt::Long;
+
+use File::Find;
+
+my $srcdir;
+my $builddir;
+my $dep_file;
+my $stamp_file;
+
+GetOptions(
+	'srcdir:s' => \$srcdir,
+	'builddir:s' => \$builddir,
+	'dep_file:s' => \$dep_file,
+	'stamp_file:s' => \$stamp_file) or die "$0: wrong arguments";
+
+die "$0: --srcdir must be specified\n" unless defined $srcdir;
+
+# Find files to process - all the sgml and xsl files (including in subdirectories)
+my @files_to_process;
+my @dirs_to_search = ($srcdir);
+push @dirs_to_search, $builddir if defined $builddir;
+find(
+	sub {
+		return unless -f $_;
+		return if $_ !~ /\.(sgml|xsl)$/;
+		push @files_to_process, $File::Find::name;
+	},
+	@dirs_to_search,);
+
+# tabs and non-breaking spaces are harmless, but it is best to avoid them in SGML files
+sub check_tabs_and_nbsp
+{
+	my $errors = 0;
+	for my $f (@files_to_process)
+	{
+		open my $fh, "<:encoding(UTF-8)", $f or die "Can't open $f: $!";
+		my $line_no = 0;
+		while (<$fh>)
+		{
+			$line_no++;
+			if (/\t/)
+			{
+				print STDERR "Tab found in $f:$line_no\n";
+				$errors++;
+			}
+			if (/\xC2\xA0/)
+			{
+				print STDERR "$f:$line_no: contains non-breaking space\n";
+				$errors++;
+			}
+		}
+		close($fh);
+	}
+
+	if ($errors)
+	{
+		remove_stamp_file();
+		die "Tabs and/or non-breaking spaces appear in SGML/XML files\n";
+	}
+}
+
+sub write_dep_file()
+{
+	open my $fh, '>', $dep_file or die "can't open $dep_file: $!";
+
+	foreach my $file (@files_to_process)
+	{
+		print $fh "$dep_file : $file\n";
+	}
+
+	close $fh;
+}
+
+sub create_stamp_file
+{
+	# Avoid touching existing stamp file to prevent unnecessary rebuilds
+	if (!(-f $stamp_file))
+	{
+		open my $fh, '>', $stamp_file
+		  or die "can't open $stamp_file: $!";
+		close $fh;
+	}
+}
+
+sub remove_stamp_file
+{
+	if ($stamp_file)
+	{
+		unlink $stamp_file;
+	}
+}
+
+# Create a dependency file for meson build
+if ($dep_file)
+{
+	write_dep_file();
+}
+
+check_tabs_and_nbsp();
+
+# All checks are passed, we can create a stamp file meson build now
+if ($stamp_file)
+{
+	create_stamp_file();
+}
diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml
index eca9d62fc22..1c937247a9a 100644
--- a/.cirrus.tasks.yml
+++ b/.cirrus.tasks.yml
@@ -627,6 +627,8 @@ task:
     TEST_JOBS: 8
     IMAGE: ghcr.io/cirruslabs/macos-runner:sonoma
 
+    XML_CATALOG_FILES: /opt/local/share/xml/docbook/4.5/catalog.xml
+
     CIRRUS_WORKING_DIR: ${HOME}/pgsql/
     CCACHE_DIR: ${HOME}/ccache
     MACPORTS_CACHE: ${HOME}/macports-cache
@@ -641,6 +643,7 @@ task:
 
     MACOS_PACKAGE_LIST: >-
       ccache
+      docbook-xml-4.5
       icu
       kerberos5
       lz4
-- 
2.51.0

#5Peter Eisentraut
peter@eisentraut.org
In reply to: Nazir Bilal Yavuz (#4)
Re: Improve docs syntax checking and enable it in the meson build

On 22.10.25 10:04, Nazir Bilal Yavuz wrote:

diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml
index eca9d62fc22..1c937247a9a 100644
--- a/.cirrus.tasks.yml
+++ b/.cirrus.tasks.yml
@@ -627,6 +627,8 @@ task:
TEST_JOBS: 8
IMAGE: ghcr.io/cirruslabs/macos-runner:sonoma
+    XML_CATALOG_FILES: /opt/local/share/xml/docbook/4.5/catalog.xml
+
CIRRUS_WORKING_DIR: ${HOME}/pgsql/
CCACHE_DIR: ${HOME}/ccache
MACPORTS_CACHE: ${HOME}/macports-cache
@@ -641,6 +643,7 @@ task:

MACOS_PACKAGE_LIST: >-
ccache
+ docbook-xml-4.5
icu
kerberos5
lz4

What is the reason for this change in this patch? AFAICT, your patch
doesn't perform any XML-related operations anymore.

#6Nazir Bilal Yavuz
byavuz81@gmail.com
In reply to: Peter Eisentraut (#5)
1 attachment(s)
Re: Improve docs syntax checking and enable it in the meson build

Hi,

On Wed, 29 Oct 2025 at 20:24, Peter Eisentraut <peter@eisentraut.org> wrote:

On 22.10.25 10:04, Nazir Bilal Yavuz wrote:

diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml
index eca9d62fc22..1c937247a9a 100644
--- a/.cirrus.tasks.yml
+++ b/.cirrus.tasks.yml
@@ -627,6 +627,8 @@ task:
TEST_JOBS: 8
IMAGE: ghcr.io/cirruslabs/macos-runner:sonoma
+    XML_CATALOG_FILES: /opt/local/share/xml/docbook/4.5/catalog.xml
+
CIRRUS_WORKING_DIR: ${HOME}/pgsql/
CCACHE_DIR: ${HOME}/ccache
MACPORTS_CACHE: ${HOME}/macports-cache
@@ -641,6 +643,7 @@ task:

MACOS_PACKAGE_LIST: >-
ccache
+ docbook-xml-4.5
icu
kerberos5
lz4

What is the reason for this change in this patch? AFAICT, your patch
doesn't perform any XML-related operations anymore.

It is because of the "sgml_syntax_check" test in the meson build. This
test builds "postgres-full.xml" (same logic with make check) and the
build of "postgres-full.xml" has xmllint check. I could not find a way
to make this test optional like check target in the doc/src/sgml, so
the "sgml_syntax_check" test always runs.

Apart from this, I realized that there is a better way to create
"sgml_syntax_check". It was depending on "postgres-full.xml" before,
but now it directly builds the "postgres-full.xml" like "tmp_install"
test. This is addressed in the v8.

--
Regards,
Nazir Bilal Yavuz
Microsoft

Attachments:

v8-0001-Improve-docs-syntax-checking.patchtext/x-patch; charset=US-ASCII; name=v8-0001-Improve-docs-syntax-checking.patchDownload
From c313f86470c51300319fa9e4c2f3f541d7dc0c47 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Tue, 21 Oct 2025 13:49:16 +0300
Subject: [PATCH v8] Improve docs syntax checking

Move the checks out of the Makefile into a perl script that can be
called from both the Makefile and meson.build. The set of files checked
is simplified, so it is just all the sgml and xsl files found in
docs/src/sgml directory tree.

Along the way make some adjustments to .cirrus.tasks.yml to support this
better in CI.

Also ensure that the checks are run while generating the
postgres-full.xml.

Author: Nazir Bilal Yavuz <byavuz81@gmail.com>
Co-Author: Andrew Dunstan <andrew@dunslane.net>
Discussion: https://postgr.es/m/CAN55FZ1qzoDcaKqsR3DwE%3DX6FL%2Bwpm%2B%3DKLvH6ahrRXNhjU53DQ%40mail.gmail.com
---
 doc/src/sgml/Makefile             |  16 +----
 doc/src/sgml/meson.build          |  35 ++++++++-
 doc/src/sgml/sgml_syntax_check.pl | 115 ++++++++++++++++++++++++++++++
 .cirrus.tasks.yml                 |   3 +
 4 files changed, 154 insertions(+), 15 deletions(-)
 create mode 100755 doc/src/sgml/sgml_syntax_check.pl

diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile
index b53b2694a6b..50de8fab931 100644
--- a/doc/src/sgml/Makefile
+++ b/doc/src/sgml/Makefile
@@ -69,6 +69,7 @@ ALL_IMAGES := $(wildcard $(srcdir)/images/*.svg)
 # files into one big file).  This helps tools that don't understand
 # vpath builds (such as dbtoepub).
 postgres-full.xml: postgres.sgml $(ALL_SGML)
+	$(PERL) $(srcdir)/sgml_syntax_check.pl --srcdir $(srcdir)
 	$(XMLLINT) $(XMLINCLUDE) --output $@ --noent --valid $<
 
 
@@ -200,8 +201,7 @@ MAKEINFO = makeinfo
 ##
 
 # Quick syntax check without style processing
-check: postgres.sgml $(ALL_SGML) check-tabs check-nbsp
-	$(XMLLINT) $(XMLINCLUDE) --noout --valid $<
+check: postgres-full.xml
 
 
 ##
@@ -261,18 +261,6 @@ clean-man:
 
 endif # sqlmansectnum != 7
 
-# tabs are harmless, but it is best to avoid them in SGML files
-check-tabs:
-	@( ! grep '	' $(wildcard $(srcdir)/*.sgml $(srcdir)/func/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl) ) || \
-	(echo "Tabs appear in SGML/XML files" 1>&2;  exit 1)
-
-# Non-breaking spaces are harmless, but it is best to avoid them in SGML files.
-# Use perl command because non-GNU grep or sed could not have hex escape sequence.
-check-nbsp:
-	@ ( $(PERL) -ne '/\xC2\xA0/ and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \
-	  $(wildcard $(srcdir)/*.sgml $(srcdir)/func/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \
-	(echo "Non-breaking spaces appear in SGML/XML files" 1>&2;  exit 1)
-
 ##
 ## Clean
 ##
diff --git a/doc/src/sgml/meson.build b/doc/src/sgml/meson.build
index 6ae192eac68..6ffe4110167 100644
--- a/doc/src/sgml/meson.build
+++ b/doc/src/sgml/meson.build
@@ -96,6 +96,26 @@ xmllint = xmltools_wrapper + [
   '--tool', xmllint_bin, '--',
 ]
 
+sgml_syntax_check = files(
+  'sgml_syntax_check.pl'
+)
+
+# Create a stamp file for sgml syntax check and make postgres_full_xml
+# depend on it.  So, this check will run before the generation of
+# postgres_full_xml.
+sgml_check_stamp = custom_target('sgml-check-stamp',
+  output: 'sgml-check-stamp',
+  depfile: 'sgml-check-stamp.d',
+  command: [
+    perl, sgml_syntax_check,
+    '--srcdir', meson.current_source_dir(),
+    '--builddir', meson.current_build_dir(),
+    '--dep_file', '@DEPFILE@',
+    '--stamp_file', '@OUTPUT@',
+  ],
+  build_by_default: false,
+)
+
 # Run validation only once, common to all subsequent targets.  While
 # we're at it, also resolve all entities (that is, copy all included
 # files into one big file).  This helps tools that don't understand
@@ -106,12 +126,15 @@ postgres_full_xml = custom_target('postgres-full.xml',
   depfile: 'postgres-full.xml.d',
   command: [xmllint, '--nonet', '--noent', '--valid',
             '--path', '@OUTDIR@', '-o', '@OUTPUT@', '@INPUT@'],
-  depends: doc_generated,
+  depends: [doc_generated, sgml_check_stamp],
   build_by_default: false,
 )
 docs += postgres_full_xml
 alldocs += postgres_full_xml
 
+postgres_full_xml_target = 'postgres_full_xml'
+alias_target(postgres_full_xml_target, postgres_full_xml)
+
 
 if xsltproc_bin.found()
   xsltproc_flags = [
@@ -306,3 +329,13 @@ endif
 if alldocs.length() != 0
   alias_target('alldocs', alldocs)
 endif
+
+# Test creation of 'postgres_full_xml'.
+test(
+  'doc' / 'sgml_syntax_check',
+  meson_bin,
+  args: ['compile', postgres_full_xml_target],
+  suite: 'doc'
+)
+
+testprep_targets += doc_generated
diff --git a/doc/src/sgml/sgml_syntax_check.pl b/doc/src/sgml/sgml_syntax_check.pl
new file mode 100755
index 00000000000..55c212d9e5d
--- /dev/null
+++ b/doc/src/sgml/sgml_syntax_check.pl
@@ -0,0 +1,115 @@
+#!/usr/bin/perl
+#
+# Perl script that checks SGML/XSL syntax and formatting issues.
+#
+# doc/src/sgml/sgml_syntax_check.pl
+#
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+use strict;
+use warnings FATAL => 'all';
+use Getopt::Long;
+
+use File::Find;
+
+my $srcdir;
+my $builddir;
+my $dep_file;
+my $stamp_file;
+
+GetOptions(
+	'srcdir:s' => \$srcdir,
+	'builddir:s' => \$builddir,
+	'dep_file:s' => \$dep_file,
+	'stamp_file:s' => \$stamp_file) or die "$0: wrong arguments";
+
+die "$0: --srcdir must be specified\n" unless defined $srcdir;
+
+# Find files to process - all the sgml and xsl files (including in subdirectories)
+my @files_to_process;
+my @dirs_to_search = ($srcdir);
+push @dirs_to_search, $builddir if defined $builddir;
+find(
+	sub {
+		return unless -f $_;
+		return if $_ !~ /\.(sgml|xsl)$/;
+		push @files_to_process, $File::Find::name;
+	},
+	@dirs_to_search,);
+
+# tabs and non-breaking spaces are harmless, but it is best to avoid them in SGML files
+sub check_tabs_and_nbsp
+{
+	my $errors = 0;
+	for my $f (@files_to_process)
+	{
+		open my $fh, "<:encoding(UTF-8)", $f or die "Can't open $f: $!";
+		my $line_no = 0;
+		while (<$fh>)
+		{
+			$line_no++;
+			if (/\t/)
+			{
+				print STDERR "Tab found in $f:$line_no\n";
+				$errors++;
+			}
+			if (/\xC2\xA0/)
+			{
+				print STDERR "$f:$line_no: contains non-breaking space\n";
+				$errors++;
+			}
+		}
+		close($fh);
+	}
+
+	if ($errors)
+	{
+		remove_stamp_file();
+		die "Tabs and/or non-breaking spaces appear in SGML/XML files\n";
+	}
+}
+
+sub write_dep_file()
+{
+	open my $fh, '>', $dep_file or die "can't open $dep_file: $!";
+
+	foreach my $file (@files_to_process)
+	{
+		print $fh "$dep_file : $file\n";
+	}
+
+	close $fh;
+}
+
+sub create_stamp_file
+{
+	# Avoid touching existing stamp file to prevent unnecessary rebuilds
+	if (!(-f $stamp_file))
+	{
+		open my $fh, '>', $stamp_file
+		  or die "can't open $stamp_file: $!";
+		close $fh;
+	}
+}
+
+sub remove_stamp_file
+{
+	if ($stamp_file)
+	{
+		unlink $stamp_file;
+	}
+}
+
+# Create a dependency file for meson build
+if ($dep_file)
+{
+	write_dep_file();
+}
+
+check_tabs_and_nbsp();
+
+# All checks are passed, we can create a stamp file meson build now
+if ($stamp_file)
+{
+	create_stamp_file();
+}
diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml
index eca9d62fc22..1c937247a9a 100644
--- a/.cirrus.tasks.yml
+++ b/.cirrus.tasks.yml
@@ -627,6 +627,8 @@ task:
     TEST_JOBS: 8
     IMAGE: ghcr.io/cirruslabs/macos-runner:sonoma
 
+    XML_CATALOG_FILES: /opt/local/share/xml/docbook/4.5/catalog.xml
+
     CIRRUS_WORKING_DIR: ${HOME}/pgsql/
     CCACHE_DIR: ${HOME}/ccache
     MACPORTS_CACHE: ${HOME}/macports-cache
@@ -641,6 +643,7 @@ task:
 
     MACOS_PACKAGE_LIST: >-
       ccache
+      docbook-xml-4.5
       icu
       kerberos5
       lz4
-- 
2.51.0