summaryrefslogtreecommitdiffstats
path: root/ci/check-directional-formatting.bash
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-09 13:34:27 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-09 13:34:27 +0000
commit4dbdc42d9e7c3968ff7f690d00680419c9b8cb0f (patch)
tree47c1d492e9c956c1cd2b74dbd3b9d8b0db44dc4e /ci/check-directional-formatting.bash
parentInitial commit. (diff)
downloadgit-4dbdc42d9e7c3968ff7f690d00680419c9b8cb0f.tar.xz
git-4dbdc42d9e7c3968ff7f690d00680419c9b8cb0f.zip
Adding upstream version 1:2.43.0.upstream/1%2.43.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ci/check-directional-formatting.bash')
-rwxr-xr-xci/check-directional-formatting.bash27
1 files changed, 27 insertions, 0 deletions
diff --git a/ci/check-directional-formatting.bash b/ci/check-directional-formatting.bash
new file mode 100755
index 0000000..e6211b1
--- /dev/null
+++ b/ci/check-directional-formatting.bash
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+# This script verifies that the non-binary files tracked in the Git index do
+# not contain any Unicode directional formatting: such formatting could be used
+# to deceive reviewers into interpreting code differently from the compiler.
+# This is intended to run on an Ubuntu agent in a GitHub workflow.
+#
+# To allow translated messages to introduce such directional formatting in the
+# future, we exclude the `.po` files from this validation.
+#
+# Neither GNU grep nor `git grep` (not even with `-P`) handle `\u` as a way to
+# specify UTF-8.
+#
+# To work around that, we use `printf` to produce the pattern as a byte
+# sequence, and then feed that to `git grep` as a byte sequence (setting
+# `LC_CTYPE` to make sure that the arguments are interpreted as intended).
+#
+# Note: we need to use Bash here because its `printf` interprets `\uNNNN` as
+# UTF-8 code points, as desired. Running this script through Ubuntu's `dash`,
+# for example, would use a `printf` that does not understand that syntax.
+
+# U+202a..U+2a2e: LRE, RLE, PDF, LRO and RLO
+# U+2066..U+2069: LRI, RLI, FSI and PDI
+regex='(\u202a|\u202b|\u202c|\u202d|\u202e|\u2066|\u2067|\u2068|\u2069)'
+
+! LC_CTYPE=C git grep -El "$(LC_CTYPE=C.UTF-8 printf "$regex")" \
+ -- ':(exclude,attr:binary)' ':(exclude)*.po'