summary | refs | log | tree | commit | diff | stats
path: root/t/helper/test-xml-encode.c
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 14:47:53 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 14:47:53 +0000
commit: c8bae7493d2f2910b57f13ded012e86bdcfb0532 (patch)
tree: 24e09d9f84dec336720cf393e156089ca2835791 /t/helper/test-xml-encode.c
parent: Initial commit. (diff)
downloadgit-upstream/1%2.39.2.tar.xz
git-upstream/1%2.39.2.zip
Adding upstream version 1:2.39.2.upstream/1%2.39.2upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 't/helper/test-xml-encode.c')
-rw-r--r-- t/helper/test-xml-encode.c 80
1 files changed, 80 insertions, 0 deletions
diff --git a/t/helper/test-xml-encode.c b/t/helper/test-xml-encode.c
new file mode 100644
index 0000000..a648bbd
--- /dev/null
+++ b/t/helper/test-xml-encode.c
@@ -0,0 +1,80 @@
+#include "test-tool.h"
+
+static const char *utf8_replace_character = "&#xfffd;";
+
+/*
+ * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded
+ * in an XML file.
+ *
+ * - The five XML special characters (& ' " < >) are written as named
+ *   entities.
+ * - Tab, LF and CR are written as numeric character references; every
+ *   other byte below 0x20 is replaced by U+FFFD.
+ * - A multi-byte sequence is copied through verbatim once all of its
+ *   continuation bytes (10xxxxxx) have been seen. A sequence that is cut
+ *   short, either by a non-continuation byte or by the end of input, is
+ *   replaced by a single U+FFFD.
+ * - A byte that cannot start a sequence (a stray continuation byte, or
+ *   11111xxx) is replaced by U+FFFD.
+ *
+ * NOTE: only the bit patterns of lead and continuation bytes are checked;
+ * overlong forms, surrogates and out-of-range code points are passed
+ * through unchanged.
+ *
+ * argc and argv are not used. Returns 0 at end of input; calls
+ * die_errno() if reading <stdin> fails.
+ */
+int cmd__xml_encode(int argc, const char **argv)
+{
+	unsigned char buf[1024], tmp[4], *tmp2 = NULL;
+	ssize_t cur = 0, len = 1, remaining = 0;
+	unsigned char ch;
+
+	for (;;) {
+		/*
+		 * Advance to the next byte, refilling buf once it is used up.
+		 * The initial cur = 0, len = 1 forces a read on the first pass.
+		 */
+		if (++cur == len) {
+			len = xread(0, buf, sizeof(buf));
+			if (!len) {
+				/*
+				 * End of input in the middle of a multi-byte
+				 * sequence: replace the partial sequence
+				 * instead of dropping it silently.
+				 */
+				if (tmp2)
+					fputs(utf8_replace_character, stdout);
+				return 0;
+			}
+			if (len < 0)
+				die_errno("Could not read <stdin>");
+			cur = 0;
+		}
+		ch = buf[cur];
+
+		if (tmp2) {
+			/* Collecting the continuation bytes of a sequence. */
+			if ((ch & 0xc0) != 0x80) {
+				/*
+				 * Not a continuation byte: replace the partial
+				 * sequence and step back, so that the ++cur at
+				 * the top of the loop revisits this byte as the
+				 * start of something new.
+				 */
+				fputs(utf8_replace_character, stdout);
+				tmp2 = NULL;
+				cur--;
+				continue;
+			}
+			*tmp2 = ch;
+			tmp2++;
+			if (--remaining == 0) {
+				/* Sequence complete: emit it as-is. */
+				fwrite(tmp, tmp2 - tmp, 1, stdout);
+				tmp2 = NULL;
+			}
+			continue;
+		}
+
+		if (!(ch & 0x80)) {
+			/* 0xxxxxxx */
+			if (ch == '&')
+				fputs("&amp;", stdout);
+			else if (ch == '\'')
+				fputs("&apos;", stdout);
+			else if (ch == '"')
+				fputs("&quot;", stdout);
+			else if (ch == '<')
+				fputs("&lt;", stdout);
+			else if (ch == '>')
+				fputs("&gt;", stdout);
+			else if (ch >= 0x20)
+				fputc(ch, stdout);
+			else if (ch == 0x09 || ch == 0x0a || ch == 0x0d)
+				fprintf(stdout, "&#x%02x;", ch);
+			else
+				fputs(utf8_replace_character, stdout);
+		} else if ((ch & 0xe0) == 0xc0) {
+			/* 110XXXXx 10xxxxxx */
+			tmp[0] = ch;
+			remaining = 1;
+			tmp2 = tmp + 1;
+		} else if ((ch & 0xf0) == 0xe0) {
+			/* 1110XXXX 10Xxxxxx 10xxxxxx */
+			tmp[0] = ch;
+			remaining = 2;
+			tmp2 = tmp + 1;
+		} else if ((ch & 0xf8) == 0xf0) {
+			/* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
+			tmp[0] = ch;
+			remaining = 3;
+			tmp2 = tmp + 1;
+		} else
+			/* Stray continuation byte or 11111xxx. */
+			fputs(utf8_replace_character, stdout);
+	}
+
+	return 0;
+}