From 3e160e27e4686620d16477a9ea9cf00141e52ce7 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Sat, 13 Apr 2024 10:41:51 +0200
Subject: Adding upstream version 3.9.0.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 src/util/Makefile.in         |  47 +++++++-
 src/util/argv.c              |  57 +++++++++-
 src/util/argv.h              |   2 +
 src/util/casefold.c          |   2 +-
 src/util/dict_inline.c       |   2 +-
 src/util/dict_thash.c        |   6 +-
 src/util/dict_utf8.c         |   4 +-
 src/util/inet_prefix_top.c   |   1 +
 src/util/logwriter.c         |  34 +++++-
 src/util/logwriter.h         |   1 +
 src/util/midna_domain.c      |   4 +-
 src/util/parse_utf8_char.h   | 122 +++++++++++++++++++++
 src/util/printable.c         | 162 +++++++++++++++++++++++++---
 src/util/quote_for_json.c    | 218 ++++++++++++++++++++++++++++++++++++++
 src/util/readlline.c         | 231 +++++++++++++++++++++++++++++++++++++++-
 src/util/stringops.h         |   3 +
 src/util/sys_defs.h          |   7 ++
 src/util/valid_hostname.c    |  13 ++-
 src/util/valid_hostname.in   |   6 ++
 src/util/valid_hostname.ref  |  10 ++
 src/util/valid_utf8_string.c | 247 ++++++++++++++++++++++++++++++-------------
 src/util/vstream.c           |  34 +++++-
 22 files changed, 1100 insertions(+), 113 deletions(-)
 create mode 100644 src/util/parse_utf8_char.h
 create mode 100644 src/util/quote_for_json.c

(limited to 'src/util')

diff --git a/src/util/Makefile.in b/src/util/Makefile.in
index f69dec5..01211fb 100644
--- a/src/util/Makefile.in
+++ b/src/util/Makefile.in
@@ -45,7 +45,7 @@ SRCS	= alldig.c allprint.c argv.c argv_split.c attr_clnt.c attr_print0.c \
 	byte_mask.c known_tcp_ports.c argv_split_at.c dict_stream.c \
 	sane_strtol.c hash_fnv.c ldseed.c mkmap_cdb.c mkmap_db.c mkmap_dbm.c \
 	mkmap_fail.c mkmap_lmdb.c mkmap_open.c mkmap_sdbm.c inet_prefix_top.c \
-	inet_addr_sizes.c
+	inet_addr_sizes.c quote_for_json.c
 OBJS	= alldig.o allprint.o argv.o argv_split.o attr_clnt.o attr_print0.o \
 	attr_print64.o attr_print_plain.o attr_scan0.o attr_scan64.o \
 	attr_scan_plain.o auto_clnt.o base64_code.o basename.o binhash.o \
@@ -91,7 +91,8 @@ OBJS	= alldig.o allprint.o argv.o argv_split.o attr_clnt.o attr_print0.o \
 	msg_logger.o logwriter.o unix_dgram_connect.o unix_dgram_listen.o \
 	byte_mask.o known_tcp_ports.o argv_split_at.o dict_stream.o \
 	sane_strtol.o hash_fnv.o ldseed.o mkmap_db.o mkmap_dbm.o \
-	mkmap_fail.o mkmap_open.o inet_prefix_top.o inet_addr_sizes.o
+	mkmap_fail.o mkmap_open.o inet_prefix_top.o inet_addr_sizes.o \
+	quote_for_json.o
 # MAP_OBJ is for maps that may be dynamically loaded with dynamicmaps.cf.
 # When hard-linking these, makedefs sets NON_PLUGIN_MAP_OBJ=$(MAP_OBJ),
 # otherwise it sets the PLUGIN_* macros.
@@ -145,7 +146,7 @@ TESTPROG= dict_open dup2_pass_on_exec events exec_command fifo_open \
 	vstream timecmp dict_cache midna_domain casefold strcasecmp_utf8 \
 	vbuf_print split_qnameval vstream msg_logger byte_mask \
 	known_tcp_ports dict_stream find_inet binhash hash_fnv argv \
-	clean_env inet_prefix_top
+	clean_env inet_prefix_top printable readlline quote_for_json
 PLUGIN_MAP_SO = $(LIB_PREFIX)pcre$(LIB_SUFFIX) $(LIB_PREFIX)lmdb$(LIB_SUFFIX) \
 	$(LIB_PREFIX)cdb$(LIB_SUFFIX) $(LIB_PREFIX)sdbm$(LIB_SUFFIX)
 HTABLE_FIX = NORANDOMIZE=1
@@ -365,6 +366,16 @@ unescape: $(LIB)
 	$(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS)
 	mv junk $@.o
 
+printable: $(LIB)
+	mv $@.o junk
+	$(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS)
+	mv junk $@.o
+
+readlline: $(LIB)
+	mv $@.o junk
+	$(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS)
+	mv junk $@.o
+
 hex_quote: $(LIB)
 	mv $@.o junk
 	$(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS)
@@ -609,6 +620,11 @@ inet_prefix_top: $(LIB)
 	$(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS)
 	mv junk $@.o
 
+quote_for_json: $(LIB)
+	mv $@.o junk
+	$(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS)
+	mv junk $@.o
+
 tests: all valid_hostname_test mac_expand_test dict_test unescape_test \
 	hex_quote_test ctable_test inet_addr_list_test base64_code_test \
 	attr_scan64_test attr_scan0_test host_port_test dict_tests \
@@ -618,7 +634,8 @@ tests: all valid_hostname_test mac_expand_test dict_test unescape_test \
 	strcasecmp_utf8_test vbuf_print_test miss_endif_cidr_test \
 	miss_endif_regexp_test split_qnameval_test vstring_test \
 	vstream_test byte_mask_tests mystrtok_test known_tcp_ports_test \
-	binhash_test argv_test inet_prefix_top_test
+	binhash_test argv_test inet_prefix_top_test printable_test \
+	valid_utf8_string_test readlline_test quote_for_json_test
  
 dict_tests: all dict_test \
 	dict_pcre_tests dict_cidr_test dict_thash_test dict_static_test \
@@ -650,6 +667,15 @@ unescape_test: unescape unescape.in unescape.ref
 #	diff unescape.in unescape.tmp
 	rm -f unescape.tmp
 
+printable_test: printable
+	$(SHLIB_ENV) ${VALGRIND} ./printable 
+
+readlline_test: readlline
+	$(SHLIB_ENV) ${VALGRIND} ./readlline 
+
+valid_utf8_string_test: valid_utf8_string
+	$(SHLIB_ENV) ${VALGRIND} ./valid_utf8_string 
+
 hex_quote_test: hex_quote
 	$(SHLIB_ENV) ${VALGRIND} ./hex_quote <hex_quote.c | od -cb >hex_quote.tmp
 	od -cb <hex_quote.c >hex_quote.ref
@@ -1083,6 +1109,9 @@ argv_test: argv
 inet_prefix_top_test: inet_prefix_top
 	$(SHLIB_ENV) ${VALGRIND} ./inet_prefix_top
 
+quote_for_json_test: quote_for_json
+	$(SHLIB_ENV) ${VALGRIND} ./quote_for_json
+
 depend: $(MAKES)
 	(sed '1,/^# do not edit/!d' Makefile.in; \
 	set -e; for i in [a-z][a-z0-9]*.c; do \
@@ -1119,9 +1148,12 @@ allspace.o: vbuf.h
 allspace.o: vstring.h
 argv.o: argv.c
 argv.o: argv.h
+argv.o: check_arg.h
 argv.o: msg.h
 argv.o: mymalloc.h
 argv.o: sys_defs.h
+argv.o: vbuf.h
+argv.o: vstring.h
 argv_attr_print.o: argv.h
 argv_attr_print.o: argv_attr.h
 argv_attr_print.o: argv_attr_print.c
@@ -2157,6 +2189,7 @@ logwriter.o: logwriter.c
 logwriter.o: logwriter.h
 logwriter.o: msg.h
 logwriter.o: mymalloc.h
+logwriter.o: name_code.h
 logwriter.o: safe_open.h
 logwriter.o: sys_defs.h
 logwriter.o: vbuf.h
@@ -2525,11 +2558,16 @@ posix_signals.o: posix_signals.c
 posix_signals.o: posix_signals.h
 posix_signals.o: sys_defs.h
 printable.o: check_arg.h
+printable.o: parse_utf8_char.h
 printable.o: printable.c
 printable.o: stringops.h
 printable.o: sys_defs.h
 printable.o: vbuf.h
 printable.o: vstring.h
+quote_for_json.o: quote_for_json.c
+quote_for_json.o: stringops.h
+quote_for_json.o: sys_defs.h
+quote_for_json.o: vstring.h
 rand_sleep.o: iostuff.h
 rand_sleep.o: msg.h
 rand_sleep.o: myrand.h
@@ -2848,6 +2886,7 @@ valid_utf8_hostname.o: valid_utf8_hostname.h
 valid_utf8_hostname.o: vbuf.h
 valid_utf8_hostname.o: vstring.h
 valid_utf8_string.o: check_arg.h
+valid_utf8_string.o: parse_utf8_char.h
 valid_utf8_string.o: stringops.h
 valid_utf8_string.o: sys_defs.h
 valid_utf8_string.o: valid_utf8_string.c
diff --git a/src/util/argv.c b/src/util/argv.c
index 4e05fd0..332426e 100644
--- a/src/util/argv.c
+++ b/src/util/argv.c
@@ -53,6 +53,11 @@
 /*	ssize_t	pos;
 /*	ssize_t	how_many;
 /*
+/*	char	*argv_join(buf, argvp, delim)
+/*	VSTRING	*buf;
+/*	ARGV	*argvp;
+/*	int	delim;
+/*
 /*	void	ARGV_FAKE_BEGIN(argv, arg)
 /*	const char *arg;
 /*
@@ -109,6 +114,10 @@
 /*	starting at the specified array position. The result is
 /*	null-terminated.
 /*
+/*	argv_join() joins all elements in an array using the
+/*	specified delimiter value, and appends the result to the
+/*	specified buffer.
+/*
 /*	ARGV_FAKE_BEGIN/END are an optimization for the case where
 /*	a single string needs to be passed into an ARGV-based
 /*	interface.  ARGV_FAKE_BEGIN() opens a statement block and
@@ -148,6 +157,7 @@
 
 #include "mymalloc.h"
 #include "msg.h"
+#include "vstring.h"
 #include "argv.h"
 
 #ifdef TEST
@@ -379,6 +389,20 @@ void    argv_delete(ARGV *argvp, ssize_t first, ssize_t how_many)
     argvp->argc -= how_many;
 }
 
+/* argv_join - append delimiter-joined elements, always terminate result */
+
+char   *argv_join(VSTRING *buf, ARGV *argv, int delim)
+{
+    char  **cpp;
+
+    for (cpp = argv->argv; *cpp; cpp++) {
+	vstring_strcat(buf, *cpp);
+	if (cpp[1])				/* not after last element */
+	    VSTRING_ADDCH(buf, delim);
+    }
+    return (vstring_str(vstring_strcat(buf, "")));	/* empty argv too */
+}
+
 #ifdef TEST
 
  /*
@@ -402,6 +426,7 @@ typedef struct TEST_CASE {
     const char *exp_panic_msg;		/* expected panic */
     int     exp_argc;			/* expected array length */
     const char *exp_argv[ARRAY_LEN];	/* expected array content */
+    int     join_delim;			/* argv_join() delimiter */
 } TEST_CASE;
 
 #define TERMINATE_ARRAY	(1)
@@ -559,6 +584,24 @@ static ARGV *test_argv_bad_delete3(const TEST_CASE *tp, ARGV *argvp)
     return (argvp);
 }
 
+/* test_argv_join - join populated array, keep result as sole element */
+
+static ARGV *test_argv_join(const TEST_CASE *tp, ARGV *argvp)
+{
+    VSTRING *buf = vstring_alloc(100);
+
+    /*
+     * argv_join() writes to a VSTRING, but the fixture verifies an ARGV.
+     * Replace the array content with the joined string as the only element.
+     */
+    test_argv_populate(tp, argvp);
+    argv_join(buf, argvp, tp->join_delim);
+    argv_delete(argvp, 0, argvp->argc);
+    argv_add(argvp, vstring_str(buf), ARGV_END);
+    vstring_free(buf);
+    return (argvp);
+}
+
 /* test_argv_verify - verify result */
 
 static int test_argv_verify(const TEST_CASE *tp, ARGV *argvp)
@@ -573,7 +616,7 @@ static int test_argv_verify(const TEST_CASE *tp, ARGV *argvp)
 	}
 	if (strcmp(vstring_str(test_panic_str), tp->exp_panic_msg) != 0) {
 	    msg_warn("test case '%s': got '%s', want: '%s'",
-		     tp->label, vstring_str(test_panic_str), tp->exp_panic_msg);
+		 tp->label, vstring_str(test_panic_str), tp->exp_panic_msg);
 	    return (FAIL);
 	}
 	return (PASS);
@@ -682,6 +725,18 @@ static const TEST_CASE test_cases[] = {
 	{"foo", "baz", "bar", 0}, 0, test_argv_bad_delete3,
 	"argv_delete bad range: (start=100 count=1)"
     },
+    {"argv_join, multiple strings",
+	{"foo", "baz", "bar", 0}, 0, test_argv_join,
+	0, 1, {"foo:baz:bar", 0}, ':'
+    },
+    {"argv_join, one string",
+	{"foo", 0}, 0, test_argv_join,
+	0, 1, {"foo", 0}, ':'
+    },
+    {"argv_join, empty",
+	{0}, 0, test_argv_join,
+	0, 1, {"", 0}, ':'
+    },
     0,
 };
 
diff --git a/src/util/argv.h b/src/util/argv.h
index b0098ce..f1e746a 100644
--- a/src/util/argv.h
+++ b/src/util/argv.h
@@ -33,6 +33,8 @@ extern void argv_truncate(ARGV *, ssize_t);
 extern void argv_insert_one(ARGV *, ssize_t, const char *);
 extern void argv_replace_one(ARGV *, ssize_t, const char *);
 extern void argv_delete(ARGV *, ssize_t, ssize_t);
+struct VSTRING;
+extern char *argv_join(struct VSTRING *buf, ARGV *, int);
 extern ARGV *argv_free(ARGV *);
 
 extern ARGV *argv_split(const char *, const char *);
diff --git a/src/util/casefold.c b/src/util/casefold.c
index d3ebd4b..94860b8 100644
--- a/src/util/casefold.c
+++ b/src/util/casefold.c
@@ -300,7 +300,7 @@ int     main(int argc, char **argv)
 		    encode_utf8(buffer, codepoint);
 		    if (msg_verbose)
 			vstream_printf("U+%X -> %s\n", codepoint, STR(buffer));
-		    if (valid_utf8_string(STR(buffer), LEN(buffer)) == 0)
+		    if (valid_utf8_stringz(STR(buffer)) == 0)
 			msg_fatal("bad utf-8 encoding for U+%X\n", codepoint);
 		    casefold(dest, STR(buffer));
 		}
diff --git a/src/util/dict_inline.c b/src/util/dict_inline.c
index 72339b2..d7f9344 100644
--- a/src/util/dict_inline.c
+++ b/src/util/dict_inline.c
@@ -87,7 +87,7 @@ DICT   *dict_inline_open(const char *name, int open_flags, int dict_flags)
      */
     if (DICT_NEED_UTF8_ACTIVATION(util_utf8_enable, dict_flags)
 	&& allascii(name) == 0
-	&& valid_utf8_string(name, strlen(name)) == 0)
+	&& valid_utf8_stringz(name) == 0)
 	DICT_INLINE_RETURN(dict_surrogate(DICT_TYPE_INLINE, name,
 					  open_flags, dict_flags,
 					  "bad UTF-8 syntax: \"%s:%s\"; "
diff --git a/src/util/dict_thash.c b/src/util/dict_thash.c
index 69eb17b..bae4a63 100644
--- a/src/util/dict_thash.c
+++ b/src/util/dict_thash.c
@@ -127,7 +127,7 @@ DICT   *dict_thash_open(const char *path, int open_flags, int dict_flags)
 	     */
 	    if ((dict->flags & DICT_FLAG_UTF8_ACTIVE)
 		&& allascii(STR(line_buffer)) == 0
-	    && valid_utf8_string(STR(line_buffer), LEN(line_buffer)) == 0) {
+		&& valid_utf8_stringz(STR(line_buffer)) == 0) {
 		msg_warn("%s, line %d: non-UTF-8 input \"%s\""
 			 " -- ignoring this line",
 			 VSTREAM_PATH(fp), lineno, STR(line_buffer));
@@ -181,8 +181,8 @@ DICT   *dict_thash_open(const char *path, int open_flags, int dict_flags)
 			 " is this an alias file?", path, lineno);
 
 	    /*
-	     * Optionally treat the value as a filename, and replace the value
-	     * with the BASE64-encoded content of the named file.
+	     * Optionally treat the value as a filename, and replace the
+	     * value with the BASE64-encoded content of the named file.
 	     */
 	    if (dict_flags & DICT_FLAG_SRC_RHS_IS_FILE) {
 		VSTRING *base64_buf;
diff --git a/src/util/dict_utf8.c b/src/util/dict_utf8.c
index f1fc65a..9bb6b7b 100644
--- a/src/util/dict_utf8.c
+++ b/src/util/dict_utf8.c
@@ -100,7 +100,7 @@ static char *dict_utf8_check_fold(DICT *dict, const char *string,
     /*
      * Validate UTF-8 without casefolding.
      */
-    if (!allascii(string) && valid_utf8_string(string, strlen(string)) == 0) {
+    if (!allascii(string) && valid_utf8_stringz(string) == 0) {
 	if (err)
 	    *err = "malformed UTF-8 or invalid codepoint";
 	return (0);
@@ -123,7 +123,7 @@ static char *dict_utf8_check_fold(DICT *dict, const char *string,
 
 static int dict_utf8_check(const char *string, CONST_CHAR_STAR *err)
 {
-    if (!allascii(string) && valid_utf8_string(string, strlen(string)) == 0) {
+    if (!allascii(string) && valid_utf8_stringz(string) == 0) {
 	if (err)
 	    *err = "malformed UTF-8 or invalid codepoint";
 	return (0);
diff --git a/src/util/inet_prefix_top.c b/src/util/inet_prefix_top.c
index 8d5af00..f35d5f0 100644
--- a/src/util/inet_prefix_top.c
+++ b/src/util/inet_prefix_top.c
@@ -164,6 +164,7 @@ int     main(int argc, char **argv)
 	    msg_info("PASS %s/%d", str_name_code(af_map, tp->in_af),
 		     tp->in_prefix_len);
 	}
+	myfree(act_prefix);
     }
     msg_info("PASS=%d FAIL=%d", pass, fail);
     return (fail > 0);
diff --git a/src/util/logwriter.c b/src/util/logwriter.c
index aea2767..4a18be3 100644
--- a/src/util/logwriter.c
+++ b/src/util/logwriter.c
@@ -21,6 +21,9 @@
 /*	const char *path,
 /*	const char *buffer,
 /*	ssize_t	buflen)
+/*
+/*	int	set_logwriter_create_perms(
+/*	const char *mode)
 /* DESCRIPTION
 /*	This module manages a logfile writer.
 /*
@@ -38,6 +41,15 @@
 /*	logwriter_one_shot() combines all the above operations. The
 /*	result is zero if successful, VSTREAM_EOF if any operation
 /*	failed.
+/*
+/*	set_logwriter_create_perms() sets the file permissions that
+/*	will be used when creating a logfile. Valid inputs are
+/*	"644", "640", and "600". Leading zeros are allowed and
+/*	ignored.
+/* DIAGNOSTICS
+/*	Fatal error: logfile create error; warning: logfile permission
+/*	change error. set_logwriter_create_perms() returns the file
+/*	create permission if the request is valid, -1 otherwise.
 /* LICENSE
 /* .ad
 /* .fi
@@ -66,10 +78,12 @@
 #include <mymalloc.h>
 #include <safe_open.h>
 #include <vstream.h>
+#include <name_code.h>
 
  /*
   * Application-specific.
   */
+static int logwriter_perms = 0600;
 
 /* logwriter_open_or_die - open logfile */
 
@@ -82,7 +96,7 @@ VSTREAM *logwriter_open_or_die(const char *path)
 #define NO_CHOWN	(-1)
 #define NO_CHGRP	(-1)
 
-    fp = safe_open(path, O_CREAT | O_WRONLY | O_APPEND, 0644,
+    fp = safe_open(path, O_CREAT | O_WRONLY | O_APPEND, logwriter_perms,
 		   NO_STATP, NO_CHOWN, NO_CHGRP, why);
     if (fp == 0)
 	msg_fatal("open logfile '%s': %s", path, vstring_str(why));
@@ -122,3 +136,21 @@ int     logwriter_one_shot(const char *path, const char *buf, ssize_t len)
     err |= logwriter_close(fp);
     return (err ? VSTREAM_EOF : 0);
 }
+
+/* set_logwriter_create_perms - select logfile create permissions */
+
+int     set_logwriter_create_perms(const char *mode_str)
+{
+    static const NAME_CODE allowed[] = {
+	{"644", 0644},
+	{"640", 0640},
+	{"600", 0600},
+	{0, -1},
+    };
+    const char *digits = mode_str + strspn(mode_str, "0");	/* skip zeros */
+    int     mode = name_code(allowed, NAME_CODE_FLAG_NONE, digits);
+
+    if (mode != -1)				/* else keep current setting */
+	logwriter_perms = mode;
+    return (mode);
+}
diff --git a/src/util/logwriter.h b/src/util/logwriter.h
index f5266e4..c827d25 100644
--- a/src/util/logwriter.h
+++ b/src/util/logwriter.h
@@ -23,6 +23,7 @@ extern VSTREAM *logwriter_open_or_die(const char *);
 extern int logwriter_write(VSTREAM *, const char *, ssize_t);
 extern int logwriter_close(VSTREAM *);
 extern int logwriter_one_shot(const char *, const char *, ssize_t);
+extern int set_logwriter_create_perms(const char *);
 
 /* LICENSE
 /* .ad
diff --git a/src/util/midna_domain.c b/src/util/midna_domain.c
index 333a5c9..bc016b6 100644
--- a/src/util/midna_domain.c
+++ b/src/util/midna_domain.c
@@ -178,7 +178,7 @@ static void *midna_domain_to_ascii_create(const char *name, void *unused_context
     /*
      * Paranoia: do not expose uidna_*() to unfiltered network data.
      */
-    if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
+    if (allascii(name) == 0 && valid_utf8_stringz(name) == 0) {
 	msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
 		 myname, name, "malformed UTF-8");
 	return (0);
@@ -232,7 +232,7 @@ static void *midna_domain_to_utf8_create(const char *name, void *unused_context)
     /*
      * Paranoia: do not expose uidna_*() to unfiltered network data.
      */
-    if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
+    if (allascii(name) == 0 && valid_utf8_stringz(name) == 0) {
 	msg_warn("%s: Problem translating domain \"%.100s\" to UTF-8 form: %s",
 		 myname, name, "malformed UTF-8");
 	return (0);
diff --git a/src/util/parse_utf8_char.h b/src/util/parse_utf8_char.h
new file mode 100644
index 0000000..b00a1c2
--- /dev/null
+++ b/src/util/parse_utf8_char.h
@@ -0,0 +1,122 @@
+/*++
+/* NAME
+/*	parse_utf8_char 3h
+/* SUMMARY
+/*	parse one UTF-8 multibyte character
+/* SYNOPSIS
+/*	#include <parse_utf8_char.h>
+/*
+/*	char	*parse_utf8_char(str, end)
+/*	const char *str;
+/*	const char *end;
+/* DESCRIPTION
+/*	parse_utf8_char() determines if the byte sequence starting
+/*	at \fBstr\fR begins with a complete UTF-8 character as
+/*	defined in RFC 3629. That is, a proper encoding of code
+/*	points U+0000..U+10FFFF, excluding over-long encodings and
+/*	excluding U+D800..U+DFFF surrogates.
+/*
+/*	When the byte sequence starting at \fBstr\fR begins with a
+/*	complete UTF-8 character, this function returns a pointer
+/*	to the last byte in that character. Otherwise, it returns
+/*	a null pointer.
+/*
+/*	The \fBend\fR argument is either null (the byte sequence
+/*	starting at \fBstr\fR must be null terminated), or \fBend
+/*	- str\fR specifies the length of the byte sequence.
+/* BUGS
+/*	Code points in the range U+FDD0..U+FDEF, and code points
+/*	ending in FFFE or FFFF, are non-characters in UNICODE. This
+/*	function does not reject these.
+/* LICENSE
+/* .ad
+/* .fi
+/*	The Secure Mailer license must be distributed with this software.
+/* AUTHOR(S)
+/*	Wietse Venema
+/*	IBM T.J. Watson Research
+/*	P.O. Box 704
+/*	Yorktown Heights, NY 10598, USA
+/*
+/*	Wietse Venema
+/*	porcupine.org
+/*	Amawalk, NY 10501, USA
+/*--*/
+
+ /*
+  * System library.
+  */
+#include <sys_defs.h>
+
+#ifdef NO_INLINE
+#define inline /* */
+#endif
+
+/* parse_utf8_char - validate one UTF-8 character, return its last byte */
+
+static inline char *parse_utf8_char(const char *str, const char *end)
+{
+    const unsigned char *cp = (const unsigned char *) str;
+    const unsigned char *ep = (const unsigned char *) end;
+    unsigned char c0, ch;
+
+    /*
+     * Optimized for correct input, time, space, and for CPUs that have a
+     * decent number of registers. Other implementation considerations:
+     *
+     * - The first byte is read before "ep" is consulted, so a caller that
+     * passes a non-null "end" must guarantee that str < end.
+     *
+     * - A UTF-8 non-leading byte is never null, so a partial character at
+     * the end of a null-terminated string is rejected.
+     *
+     * - With a constant null "end" and inlining, the compiler should
+     * propagate "ep" == 0 and eliminate the branches that test it.
+     */
+    /* Single-byte encodings. */
+    if (EXPECTED((c0 = *cp) <= 0x7f) /* we know that c0 >= 0x0 */ ) {
+	return ((char *) cp);
+    }
+    /* Two-byte encodings. */
+    else if (EXPECTED(c0 <= 0xdf) /* we know that c0 >= 0x80 */ ) {
+	/* Exclude stray tail bytes (0x80..0xbf) and over-long encodings. */
+	if (UNEXPECTED(c0 < 0xc2)
+	    || UNEXPECTED(ep && cp + 1 >= ep)
+	/* Require UTF-8 tail byte. */
+	    || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80))
+	    return (0);
+	return ((char *) cp);
+    }
+    /* Three-byte encodings. */
+    else if (EXPECTED(c0 <= 0xef) /* we know that c0 >= 0xe0 */ ) {
+	if (UNEXPECTED(ep && cp + 2 >= ep)
+	/* Exclude over-long encodings; second byte must be >= 0x80. */
+	    || UNEXPECTED((ch = *++cp) < (c0 == 0xe0 ? 0xa0 : 0x80))
+	/* Exclude U+D800..U+DFFF; second byte must be <= 0xbf. */
+	    || UNEXPECTED(ch > (c0 == 0xed ? 0x9f : 0xbf))
+	/* Require UTF-8 tail byte (third byte). */
+	    || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80))
+	    return (0);
+	return ((char *) cp);
+    }
+    /* Four-byte encodings. */
+    else if (EXPECTED(c0 <= 0xf4) /* we know that c0 >= 0xf0 */ ) {
+	if (UNEXPECTED(ep && cp + 3 >= ep)
+	/* Exclude over-long encodings; second byte must be >= 0x80. */
+	    || UNEXPECTED((ch = *++cp) < (c0 == 0xf0 ? 0x90 : 0x80))
+	/* Exclude code points above U+10FFFF; second byte must be <= 0xbf. */
+	    || UNEXPECTED(ch > (c0 == 0xf4 ? 0x8f : 0xbf))
+	/* Require UTF-8 tail byte (third byte). */
+	    || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80)
+	/* Require UTF-8 tail byte (fourth byte). */
+	    || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80))
+	    return (0);
+	return ((char *) cp);
+    }
+    /* Invalid: c0 >= 0xf5 */
+    else {
+	return (0);
+    }
+}
+
+#undef inline 
diff --git a/src/util/printable.c b/src/util/printable.c
index 6c148fd..0e1ae19 100644
--- a/src/util/printable.c
+++ b/src/util/printable.c
@@ -45,6 +45,10 @@
 /*	Google, Inc.
 /*	111 8th Avenue
 /*	New York, NY 10011, USA
+/*
+/*	Wietse Venema
+/*	porcupine.org
+/*	Amawalk, NY 10501, USA
 /*--*/
 
 /* System library. */
@@ -56,8 +60,9 @@
 /* Utility library. */
 
 #include "stringops.h"
+#include "parse_utf8_char.h"
 
-int util_utf8_enable = 0;
+int     util_utf8_enable = 0;
 
 /* printable -  binary compatibility */
 
@@ -74,27 +79,150 @@ char   *printable(char *string, int replacement)
 
 char   *printable_except(char *string, int replacement, const char *except)
 {
-    unsigned char *cp;
+    char   *cp;
+    char   *last;			/* last byte of UTF-8 char, or null */
     int     ch;
 
     /*
-     * XXX Replace invalid UTF8 sequences (too short, over-long encodings,
-     * out-of-range code points, etc). See valid_utf8_string.c.
+     * In case of a non-UTF8 sequence (bad leader byte, bad non-leader byte,
+     * over-long encodings, out-of-range code points, etc), replace the first
+     * byte, and try to resynchronize at the next byte.
      */
-    cp = (unsigned char *) string;
-    while ((ch = *cp) != 0) {
-	if (ISASCII(ch) && (ISPRINT(ch) || (except && strchr(except, ch)))) {
-	    /* ok */
-	} else if (util_utf8_enable && ch >= 194 && ch <= 254
-		   && cp[1] >= 128 && cp[1] < 192) {
-	    /* UTF8; skip the rest of the bytes in the character. */
-	    while (cp[1] >= 128 && cp[1] < 192)
-		cp++;
-	} else {
-	    /* Not ASCII and not UTF8. */
-	    *cp = replacement;
+#define PRINT_OR_EXCEPT(ch) (ISPRINT(ch) || (except && strchr(except, ch)))
+
+    for (cp = string; (ch = *(unsigned char *) cp) != 0; cp++) {
+	if (util_utf8_enable == 0) {		/* ASCII-only mode */
+	    if (ISASCII(ch) && PRINT_OR_EXCEPT(ch))
+		continue;
+	} else if ((last = parse_utf8_char(cp, 0)) == cp) {	/* ASCII */
+	    if (PRINT_OR_EXCEPT(ch))
+		continue;
+	} else if (last != 0) {			/* Other UTF8 */
+	    cp = last;				/* loop increment steps past it */
+	    continue;
 	}
-	cp++;
+	*cp = replacement;			/* unprintable or malformed byte */
     }
     return (string);
 }
+
+#ifdef TEST
+
+#include <stdlib.h>
+#include <string.h>
+#include <msg.h>
+#include <msg_vstream.h>
+#include <mymalloc.h>
+#include <vstream.h>
+
+ /*
+  * Test cases for 1-, 2-, and 3-byte encodings. Originally contributed by
+  * Viktor Dukhovni, and annotated using translate.google.com.
+  * 
+  * See valid_utf8_string.c for single-error tests.
+  * 
+  * XXX Need a test for 4-byte encodings, preferably with strings that can be
+  * displayed.
+  */
+struct testcase {
+    const char *name;			/* identifies test case */
+    const char *input;			/* text passed to printable() */
+    const char *expected;		/* text wanted after replacement */
+};
+static const struct testcase testcases[] = {
+    {"Printable ASCII",
+	"printable", "printable"
+    },
+    {"ASCII with control character",
+	"non\bn-printable", "non?n-printable"
+    },
+    {"Latin accented text, no error",
+	"na\303\257ve", "na\303\257ve"
+    },
+    {"Latin text, with error",
+	"na\303ve", "na?ve"
+    },
+    {"Viktor, Cyrillic, no error",
+	"\320\262\320\270\320\272\321\202\320\276\321\200",
+	"\320\262\320\270\320\272\321\202\320\276\321\200"
+    },
+    {"Viktor, Cyrillic, two errors",
+	"\320\262\320\320\272\272\321\202\320\276\321\200",
+	"\320\262?\320\272?\321\202\320\276\321\200"
+    },
+    {"Viktor, Hebrew, no error",
+	"\327\225\327\231\327\247\327\230\327\225\326\274\327\250",
+	"\327\225\327\231\327\247\327\230\327\225\326\274\327\250"
+    },
+    {"Viktor, Hebrew, with error",
+	"\327\225\231\327\247\327\230\327\225\326\274\327\250",
+	"\327\225?\327\247\327\230\327\225\326\274\327\250"
+    },
+    {"Chinese (Simplified), no error",
+	"\344\270\255\345\233\275\344\272\222\350\201\224\347\275\221\347"
+	"\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+	"\237\350\256\241\346\212\245\345\221\212",
+	"\344\270\255\345\233\275\344\272\222\350\201\224\347\275\221\347"
+	"\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+	"\237\350\256\241\346\212\245\345\221\212"
+    },
+    {"Chinese (Simplified), with errors",
+	"\344\270\255\345\344\272\222\350\224\347\275\221\347"
+	"\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+	"\237\350\256\241\346\212\245\345",
+	"\344\270\255?\344\272\222??\347\275\221\347"
+	"\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+	"\237\350\256\241\346\212\245?"
+    },
+};
+
+int     main(int argc, char **argv)
+{
+    const struct testcase *tp;
+    int     pass;
+    int     fail;
+
+#define NUM_TESTS	(sizeof(testcases)/sizeof(testcases[0]))
+
+    msg_vstream_init(basename(argv[0]), VSTREAM_ERR);
+    util_utf8_enable = 1;			/* exercise the UTF-8 branch */
+
+    for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) {
+	char   *input;
+	char   *actual;
+	int     ok = 0;
+
+	/*
+	 * Notes:
+	 *
+	 * - The input is modified, therefore it must be copied.
+	 *
+	 * - The msg(3) functions use printable() which interferes when logging
+	 * inputs and outputs. Use vstream_fprintf() instead.
+	 */
+	vstream_fprintf(VSTREAM_ERR, "RUN  %s\n", tp->name);
+	input = mystrdup(tp->input);
+	actual = printable(input, '?');
+
+	if (strcmp(actual, tp->expected) != 0) {
+	    vstream_fprintf(VSTREAM_ERR, "input: >%s<, got: >%s<, want: >%s<\n",
+			    tp->input, actual, tp->expected);
+	} else {
+	    vstream_fprintf(VSTREAM_ERR, "input: >%s<, got and want: >%s<\n",
+			    tp->input, actual);
+	    ok = 1;
+	}
+	if (ok) {
+	    vstream_fprintf(VSTREAM_ERR, "PASS %s\n", tp->name);
+	    pass++;
+	} else {
+	    vstream_fprintf(VSTREAM_ERR, "FAIL %s\n", tp->name);
+	    fail++;
+	}
+	myfree(input);
+    }
+    msg_info("PASS=%d FAIL=%d", pass, fail);
+    return (fail > 0);
+}
+
+#endif
diff --git a/src/util/quote_for_json.c b/src/util/quote_for_json.c
new file mode 100644
index 0000000..f54af3f
--- /dev/null
+++ b/src/util/quote_for_json.c
@@ -0,0 +1,218 @@
+/*++
+/* NAME
+/*	quote_for_json 3
+/* SUMMARY
+/*	quote UTF-8 string value for JSON
+/* SYNOPSIS
+/*	#include <stringops.h>
+/*
+/*	char	*quote_for_json(
+/*	VSTRING	*result,
+/*	const char *in,
+/*	ssize_t	len)
+/*
+/*	char	*quote_for_json_append(
+/*	VSTRING	*result,
+/*	const char *in,
+/*	ssize_t	len)
+/* DESCRIPTION
+/*	quote_for_json() takes well-formed UTF-8 encoded text,
+/*	quotes that text compliant with RFC 4627, and returns a
+/*	pointer to the resulting text. The input may contain null
+/*	bytes, but the output will not.
+/*
+/*	quote_for_json() produces short (two-letter) escape sequences
+/*	for common control characters, double quote and backslash.
+/*	It will not quote "/" (0x2F), and will quote DEL (0x7f) as
+/*	\u007F to make it printable. The input byte sequence "\uXXXX"
+/*	is quoted like any other text (the "\" is escaped as "\\").
+/*
+/*	quote_for_json() does not perform UTF-8 validation. The caller
+/*	should use valid_utf8_string() or printable() as appropriate.
+/*
+/*	quote_for_json_append() appends the output to the result buffer.
+/*
+/*	Arguments:
+/* .IP result
+/*	Storage for the result, resized automatically.
+/* .IP in
+/*	Pointer to the input byte sequence.
+/* .IP len
+/*	The length of the input byte sequence, or a negative number
+/*	when the byte sequence is null-terminated.
+/* DIAGNOSTICS
+/*	Fatal error: memory allocation error.
+/* LICENSE
+/* .ad
+/* .fi
+/*	The Secure Mailer license must be distributed with this software.
+/* AUTHOR(S)
+/*	Wietse Venema
+/*	Google, Inc.
+/*	111 8th Avenue
+/*	New York, NY 10011, USA
+/*
+/*	Wietse Venema
+/*	porcupine.org
+/*--*/
+
+ /*
+  * System library.
+  */
+#include <sys_defs.h>
+#include <ctype.h>
+#include <string.h>
+
+ /*
+  * Utility library.
+  */
+#include <stringops.h>
+#include <vstring.h>
+
+#define STR(x) vstring_str(x)
+
+/* quote_for_json_append - append JSON-quoted text to result */
+
+char   *quote_for_json_append(VSTRING *result, const char *text, ssize_t len)
+{
+    const unsigned char *bp = (const unsigned char *) text;
+    const unsigned char *stop;
+    int     ch;
+    int     esc;
+
+    /*
+     * A negative length means that the input is null-terminated.
+     */
+    stop = bp + (len < 0 ? (ssize_t) strlen(text) : len);
+
+    while (bp < stop) {
+	ch = *bp++;
+	if (UNEXPECTED(ISCNTRL(ch))) {
+	    switch (ch) {
+	    case '\b':
+		esc = 'b';
+		break;
+	    case '\f':
+		esc = 'f';
+		break;
+	    case '\n':
+		esc = 'n';
+		break;
+	    case '\r':
+		esc = 'r';
+		break;
+	    case '\t':
+		esc = 't';
+		break;
+	    default:
+		/* All other controls including DEL and NUL. */
+		esc = 0;
+		break;
+	    }
+	    if (esc != 0) {
+		VSTRING_ADDCH(result, '\\');
+		VSTRING_ADDCH(result, esc);
+	    } else {
+		vstring_sprintf_append(result, "\\u%04X", ch);
+	    }
+	} else {
+	    /* Only backslash and double quote need a backslash prefix. */
+	    if (ch == '\\' || ch == '"')
+		VSTRING_ADDCH(result, '\\');
+	    /* Includes malformed UTF-8. */
+	    VSTRING_ADDCH(result, ch);
+	}
+    }
+    VSTRING_TERMINATE(result);
+    return (STR(result));
+}
+
+/* quote_for_json - quote JSON string, overwrite result buffer */
+
+char   *quote_for_json(VSTRING *result, const char *text, ssize_t len)
+{
+    VSTRING_RESET(result);			/* discard old content */
+    return (quote_for_json_append(result, text, len));
+}
+
+#ifdef TEST
+
+ /*
+  * System library.
+  */
+#include <stdlib.h>
+
+ /*
+  * Utility library.
+  */
+#include <msg.h>
+#include <msg_vstream.h>
+
+typedef struct TEST_CASE {
+    const char *label;			/* identifies test case */
+    char   *(*fn) (VSTRING *, const char *, ssize_t);
+    const char *input;			/* input string */
+    ssize_t input_len;			/* -1 or input length */
+    const char *exp_res;		/* expected result */
+} TEST_CASE;
+
+#define PASS	(0)
+#define FAIL	(1)
+
+ /*
+  * The test cases.
+  */
+static const TEST_CASE test_cases[] = {
+    {"ordinary ASCII text", quote_for_json,
+	" abcABC012.,[]{}/", -1, " abcABC012.,[]{}/",
+    },
+    {"quote_for_json_append", quote_for_json_append,
+	"foo", -1, " abcABC012.,[]{}/foo",
+    },
+    {"common control characters", quote_for_json,
+	"\b\f\r\n\t", -1, "\\b\\f\\r\\n\\t",
+    },
+    {"uncommon control characters and DEL", quote_for_json,
+	"\0\01\037\040\176\177", 6, "\\u0000\\u0001\\u001F ~\\u007F",
+    },
+    {"malformed UTF-8", quote_for_json,
+	"\\*\\uasd\\u007F\x80", -1, "\\\\*\\\\uasd\\\\u007F\x80",
+    },
+    0,
+};
+
+int     main(int argc, char **argv)
+{
+    const TEST_CASE *tp;
+    int     pass = 0;
+    int     fail = 0;
+    VSTRING *res_buf = vstring_alloc(100);
+
+    msg_vstream_init(sane_basename((VSTRING *) 0, argv[0]), VSTREAM_ERR);
+
+    for (tp = test_cases; tp->label != 0; tp++) {
+	int     test_fail = 0;
+	char   *res;
+
+	msg_info("RUN  %s", tp->label);
+	/* One shared buffer: an append test extends the preceding result. */
+	res = tp->fn(res_buf, tp->input, tp->input_len);
+	if (strcmp(res, tp->exp_res) != 0) {
+	    msg_warn("test case '%s': got '%s', want '%s'",
+		     tp->label, res, tp->exp_res);
+	    test_fail = 1;
+	}
+	if (test_fail) {
+	    fail++;
+	    msg_info("FAIL %s", tp->label);
+	} else {
+	    msg_info("PASS %s", tp->label);
+	    pass++;
+	}
+    }
+    msg_info("PASS=%d FAIL=%d", pass, fail);
+    vstring_free(res_buf);
+    exit(fail != 0);
+}
+
+#endif
diff --git a/src/util/readlline.c b/src/util/readlline.c
index 015877a..721b75f 100644
--- a/src/util/readlline.c
+++ b/src/util/readlline.c
@@ -85,9 +85,15 @@ VSTRING *readllines(VSTRING *buf, VSTREAM *fp, int *lineno, int *first_line)
     int     next;
     ssize_t start;
     char   *cp;
+    int     my_lineno = 0, my_first_line, got_null = 0;
 
     VSTRING_RESET(buf);
 
+    if (lineno == 0)
+	lineno = &my_lineno;
+    if (first_line == 0)
+	first_line = &my_first_line;
+
     /*
      * Ignore comment lines, all whitespace lines, and empty lines. Terminate
      * at EOF or at the beginning of the next logical line.
@@ -95,16 +101,19 @@ VSTRING *readllines(VSTRING *buf, VSTREAM *fp, int *lineno, int *first_line)
     for (;;) {
 	/* Read one line, possibly not newline terminated. */
 	start = LEN(buf);
-	while ((ch = VSTREAM_GETC(fp)) != VSTREAM_EOF && ch != '\n')
+	while ((ch = VSTREAM_GETC(fp)) != VSTREAM_EOF && ch != '\n') {
 	    VSTRING_ADDCH(buf, ch);
-	if (lineno != 0 && (ch == '\n' || LEN(buf) > start))
+	    if (ch == 0)
+		got_null = 1;
+	}
+	if (ch == '\n' || LEN(buf) > start)
 	    *lineno += 1;
 	/* Ignore comment line, all whitespace line, or empty line. */
 	for (cp = STR(buf) + start; cp < END(buf) && ISSPACE(*cp); cp++)
 	     /* void */ ;
 	if (cp == END(buf) || *cp == '#')
 	    vstring_truncate(buf, start);
-	else if (start == 0 && lineno != 0 && first_line != 0)
+	if (start == 0)
 	    *first_line = *lineno;
 	/* Terminate at EOF or at the beginning of the next logical line. */
 	if (ch == VSTREAM_EOF)
@@ -118,6 +127,20 @@ VSTRING *readllines(VSTRING *buf, VSTREAM *fp, int *lineno, int *first_line)
     }
     VSTRING_TERMINATE(buf);
 
+    /*
+     * This code does not care about embedded null bytes, but callers do.
+     */
+    if (got_null) {
+	const char *why = "text after null byte may be ignored";
+
+	if (*first_line == *lineno)
+	    msg_warn("%s, line %d: %s",
+		     VSTREAM_PATH(fp), *lineno, why);
+	else
+	    msg_warn("%s, line %d-%d: %s",
+		     VSTREAM_PATH(fp), *first_line, *lineno, why);
+    }
+
     /*
      * Invalid input: continuing text without preceding text. Allowing this
      * would complicate "postconf -e", which implements its own multi-line
@@ -136,3 +159,205 @@ VSTRING *readllines(VSTRING *buf, VSTREAM *fp, int *lineno, int *first_line)
      */
     return (LEN(buf) > 0 ? buf : 0);
 }
+
+ /*
+  * Stand-alone test program.
+  */
+#ifdef TEST
+#include <stdlib.h>
+#include <string.h>
+#include <msg.h>
+#include <msg_vstream.h>
+#include <stringops.h>
+#include <vstream.h>
+#include <vstring.h>
+
+ /*
+  * Test cases. Note: the input and exp_output fields are converted with
+  * unescape(). Embedded null bytes must be specified as \\0.
+  */
+struct testcase {
+    const char *name;			/* test label, logged with RUN/PASS/FAIL */
+    const char *input;			/* stream content, before unescape() */
+    const char *exp_output;		/* expected text, before unescape() */
+    int     exp_first_line;		/* expected first_line result */
+    int     exp_last_line;		/* expected lineno result */
+};
+
+static const struct testcase testcases[] = {	/* names are unique, to identify failures */
+    {"leading space before non-comment",
+	" abcde\nfghij\n",
+	"fghij",
+	2, 2
+	/* Expect "logical line must not start with whitespace" */
+    },
+    {"leading space before leading comment",
+	" #abcde\nfghij\n",
+	"fghij",
+	2, 2
+    },
+    {"leading #comment at beginning of line",
+	"#abc\ndef",
+	"def",
+	2, 2,
+    },
+    {"empty line before non-comment",
+	"\nabc\n",
+	"abc",
+	2, 2,
+    },
+    {"whitespace line before non-comment",
+	" \nabc\n",
+	"abc",
+	2, 2,
+    },
+    {"missing newline at end of non-comment",
+	"abc def",
+	"abc def",
+	1, 1,
+    },
+    {"missing newline at end of comment",
+	"#abc def",
+	"",
+	1, 1,
+    },
+    {"embedded null, single-line",
+	"abc\\0def",
+	"abc\\0def",
+	1, 1,
+	/* Expect "line 1: text after null byte may be ignored" */
+    },
+    {"embedded null, multiline",
+	"abc\\0\n def",
+	"abc\\0 def",
+	1, 2,
+	/* Expect "line 1-2: text after null byte may be ignored" */
+    },
+    {"embedded null in comment",
+	"#abc\\0\ndef",
+	"def",
+	2, 2,
+	/* Expect "line 2: text after null byte may be ignored" */
+    },
+    {"multiline input",
+	"abc\n def\n",
+	"abc def",
+	1, 2,
+    },
+    {"multiline input with embedded #comment after space",
+	"abc\n #def\n ghi",
+	"abc ghi",
+	1, 3,
+    },
+    {"multiline input with embedded #comment flush left",
+	"abc\n#def\n ghi",
+	"abc ghi",
+	1, 3,
+    },
+    {"multiline input with embedded whitespace line",
+	"abc\n \n ghi",
+	"abc ghi",
+	1, 3,
+    },
+    {"multiline input with embedded empty line",
+	"abc\n\n ghi",
+	"abc ghi",
+	1, 3,
+    },
+    {"multiline input with trailing #comment after space",
+	"abc\n #def\n",
+	"abc",
+	1, 2,
+    },
+    {"multiline input with trailing #comment flush left",
+	"abc\n#def\n",
+	"abc",
+	1, 2,
+    },
+    {"empty line at end of file",
+	"\n",
+	"",
+	1, 1,
+    },
+    {"whitespace line at end of file",
+	"\n \n",
+	"",
+	2, 2,
+    },
+    {"whitespace at end of file",
+	"abc\n ",
+	"abc",
+	1, 2,
+    },
+};
+
+int     main(int argc, char **argv)
+{
+    const struct testcase *tp;
+    VSTRING *inp_buf = vstring_alloc(100);	/* unescaped test input */
+    VSTRING *exp_buf = vstring_alloc(100);	/* unescaped expected output */
+    VSTRING *out_buf = vstring_alloc(100);	/* readllines() result */
+    VSTRING *esc_buf = vstring_alloc(100);	/* escaped result, for logging */
+    VSTREAM *fp;
+    int     last_line;
+    int     first_line;
+    int     pass;
+    int     fail;
+
+#define NUM_TESTS       (sizeof(testcases)/sizeof(testcases[0]))
+
+    msg_vstream_init(sane_basename((VSTRING *) 0, argv[0]), VSTREAM_ERR);
+    util_utf8_enable = 1;
+
+    for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) {
+	int     ok = 0;
+
+	vstream_fprintf(VSTREAM_ERR, "RUN  %s\n", tp->name);
+	unescape(inp_buf, tp->input);
+	unescape(exp_buf, tp->exp_output);
+	if ((fp = vstream_memopen(inp_buf, O_RDONLY)) == 0)
+	    msg_panic("open memory stream for reading: %m");
+	vstream_control(fp, CA_VSTREAM_CTL_PATH("memory buffer"),
+			CA_VSTREAM_CTL_END);
+	last_line = 0;				/* readllines() adds to this counter */
+	if (readllines(out_buf, fp, &last_line, &first_line) == 0) {
+	    VSTRING_RESET(out_buf);		/* null result: compare as empty text */
+	    VSTRING_TERMINATE(out_buf);
+	}
+	if (LEN(out_buf) != LEN(exp_buf)) {
+	    msg_warn("unexpected output length, got: %ld, want: %ld",
+		     (long) LEN(out_buf), (long) LEN(exp_buf));
+	} else if (memcmp(STR(out_buf), STR(exp_buf), LEN(out_buf)) != 0) {
+	    msg_warn("unexpected output: got: >%s<, want: >%s<",
+		     STR(escape(esc_buf, STR(out_buf), LEN(out_buf))),
+		     tp->exp_output);
+	} else if (first_line != tp->exp_first_line) {
+	    msg_warn("unexpected first_line: got: %d, want: %d",
+		     first_line, tp->exp_first_line);
+	} else if (last_line != tp->exp_last_line) {
+	    msg_warn("unexpected last_line: got: %d, want: %d",
+		     last_line, tp->exp_last_line);
+	} else {
+	    vstream_fprintf(VSTREAM_ERR, "got and want: >%s<\n",
+			    tp->exp_output);
+	    ok = 1;
+	}
+	if (ok) {
+	    vstream_fprintf(VSTREAM_ERR, "PASS %s\n", tp->name);
+	    pass++;
+	} else {
+	    vstream_fprintf(VSTREAM_ERR, "FAIL %s\n", tp->name);
+	    fail++;
+	}
+	vstream_fclose(fp);
+    }
+    vstring_free(inp_buf);
+    vstring_free(exp_buf);
+    vstring_free(out_buf);
+    vstring_free(esc_buf);
+
+    msg_info("PASS=%d FAIL=%d", pass, fail);
+    return (fail > 0);				/* 0: all passed, 1: at least one failed */
+}
+
+#endif
diff --git a/src/util/stringops.h b/src/util/stringops.h
index 97aa597..db56f23 100644
--- a/src/util/stringops.h
+++ b/src/util/stringops.h
@@ -60,10 +60,13 @@ extern int allascii_len(const char *, ssize_t);
 extern const char *WARN_UNUSED_RESULT split_nameval(char *, char **, char **);
 extern const char *WARN_UNUSED_RESULT split_qnameval(char *, char **, char **);
 extern int valid_utf8_string(const char *, ssize_t);
+extern int valid_utf8_stringz(const char *);
 extern size_t balpar(const char *, const char *);
 extern char *WARN_UNUSED_RESULT extpar(char **, const char *, int);
 extern int strcasecmp_utf8x(int, const char *, const char *);
 extern int strncasecmp_utf8x(int, const char *, const char *, ssize_t);
+extern char *quote_for_json(VSTRING *, const char *, ssize_t);
+extern char *quote_for_json_append(VSTRING *, const char *, ssize_t);
 
 #define EXTPAR_FLAG_NONE	(0)
 #define EXTPAR_FLAG_STRIP	(1<<0)	/* "{ text }" -> "text" */
diff --git a/src/util/sys_defs.h b/src/util/sys_defs.h
index 9247185..62749ab 100644
--- a/src/util/sys_defs.h
+++ b/src/util/sys_defs.h
@@ -1331,6 +1331,13 @@ extern int dup2_pass_on_exec(int oldd, int newd);
 #undef HAVE_RES_SEND
 #endif
 
+ /*
+  * The RFC 5322 Date and Time Specification recommends single space between
+  * date-time tokens. To avoid breaking change, format all numerical days as
+  * two-digit days (i.e. days 1-9 now have a leading zero instead of space).
+  */
+#define TWO_DIGIT_DAY_IN_DATE_TIME
+
  /*
   * Check for required but missing definitions.
   */
diff --git a/src/util/valid_hostname.c b/src/util/valid_hostname.c
index 8b234c4..457d1f1 100644
--- a/src/util/valid_hostname.c
+++ b/src/util/valid_hostname.c
@@ -6,9 +6,9 @@
 /* SYNOPSIS
 /*	#include <valid_hostname.h>
 /*
-/*	int	valid_hostname(name, gripe)
+/*	int	valid_hostname(name, flags)
 /*	const char *name;
-/*	int	gripe;
+/*	int	flags;
 /*
 /*	int	valid_hostaddr(addr, gripe)
 /*	const char *addr;
@@ -32,6 +32,10 @@
 /*	dots, no leading or trailing dots or hyphens, no labels
 /*	longer than VALID_LABEL_LEN characters, and it should not
 /*	be all numeric.
+/*	The flags argument is the bit-wise or of zero or more of
+/*	DO_GRIPE or DO_WILDCARD (the latter allows the "*." name
+/*	prefix, which is rare but valid in some DNS responses and
+/*	queries).
 /*
 /*	valid_hostaddr() requires that the input is a valid string
 /*	representation of an IPv4 or IPv6 network address as
@@ -403,8 +407,9 @@ int     main(int unused_argc, char **argv)
 
     while (vstring_fgets_nonl(buffer, VSTREAM_IN)) {
 	msg_info("testing: \"%s\"", vstring_str(buffer));
-	valid_hostname(vstring_str(buffer), DO_GRIPE);
-	valid_hostaddr(vstring_str(buffer), DO_GRIPE);
+	valid_hostname(vstring_str(buffer), DO_GRIPE | DO_WILDCARD);
+	if (strchr(vstring_str(buffer), '*') == 0)
+	    valid_hostaddr(vstring_str(buffer), DO_GRIPE);
     }
     exit(0);
 }
diff --git a/src/util/valid_hostname.in b/src/util/valid_hostname.in
index 608c0d1..4cdf019 100644
--- a/src/util/valid_hostname.in
+++ b/src/util/valid_hostname.in
@@ -53,3 +53,9 @@ g:a:a:a:a:a:a:a
 a::b
 :a::b
 a::b:
+*.foo.bar
+*foo.bar
+foo.*.bar
+foo*bar
+foo.bar*
+*
diff --git a/src/util/valid_hostname.ref b/src/util/valid_hostname.ref
index 08b23b8..eccc558 100644
--- a/src/util/valid_hostname.ref
+++ b/src/util/valid_hostname.ref
@@ -141,3 +141,13 @@
 ./valid_hostname: testing: "a::b:"
 ./valid_hostname: warning: valid_hostname: invalid character 58(decimal): a::b:
 ./valid_hostname: warning: valid_ipv6_hostaddr: bad null last field in IPv6 address: a::b:
+./valid_hostname: testing: "*.foo.bar"
+./valid_hostname: testing: "*foo.bar"
+./valid_hostname: warning: valid_hostname: '*' can be the first label only: *foo.bar
+./valid_hostname: testing: "foo.*.bar"
+./valid_hostname: warning: valid_hostname: '*' can be the first label only: foo.*.bar
+./valid_hostname: testing: "foo*bar"
+./valid_hostname: warning: valid_hostname: '*' can be the first label only: foo*bar
+./valid_hostname: testing: "foo.bar*"
+./valid_hostname: warning: valid_hostname: '*' can be the first label only: foo.bar*
+./valid_hostname: testing: "*"
diff --git a/src/util/valid_utf8_string.c b/src/util/valid_utf8_string.c
index 96b5b4d..f5b4ff4 100644
--- a/src/util/valid_utf8_string.c
+++ b/src/util/valid_utf8_string.c
@@ -9,24 +9,24 @@
 /*	int	valid_utf8_string(str, len)
 /*	const char *str;
 /*	ssize_t	len;
+/*
+/*	int	valid_utf8_stringz(str)
+/*	const char *str;
+/*
 /* DESCRIPTION
-/*	valid_utf8_string() determines if a string satisfies the UTF-8
-/*	definition in RFC 3629. That is, it contains proper encodings
-/*	of code points U+0000..U+10FFFF, excluding over-long encodings
-/*	and excluding U+D800..U+DFFF surrogates.
+/*	valid_utf8_string() determines if all bytes in a string
+/*	satisfy parse_utf8_char(3h) checks. See there for any
+/*	implementation limitations.
+/*
+/*	valid_utf8_stringz() determines the same for zero-terminated
+/*	strings.
 /*
 /*	A zero-length string is considered valid.
 /* DIAGNOSTICS
 /*	The result value is zero when the caller specifies a negative
-/*	length, or a string that violates RFC 3629, for example a
-/*	string that is truncated in the middle of a multi-byte
-/*	sequence.
-/* BUGS
-/*	But wait, there is more. Code points in the range U+FDD0..U+FDEF
-/*	and ending in FFFE or FFFF are non-characters in UNICODE. This
-/*	function does not block these.
+/*	length, or a string that does not pass parse_utf8_char(3h) checks.
 /* SEE ALSO
-/*	RFC 3629
+/*	parse_utf8_char(3h), parse one UTF-8 multibyte character
 /* LICENSE
 /* .ad
 /* .fi
@@ -36,6 +36,10 @@
 /*	IBM T.J. Watson Research
 /*	P.O. Box 704
 /*	Yorktown Heights, NY 10598, USA
+/*
+/*	Wietse Venema
+/*	porcupine.org
+/*	Amawalk, NY 10501, USA
 /*--*/
 
 /* System library. */
@@ -45,66 +49,50 @@
 /* Utility library. */
 
 #include <stringops.h>
+#include <parse_utf8_char.h>
 
 /* valid_utf8_string - validate string according to RFC 3629 */
 
 int     valid_utf8_string(const char *str, ssize_t len)
 {
-    const unsigned char *end = (const unsigned char *) str + len;
-    const unsigned char *cp;
-    unsigned char c0, ch;
+    const char *ep = str + len;		/* end of input, one past the last byte */
+    const char *cp;
+    const char *last;			/* result from parse_utf8_char() */
 
     if (len < 0)
 	return (0);
-    if (len <= 0)
+    if (len == 0)			/* a zero-length string is valid */
 	return (1);
 
     /*
-     * Optimized for correct input, time, space, and for CPUs that have a
-     * decent number of registers.
+     * Ideally, the compiler will inline parse_utf8_char().
      */
-    for (cp = (const unsigned char *) str; cp < end; cp++) {
-	/* Single-byte encodings. */
-	if (EXPECTED((c0 = *cp) <= 0x7f) /* we know that c0 >= 0x0 */ ) {
-	     /* void */ ;
-	}
-	/* Two-byte encodings. */
-	else if (EXPECTED(c0 <= 0xdf) /* we know that c0 >= 0x80 */ ) {
-	    /* Exclude over-long encodings. */
-	    if (UNEXPECTED(c0 < 0xc2)
-		|| UNEXPECTED(cp + 1 >= end)
-	    /* Require UTF-8 tail byte. */
-		|| UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80))
-		return (0);
-	}
-	/* Three-byte encodings. */
-	else if (EXPECTED(c0 <= 0xef) /* we know that c0 >= 0xe0 */ ) {
-	    if (UNEXPECTED(cp + 2 >= end)
-	    /* Exclude over-long encodings. */
-		|| UNEXPECTED((ch = *++cp) < (c0 == 0xe0 ? 0xa0 : 0x80))
-	    /* Exclude U+D800..U+DFFF. */
-		|| UNEXPECTED(ch > (c0 == 0xed ? 0x9f : 0xbf))
-	    /* Require UTF-8 tail byte. */
-		|| UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80))
-		return (0);
-	}
-	/* Four-byte encodings. */
-	else if (EXPECTED(c0 <= 0xf4) /* we know that c0 >= 0xf0 */ ) {
-	    if (UNEXPECTED(cp + 3 >= end)
-	    /* Exclude over-long encodings. */
-		|| UNEXPECTED((ch = *++cp) < (c0 == 0xf0 ? 0x90 : 0x80))
-	    /* Exclude code points above U+10FFFF. */
-		|| UNEXPECTED(ch > (c0 == 0xf4 ? 0x8f : 0xbf))
-	    /* Require UTF-8 tail byte. */
-		|| UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80)
-	    /* Require UTF-8 tail byte. */
-		|| UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80))
-		return (0);
-	}
-	/* Invalid: c0 >= 0xf5 */
-	else {
+    for (cp = str; cp < ep; cp++) {
+	if ((last = parse_utf8_char(cp, ep)) != 0)
+	    cp = last;			/* presumably the character's last byte; cp++ moves past it */
+	else
+	    return (0);			/* not accepted by parse_utf8_char() */
+    }
+    return (1);
+}
+
+/* valid_utf8_stringz - validate string according to RFC 3629 */
+
+int     valid_utf8_stringz(const char *str)
+{
+    const char *cp;
+    const char *last;			/* result from parse_utf8_char() */
+
+    /*
+     * Ideally, the compiler will inline parse_utf8_char(), propagate the
+     * null pointer constant value, and eliminate code branches that test
+     * whether 0 != 0.
+     */
+    for (cp = str; *cp; cp++) {		/* stop at the null terminator */
+	if ((last = parse_utf8_char(cp, 0)) != 0)	/* 0: no end pointer given */
+	    cp = last;
+	else
 	    return (0);
-	}
     }
     return (1);
 }
@@ -114,26 +102,139 @@ int     valid_utf8_string(const char *str, ssize_t len)
   */
 #ifdef TEST
 #include <stdlib.h>
+#include <string.h>
+#include <msg.h>
 #include <vstream.h>
-#include <vstring.h>
-#include <vstring_vstream.h>
+#include <msg_vstream.h>
+
+ /*
+  * Test cases for 1-, 2-, and 3-byte encodings. See printable.c for UTF8
+  * parser resynchronization tests.
+  * 
+  * XXX Need a test for 4-byte encodings, preferably with strings that can be
+  * displayed.
+  * 
+  * XXX Need tests with hand-crafted over-long encodings and surrogates.
+  */
+struct testcase {
+    const char *name;			/* test label, logged with RUN/PASS/FAIL */
+    const char *input;			/* null-terminated input bytes */
+    int     expected;			/* T_VALID or T_INVALID */
+};
 
-#define STR(x) vstring_str(x)
-#define LEN(x) VSTRING_LEN(x)
+#define T_VALID		(1)	/* expected validator result: valid */
+#define T_INVALID	(0)	/* expected validator result: invalid */
+#define valid_to_str(v)	((v) ? "VALID" : "INVALID")	/* result as text, for logging */
 
-int     main(void)
+static const struct testcase testcases[] = {
+    {"Printable ASCII",
+	"printable", T_VALID,
+    },
+    {"Latin script, accented, no error",
+	"na\303\257ve", T_VALID,		/* \303\257 is one 2-byte character */
+    },
+    {"Latin script, accented, missing non-leading byte",
+	"na\303ve", T_INVALID,			/* lead byte \303 followed by ASCII */
+    },
+    {"Latin script, accented, missing leading byte",
+	"na\257ve", T_INVALID,			/* non-leading byte \257 without lead byte */
+    },
+    {"Viktor, Cyrillic, no error",
+	"\320\262\320\270\320\272\321\202\320\276\321\200", T_VALID,
+    },
+    {"Viktor, Cyrillic, missing non-leading byte",
+	"\320\262\320\320\272\321\202\320\276\321\200", T_INVALID,
+    },
+    {"Viktor, Cyrillic, missing leading byte",
+	"\320\262\270\320\272\321\202\320\276\321\200", T_INVALID,
+    },
+    {"Viktor, Cyrillic, truncated",
+	"\320\262\320\270\320\272\321\202\320\276\321", T_INVALID,
+    },
+    {"Viktor, Hebrew, no error",
+	"\327\225\327\231\327\247\327\230\327\225\326\274\327\250", T_VALID,
+    },
+    {"Viktor, Hebrew, missing leading byte",
+	"\327\225\231\327\247\327\230\327\225\326\274\327\250", T_INVALID,
+    },
+    {"Chinese (Simplified), no error",
+	"\344\270\255\345\233\275\344\272\222\350\201\224\347\275\221\347"
+	"\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+	"\237\350\256\241\346\212\245\345\221\212", T_VALID,
+    },
+    {"Chinese (Simplified), missing leading byte",
+	"\344\270\255\345\233\275\344\272\222\350\201\224\275\221\347"
+	"\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+	"\237\350\256\241\346\212\245\345\221\212", T_INVALID,
+    },
+    {"Chinese (Simplified), missing first non-leading byte",
+	"\344\270\255\345\233\275\344\272\222\350\201\224\347\221\347"
+	"\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+	"\237\350\256\241\346\212\245\345\221\212", T_INVALID,
+    },
+    {"Chinese (Simplified), missing second non-leading byte",
+	"\344\270\255\345\233\275\344\272\222\350\201\224\347\275\347"
+	"\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+	"\237\350\256\241\346\212\245\345\221\212", T_INVALID,
+    },
+    {"Chinese (Simplified), truncated",
+	"\344\270\255\345\233\275\344\272\222\350\201\224\347\275\221\347"
+	"\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+	"\237\350\256\241\346\212\245\345", T_INVALID,
+    },
+};
+
+int     main(int argc, char **argv)
 {
-    VSTRING *buf = vstring_alloc(1);
+    const struct testcase *tp;
+    int     pass;
+    int     fail;
+
+#define NUM_TESTS       (sizeof(testcases)/sizeof(testcases[0]))
+
+    msg_vstream_init(sane_basename((VSTRING *) 0, argv[0]), VSTREAM_ERR);
+    util_utf8_enable = 1;
+
+    for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) {
+	int     actual_l;		/* valid_utf8_string() result */
+	int     actual_z;		/* valid_utf8_stringz() result */
+	int     ok = 0;
 
-    while (vstring_get_nonl(buf, VSTREAM_IN) != VSTREAM_EOF) {
-	vstream_printf("%c", (LEN(buf) && !valid_utf8_string(STR(buf), LEN(buf))) ?
-		       '!' : ' ');
-	vstream_fwrite(VSTREAM_OUT, STR(buf), LEN(buf));
-	vstream_printf("\n");
+	/*
+	 * Notes:
+	 * 
+	 * - The msg(3) functions use printable() which interferes when logging
+	 * inputs and outputs. Use vstream_fprintf() instead.
+	 */
+	vstream_fprintf(VSTREAM_ERR, "RUN  %s\n", tp->name);
+	actual_l = valid_utf8_string(tp->input, strlen(tp->input));
+	actual_z = valid_utf8_stringz(tp->input);
+
+	if (actual_l != tp->expected) {
+	    vstream_fprintf(VSTREAM_ERR,
+			  "input: >%s<, 'actual_l' got: >%s<, want: >%s<\n",
+			    tp->input, valid_to_str(actual_l),
+			    valid_to_str(tp->expected));
+	} else if (actual_z != tp->expected) {
+	    vstream_fprintf(VSTREAM_ERR,
+			  "input: >%s<, 'actual_z' got: >%s<, want: >%s<\n",
+			    tp->input, valid_to_str(actual_z),
+			    valid_to_str(tp->expected));
+	} else {
+	    vstream_fprintf(VSTREAM_ERR, "input: >%s<, got and want: >%s<\n",
+			    tp->input, valid_to_str(actual_l));
+	    ok = 1;
+	}
+	if (ok) {
+	    vstream_fprintf(VSTREAM_ERR, "PASS %s\n", tp->name);
+	    pass++;
+	} else {
+	    vstream_fprintf(VSTREAM_ERR, "FAIL %s\n", tp->name);
+	    fail++;
+	}
     }
-    vstream_fflush(VSTREAM_OUT);
-    vstring_free(buf);
-    exit(0);
+    msg_info("PASS=%d FAIL=%d", pass, fail);
+    return (fail > 0);			/* 0: all passed, 1: at least one failed */
 }
 
 #endif
diff --git a/src/util/vstream.c b/src/util/vstream.c
index b4f9fbb..affbcc0 100644
--- a/src/util/vstream.c
+++ b/src/util/vstream.c
@@ -522,6 +522,7 @@
 /* System library. */
 
 #include <sys_defs.h>
+#include <sys/stat.h>
 #include <stdlib.h>			/* 44BSD stdarg.h uses abort() */
 #include <stdarg.h>
 #include <stddef.h>
@@ -1386,7 +1387,38 @@ VSTREAM *vstream_fopen(const char *path, int flags, mode_t mode)
     VSTREAM *stream;
     int     fd;
 
-    if ((fd = open(path, flags, mode)) < 0) {
+    /*
+     * To set permissions on new files only, we need to distinguish between
+     * creating a new file and opening an existing one.
+     */
+#define open_create(path, flags, mode) \
+	open((path), (flags) | (O_CREAT | O_EXCL), (mode))
+#define open_exist(path, flags, mode) \
+	open((path), (flags) & ~(O_CREAT | O_EXCL), (mode))
+
+    switch (flags & (O_CREAT | O_EXCL)) {
+    case O_CREAT:
+	fd = open_exist(path, flags, mode);
+	if (fd < 0 && errno == ENOENT) {
+	    fd = open_create(path, flags, mode);
+	    if (fd >= 0) {
+		if (fchmod(fd, mode) < 0)	/* can't uncreate */
+		    msg_warn("fchmod %s 0%o: %m", path, (unsigned) mode);
+	    } else if ( /* fd < 0 && */ errno == EEXIST)
+		fd = open_exist(path, flags, mode);
+	}
+	break;
+    case O_CREAT | O_EXCL:
+	fd = open(path, flags, mode);
+	if (fd >= 0)
+	    if (fchmod(fd, mode) < 0)		/* can't uncreate */
+		msg_warn("fchmod %s 0%o: %m", path, (unsigned) mode);
+	break;
+    default:
+	fd = open(path, flags, mode);
+	break;
+    }
+    if (fd < 0) {
 	return (0);
     } else {
 	stream = vstream_fdopen(fd, flags);
-- 
cgit v1.2.3