summaryrefslogtreecommitdiffstats
path: root/src/util
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/util/Makefile.in47
-rw-r--r--src/util/argv.c57
-rw-r--r--src/util/argv.h2
-rw-r--r--src/util/casefold.c2
-rw-r--r--src/util/dict_inline.c2
-rw-r--r--src/util/dict_thash.c6
-rw-r--r--src/util/dict_utf8.c4
-rw-r--r--src/util/inet_prefix_top.c1
-rw-r--r--src/util/logwriter.c34
-rw-r--r--src/util/logwriter.h1
-rw-r--r--src/util/midna_domain.c4
-rw-r--r--src/util/parse_utf8_char.h122
-rw-r--r--src/util/printable.c162
-rw-r--r--src/util/quote_for_json.c218
-rw-r--r--src/util/readlline.c231
-rw-r--r--src/util/stringops.h3
-rw-r--r--src/util/sys_defs.h7
-rw-r--r--src/util/valid_hostname.c13
-rw-r--r--src/util/valid_hostname.in6
-rw-r--r--src/util/valid_hostname.ref10
-rw-r--r--src/util/valid_utf8_string.c247
-rw-r--r--src/util/vstream.c34
22 files changed, 1100 insertions, 113 deletions
diff --git a/src/util/Makefile.in b/src/util/Makefile.in
index f69dec5..01211fb 100644
--- a/src/util/Makefile.in
+++ b/src/util/Makefile.in
@@ -45,7 +45,7 @@ SRCS = alldig.c allprint.c argv.c argv_split.c attr_clnt.c attr_print0.c \
byte_mask.c known_tcp_ports.c argv_split_at.c dict_stream.c \
sane_strtol.c hash_fnv.c ldseed.c mkmap_cdb.c mkmap_db.c mkmap_dbm.c \
mkmap_fail.c mkmap_lmdb.c mkmap_open.c mkmap_sdbm.c inet_prefix_top.c \
- inet_addr_sizes.c
+ inet_addr_sizes.c quote_for_json.c
OBJS = alldig.o allprint.o argv.o argv_split.o attr_clnt.o attr_print0.o \
attr_print64.o attr_print_plain.o attr_scan0.o attr_scan64.o \
attr_scan_plain.o auto_clnt.o base64_code.o basename.o binhash.o \
@@ -91,7 +91,8 @@ OBJS = alldig.o allprint.o argv.o argv_split.o attr_clnt.o attr_print0.o \
msg_logger.o logwriter.o unix_dgram_connect.o unix_dgram_listen.o \
byte_mask.o known_tcp_ports.o argv_split_at.o dict_stream.o \
sane_strtol.o hash_fnv.o ldseed.o mkmap_db.o mkmap_dbm.o \
- mkmap_fail.o mkmap_open.o inet_prefix_top.o inet_addr_sizes.o
+ mkmap_fail.o mkmap_open.o inet_prefix_top.o inet_addr_sizes.o \
+ quote_for_json.o
# MAP_OBJ is for maps that may be dynamically loaded with dynamicmaps.cf.
# When hard-linking these, makedefs sets NON_PLUGIN_MAP_OBJ=$(MAP_OBJ),
# otherwise it sets the PLUGIN_* macros.
@@ -145,7 +146,7 @@ TESTPROG= dict_open dup2_pass_on_exec events exec_command fifo_open \
vstream timecmp dict_cache midna_domain casefold strcasecmp_utf8 \
vbuf_print split_qnameval vstream msg_logger byte_mask \
known_tcp_ports dict_stream find_inet binhash hash_fnv argv \
- clean_env inet_prefix_top
+ clean_env inet_prefix_top printable readlline quote_for_json
PLUGIN_MAP_SO = $(LIB_PREFIX)pcre$(LIB_SUFFIX) $(LIB_PREFIX)lmdb$(LIB_SUFFIX) \
$(LIB_PREFIX)cdb$(LIB_SUFFIX) $(LIB_PREFIX)sdbm$(LIB_SUFFIX)
HTABLE_FIX = NORANDOMIZE=1
@@ -365,6 +366,16 @@ unescape: $(LIB)
$(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS)
mv junk $@.o
+printable: $(LIB)
+ mv $@.o junk
+ $(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS)
+ mv junk $@.o
+
+readlline: $(LIB)
+ mv $@.o junk
+ $(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS)
+ mv junk $@.o
+
hex_quote: $(LIB)
mv $@.o junk
$(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS)
@@ -609,6 +620,11 @@ inet_prefix_top: $(LIB)
$(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS)
mv junk $@.o
+quote_for_json: $(LIB)
+ mv $@.o junk
+ $(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS)
+ mv junk $@.o
+
tests: all valid_hostname_test mac_expand_test dict_test unescape_test \
hex_quote_test ctable_test inet_addr_list_test base64_code_test \
attr_scan64_test attr_scan0_test host_port_test dict_tests \
@@ -618,7 +634,8 @@ tests: all valid_hostname_test mac_expand_test dict_test unescape_test \
strcasecmp_utf8_test vbuf_print_test miss_endif_cidr_test \
miss_endif_regexp_test split_qnameval_test vstring_test \
vstream_test byte_mask_tests mystrtok_test known_tcp_ports_test \
- binhash_test argv_test inet_prefix_top_test
+ binhash_test argv_test inet_prefix_top_test printable_test \
+ valid_utf8_string_test readlline_test quote_for_json_test
dict_tests: all dict_test \
dict_pcre_tests dict_cidr_test dict_thash_test dict_static_test \
@@ -650,6 +667,15 @@ unescape_test: unescape unescape.in unescape.ref
# diff unescape.in unescape.tmp
rm -f unescape.tmp
+printable_test: printable
+ $(SHLIB_ENV) ${VALGRIND} ./printable
+
+readlline_test: readlline
+ $(SHLIB_ENV) ${VALGRIND} ./readlline
+
+valid_utf8_string_test: valid_utf8_string
+ $(SHLIB_ENV) ${VALGRIND} ./valid_utf8_string
+
hex_quote_test: hex_quote
$(SHLIB_ENV) ${VALGRIND} ./hex_quote <hex_quote.c | od -cb >hex_quote.tmp
od -cb <hex_quote.c >hex_quote.ref
@@ -1083,6 +1109,9 @@ argv_test: argv
inet_prefix_top_test: inet_prefix_top
$(SHLIB_ENV) ${VALGRIND} ./inet_prefix_top
+quote_for_json_test: quote_for_json
+ $(SHLIB_ENV) ${VALGRIND} ./quote_for_json
+
depend: $(MAKES)
(sed '1,/^# do not edit/!d' Makefile.in; \
set -e; for i in [a-z][a-z0-9]*.c; do \
@@ -1119,9 +1148,12 @@ allspace.o: vbuf.h
allspace.o: vstring.h
argv.o: argv.c
argv.o: argv.h
+argv.o: check_arg.h
argv.o: msg.h
argv.o: mymalloc.h
argv.o: sys_defs.h
+argv.o: vbuf.h
+argv.o: vstring.h
argv_attr_print.o: argv.h
argv_attr_print.o: argv_attr.h
argv_attr_print.o: argv_attr_print.c
@@ -2157,6 +2189,7 @@ logwriter.o: logwriter.c
logwriter.o: logwriter.h
logwriter.o: msg.h
logwriter.o: mymalloc.h
+logwriter.o: name_code.h
logwriter.o: safe_open.h
logwriter.o: sys_defs.h
logwriter.o: vbuf.h
@@ -2525,11 +2558,16 @@ posix_signals.o: posix_signals.c
posix_signals.o: posix_signals.h
posix_signals.o: sys_defs.h
printable.o: check_arg.h
+printable.o: parse_utf8_char.h
printable.o: printable.c
printable.o: stringops.h
printable.o: sys_defs.h
printable.o: vbuf.h
printable.o: vstring.h
+quote_for_json.o: quote_for_json.c
+quote_for_json.o: stringops.h
+quote_for_json.o: sys_defs.h
+quote_for_json.o: vstring.h
rand_sleep.o: iostuff.h
rand_sleep.o: msg.h
rand_sleep.o: myrand.h
@@ -2848,6 +2886,7 @@ valid_utf8_hostname.o: valid_utf8_hostname.h
valid_utf8_hostname.o: vbuf.h
valid_utf8_hostname.o: vstring.h
valid_utf8_string.o: check_arg.h
+valid_utf8_string.o: parse_utf8_char.h
valid_utf8_string.o: stringops.h
valid_utf8_string.o: sys_defs.h
valid_utf8_string.o: valid_utf8_string.c
diff --git a/src/util/argv.c b/src/util/argv.c
index 4e05fd0..332426e 100644
--- a/src/util/argv.c
+++ b/src/util/argv.c
@@ -53,6 +53,11 @@
/* ssize_t pos;
/* ssize_t how_many;
/*
+/* char *argv_join(buf, argvp, delim)
+/* VSTRING *buf;
+/* ARGV *argvp;
+/* int delim;
+/*
/* void ARGV_FAKE_BEGIN(argv, arg)
/* const char *arg;
/*
@@ -109,6 +114,10 @@
/* starting at the specified array position. The result is
/* null-terminated.
/*
+/* argv_join() joins all elements in an array using the
+/* specified delimiter value, and appends the result to the
+/* specified buffer.
+/*
/* ARGV_FAKE_BEGIN/END are an optimization for the case where
/* a single string needs to be passed into an ARGV-based
/* interface. ARGV_FAKE_BEGIN() opens a statement block and
@@ -148,6 +157,7 @@
#include "mymalloc.h"
#include "msg.h"
+#include "vstring.h"
#include "argv.h"
#ifdef TEST
@@ -379,6 +389,20 @@ void argv_delete(ARGV *argvp, ssize_t first, ssize_t how_many)
argvp->argc -= how_many;
}
+/* argv_join - concatenate array elements with delimiter */
+
+char *argv_join(VSTRING *buf, ARGV *argv, int delim)
+{
+ char **cpp;
+
+ for (cpp = argv->argv; *cpp; cpp++) {
+ vstring_strcat(buf, *cpp);
+ if (cpp[1])
+ VSTRING_ADDCH(buf, delim);
+ }
+ return (vstring_str(buf));
+}
+
#ifdef TEST
/*
@@ -402,6 +426,7 @@ typedef struct TEST_CASE {
const char *exp_panic_msg; /* expected panic */
int exp_argc; /* expected array length */
const char *exp_argv[ARRAY_LEN]; /* expected array content */
+ int join_delim; /* argv_join() delimiter */
} TEST_CASE;
#define TERMINATE_ARRAY (1)
@@ -559,6 +584,24 @@ static ARGV *test_argv_bad_delete3(const TEST_CASE *tp, ARGV *argvp)
return (argvp);
}
+/* test_argv_join - populate, join, and overwrite */
+
+static ARGV *test_argv_join(const TEST_CASE *tp, ARGV *argvp)
+{
+ VSTRING *buf = vstring_alloc(100);
+
+ /*
+ * Impedance mismatch: argv_join() produces output to VSTRING, but the
+ * test fixture wants output to ARGV.
+ */
+ test_argv_populate(tp, argvp);
+ argv_join(buf, argvp, tp->join_delim);
+ argv_delete(argvp, 0, argvp->argc);
+ argv_add(argvp, vstring_str(buf), ARGV_END);
+ vstring_free(buf);
+ return (argvp);
+}
+
/* test_argv_verify - verify result */
static int test_argv_verify(const TEST_CASE *tp, ARGV *argvp)
@@ -573,7 +616,7 @@ static int test_argv_verify(const TEST_CASE *tp, ARGV *argvp)
}
if (strcmp(vstring_str(test_panic_str), tp->exp_panic_msg) != 0) {
msg_warn("test case '%s': got '%s', want: '%s'",
- tp->label, vstring_str(test_panic_str), tp->exp_panic_msg);
+ tp->label, vstring_str(test_panic_str), tp->exp_panic_msg);
return (FAIL);
}
return (PASS);
@@ -682,6 +725,18 @@ static const TEST_CASE test_cases[] = {
{"foo", "baz", "bar", 0}, 0, test_argv_bad_delete3,
"argv_delete bad range: (start=100 count=1)"
},
+ {"argv_join, multiple strings",
+ {"foo", "baz", "bar", 0}, 0, test_argv_join,
+ 0, 1, {"foo:baz:bar", 0}, ':'
+ },
+ {"argv_join, one string",
+ {"foo", 0}, 0, test_argv_join,
+ 0, 1, {"foo", 0}, ':'
+ },
+ {"argv_join, empty",
+ {0}, 0, test_argv_join,
+ 0, 1, {"", 0}, ':'
+ },
0,
};
diff --git a/src/util/argv.h b/src/util/argv.h
index b0098ce..f1e746a 100644
--- a/src/util/argv.h
+++ b/src/util/argv.h
@@ -33,6 +33,8 @@ extern void argv_truncate(ARGV *, ssize_t);
extern void argv_insert_one(ARGV *, ssize_t, const char *);
extern void argv_replace_one(ARGV *, ssize_t, const char *);
extern void argv_delete(ARGV *, ssize_t, ssize_t);
+struct VSTRING;
+extern char *argv_join(struct VSTRING *buf, ARGV *, int);
extern ARGV *argv_free(ARGV *);
extern ARGV *argv_split(const char *, const char *);
diff --git a/src/util/casefold.c b/src/util/casefold.c
index d3ebd4b..94860b8 100644
--- a/src/util/casefold.c
+++ b/src/util/casefold.c
@@ -300,7 +300,7 @@ int main(int argc, char **argv)
encode_utf8(buffer, codepoint);
if (msg_verbose)
vstream_printf("U+%X -> %s\n", codepoint, STR(buffer));
- if (valid_utf8_string(STR(buffer), LEN(buffer)) == 0)
+ if (valid_utf8_stringz(STR(buffer)) == 0)
msg_fatal("bad utf-8 encoding for U+%X\n", codepoint);
casefold(dest, STR(buffer));
}
diff --git a/src/util/dict_inline.c b/src/util/dict_inline.c
index 72339b2..d7f9344 100644
--- a/src/util/dict_inline.c
+++ b/src/util/dict_inline.c
@@ -87,7 +87,7 @@ DICT *dict_inline_open(const char *name, int open_flags, int dict_flags)
*/
if (DICT_NEED_UTF8_ACTIVATION(util_utf8_enable, dict_flags)
&& allascii(name) == 0
- && valid_utf8_string(name, strlen(name)) == 0)
+ && valid_utf8_stringz(name) == 0)
DICT_INLINE_RETURN(dict_surrogate(DICT_TYPE_INLINE, name,
open_flags, dict_flags,
"bad UTF-8 syntax: \"%s:%s\"; "
diff --git a/src/util/dict_thash.c b/src/util/dict_thash.c
index 69eb17b..bae4a63 100644
--- a/src/util/dict_thash.c
+++ b/src/util/dict_thash.c
@@ -127,7 +127,7 @@ DICT *dict_thash_open(const char *path, int open_flags, int dict_flags)
*/
if ((dict->flags & DICT_FLAG_UTF8_ACTIVE)
&& allascii(STR(line_buffer)) == 0
- && valid_utf8_string(STR(line_buffer), LEN(line_buffer)) == 0) {
+ && valid_utf8_stringz(STR(line_buffer)) == 0) {
msg_warn("%s, line %d: non-UTF-8 input \"%s\""
" -- ignoring this line",
VSTREAM_PATH(fp), lineno, STR(line_buffer));
@@ -181,8 +181,8 @@ DICT *dict_thash_open(const char *path, int open_flags, int dict_flags)
" is this an alias file?", path, lineno);
/*
- * Optionally treat the value as a filename, and replace the value
- * with the BASE64-encoded content of the named file.
+ * Optionally treat the value as a filename, and replace the
+ * value with the BASE64-encoded content of the named file.
*/
if (dict_flags & DICT_FLAG_SRC_RHS_IS_FILE) {
VSTRING *base64_buf;
diff --git a/src/util/dict_utf8.c b/src/util/dict_utf8.c
index f1fc65a..9bb6b7b 100644
--- a/src/util/dict_utf8.c
+++ b/src/util/dict_utf8.c
@@ -100,7 +100,7 @@ static char *dict_utf8_check_fold(DICT *dict, const char *string,
/*
* Validate UTF-8 without casefolding.
*/
- if (!allascii(string) && valid_utf8_string(string, strlen(string)) == 0) {
+ if (!allascii(string) && valid_utf8_stringz(string) == 0) {
if (err)
*err = "malformed UTF-8 or invalid codepoint";
return (0);
@@ -123,7 +123,7 @@ static char *dict_utf8_check_fold(DICT *dict, const char *string,
static int dict_utf8_check(const char *string, CONST_CHAR_STAR *err)
{
- if (!allascii(string) && valid_utf8_string(string, strlen(string)) == 0) {
+ if (!allascii(string) && valid_utf8_stringz(string) == 0) {
if (err)
*err = "malformed UTF-8 or invalid codepoint";
return (0);
diff --git a/src/util/inet_prefix_top.c b/src/util/inet_prefix_top.c
index 8d5af00..f35d5f0 100644
--- a/src/util/inet_prefix_top.c
+++ b/src/util/inet_prefix_top.c
@@ -164,6 +164,7 @@ int main(int argc, char **argv)
msg_info("PASS %s/%d", str_name_code(af_map, tp->in_af),
tp->in_prefix_len);
}
+ myfree(act_prefix);
}
msg_info("PASS=%d FAIL=%d", pass, fail);
return (fail > 0);
diff --git a/src/util/logwriter.c b/src/util/logwriter.c
index aea2767..4a18be3 100644
--- a/src/util/logwriter.c
+++ b/src/util/logwriter.c
@@ -21,6 +21,9 @@
/* const char *path,
/* const char *buffer,
/* ssize_t buflen)
+/*
+/* int set_logwriter_create_perms(
+/* const char *mode)
/* DESCRIPTION
/* This module manages a logfile writer.
/*
@@ -38,6 +41,15 @@
/* logwriter_one_shot() combines all the above operations. The
/* result is zero if successful, VSTREAM_EOF if any operation
/* failed.
+/*
+/* set_logwriter_create_perms() sets the file permissions that
+/* will be used when creating a logfile. Valid inputs are
+/* "644", "640", and "600". Leading zeros are allowed and
+/* ignored.
+/* DIAGNOSTICS
+/* Fatal error: logfile create error; warning: logfile permission
+/* change error. set_logwriter_create_perms() returns the file
+/* create permission if the request is valid, -1 otherwise.
/* LICENSE
/* .ad
/* .fi
@@ -66,10 +78,12 @@
#include <mymalloc.h>
#include <safe_open.h>
#include <vstream.h>
+#include <name_code.h>
/*
* Application-specific.
*/
+static int logwriter_perms = 0600;
/* logwriter_open_or_die - open logfile */
@@ -82,7 +96,7 @@ VSTREAM *logwriter_open_or_die(const char *path)
#define NO_CHOWN (-1)
#define NO_CHGRP (-1)
- fp = safe_open(path, O_CREAT | O_WRONLY | O_APPEND, 0644,
+ fp = safe_open(path, O_CREAT | O_WRONLY | O_APPEND, logwriter_perms,
NO_STATP, NO_CHOWN, NO_CHGRP, why);
if (fp == 0)
msg_fatal("open logfile '%s': %s", path, vstring_str(why));
@@ -122,3 +136,21 @@ int logwriter_one_shot(const char *path, const char *buf, ssize_t len)
err |= logwriter_close(fp);
return (err ? VSTREAM_EOF : 0);
}
+
+/* set_logwriter_create_perms - logfile permission control */
+
+int set_logwriter_create_perms(const char *mode_str)
+{
+ static const NAME_CODE sane_perms[] = {
+ "644", 0644,
+ "640", 0640,
+ "600", 0600,
+ 0, -1,
+ };
+ int perms;
+
+ if ((perms = name_code(sane_perms, NAME_CODE_FLAG_NONE,
+ mode_str + strspn(mode_str, "0"))) != -1)
+ logwriter_perms = perms;
+ return (perms);
+}
diff --git a/src/util/logwriter.h b/src/util/logwriter.h
index f5266e4..c827d25 100644
--- a/src/util/logwriter.h
+++ b/src/util/logwriter.h
@@ -23,6 +23,7 @@ extern VSTREAM *logwriter_open_or_die(const char *);
extern int logwriter_write(VSTREAM *, const char *, ssize_t);
extern int logwriter_close(VSTREAM *);
extern int logwriter_one_shot(const char *, const char *, ssize_t);
+extern int set_logwriter_create_perms(const char *);
/* LICENSE
/* .ad
diff --git a/src/util/midna_domain.c b/src/util/midna_domain.c
index 333a5c9..bc016b6 100644
--- a/src/util/midna_domain.c
+++ b/src/util/midna_domain.c
@@ -178,7 +178,7 @@ static void *midna_domain_to_ascii_create(const char *name, void *unused_context
/*
* Paranoia: do not expose uidna_*() to unfiltered network data.
*/
- if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
+ if (allascii(name) == 0 && valid_utf8_stringz(name) == 0) {
msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
myname, name, "malformed UTF-8");
return (0);
@@ -232,7 +232,7 @@ static void *midna_domain_to_utf8_create(const char *name, void *unused_context)
/*
* Paranoia: do not expose uidna_*() to unfiltered network data.
*/
- if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
+ if (allascii(name) == 0 && valid_utf8_stringz(name) == 0) {
msg_warn("%s: Problem translating domain \"%.100s\" to UTF-8 form: %s",
myname, name, "malformed UTF-8");
return (0);
diff --git a/src/util/parse_utf8_char.h b/src/util/parse_utf8_char.h
new file mode 100644
index 0000000..b00a1c2
--- /dev/null
+++ b/src/util/parse_utf8_char.h
@@ -0,0 +1,122 @@
+/*++
+/* NAME
+/* parse_utf8_char 3h
+/* SUMMARY
+/* parse one UTF-8 multibyte character
+/* SYNOPSIS
+/* #include <parse_utf8_char.h>
+/*
+/* char *parse_utf8_char(str, end)
+/* const char *str;
+/* const char *end;
+/* DESCRIPTION
+/* parse_utf8_char() determines if the byte sequence starting
+/* at \fBstr\fR begins with a complete UTF-8 character as
+/* defined in RFC 3629. That is, a proper encoding of code
+/* points U+0000..U+10FFFF, excluding over-long encodings and
+/* excluding U+D800..U+DFFF surrogates.
+/*
+/* When the byte sequence starting at \fBstr\fR begins with a
+/* complete UTF-8 character, this function returns a pointer
+/* to the last byte in that character. Otherwise, it returns
+/* a null pointer.
+/*
+/* The \fBend\fR argument is either null (the byte sequence
+/* starting at \fBstr\fR must be null terminated), or \fBend
+/* - str\fR specifies the length of the byte sequence.
+/* BUGS
+/* Code points in the range U+FDD0..U+FDEF and ending in FFFE
+/* or FFFF are non-characters in UNICODE. This function does
+/* not reject these.
+/* LICENSE
+/* .ad
+/* .fi
+/* The Secure Mailer license must be distributed with this software.
+/* AUTHOR(S)
+/* Wietse Venema
+/* IBM T.J. Watson Research
+/* P.O. Box 704
+/* Yorktown Heights, NY 10598, USA
+/*
+/* Wietse Venema
+/* porcupine.org
+/* Amawalk, NY 10501, USA
+/*--*/
+
+ /*
+ * System library.
+ */
+#include <sys_defs.h>
+
+#ifdef NO_INLINE
+#define inline /* */
+#endif
+
+/* parse_utf8_char - parse and validate one UTF8 multibyte sequence */
+
+static inline char *parse_utf8_char(const char *str, const char *end)
+{
+ const unsigned char *cp = (const unsigned char *) str;
+ const unsigned char *ep = (const unsigned char *) end;
+ unsigned char c0, ch;
+
+ /*
+ * Optimized for correct input, time, space, and for CPUs that have a
+ * decent number of registers. Other implementation considerations:
+ *
+ * - In the UTF-8 encoding, a non-leading byte is never null. Therefore,
+ * this function will correctly reject a partial UTF-8 character at the
+ * end of a null-terminated string.
+ *
+ * - If the "end" argument is a null constant, and if this function is
+ * inlined, then an optimizing compiler should propagate the constant
+ * through the "ep" variable, and eliminate any code branches that
+ * require ep != 0.
+ */
+ /* Single-byte encodings. */
+ if (EXPECTED((c0 = *cp) <= 0x7f) /* we know that c0 >= 0x0 */ ) {
+ return ((char *) cp);
+ }
+ /* Two-byte encodings. */
+ else if (EXPECTED(c0 <= 0xdf) /* we know that c0 >= 0x80 */ ) {
+ /* Exclude over-long encodings. */
+ if (UNEXPECTED(c0 < 0xc2)
+ || UNEXPECTED(ep && cp + 1 >= ep)
+ /* Require UTF-8 tail byte. */
+ || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80))
+ return (0);
+ return ((char *) cp);
+ }
+ /* Three-byte encodings. */
+ else if (EXPECTED(c0 <= 0xef) /* we know that c0 >= 0xe0 */ ) {
+ if (UNEXPECTED(ep && cp + 2 >= ep)
+ /* Exclude over-long encodings. */
+ || UNEXPECTED((ch = *++cp) < (c0 == 0xe0 ? 0xa0 : 0x80))
+ /* Exclude U+D800..U+DFFF. */
+ || UNEXPECTED(ch > (c0 == 0xed ? 0x9f : 0xbf))
+ /* Require UTF-8 tail byte. */
+ || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80))
+ return (0);
+ return ((char *) cp);
+ }
+ /* Four-byte encodings. */
+ else if (EXPECTED(c0 <= 0xf4) /* we know that c0 >= 0xf0 */ ) {
+ if (UNEXPECTED(ep && cp + 3 >= ep)
+ /* Exclude over-long encodings. */
+ || UNEXPECTED((ch = *++cp) < (c0 == 0xf0 ? 0x90 : 0x80))
+ /* Exclude code points above U+10FFFF. */
+ || UNEXPECTED(ch > (c0 == 0xf4 ? 0x8f : 0xbf))
+ /* Require UTF-8 tail byte. */
+ || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80)
+ /* Require UTF-8 tail byte. */
+ || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80))
+ return (0);
+ return ((char *) cp);
+ }
+ /* Invalid: c0 >= 0xf5 */
+ else {
+ return (0);
+ }
+}
+
+#undef inline
diff --git a/src/util/printable.c b/src/util/printable.c
index 6c148fd..0e1ae19 100644
--- a/src/util/printable.c
+++ b/src/util/printable.c
@@ -45,6 +45,10 @@
/* Google, Inc.
/* 111 8th Avenue
/* New York, NY 10011, USA
+/*
+/* Wietse Venema
+/* porcupine.org
+/* Amawalk, NY 10501, USA
/*--*/
/* System library. */
@@ -56,8 +60,9 @@
/* Utility library. */
#include "stringops.h"
+#include "parse_utf8_char.h"
-int util_utf8_enable = 0;
+int util_utf8_enable = 0;
/* printable - binary compatibility */
@@ -74,27 +79,150 @@ char *printable(char *string, int replacement)
char *printable_except(char *string, int replacement, const char *except)
{
- unsigned char *cp;
+ char *cp;
+ char *last;
int ch;
/*
- * XXX Replace invalid UTF8 sequences (too short, over-long encodings,
- * out-of-range code points, etc). See valid_utf8_string.c.
+ * In case of a non-UTF8 sequence (bad leader byte, bad non-leader byte,
+ * over-long encodings, out-of-range code points, etc), replace the first
+ * byte, and try to resynchronize at the next byte.
*/
- cp = (unsigned char *) string;
- while ((ch = *cp) != 0) {
- if (ISASCII(ch) && (ISPRINT(ch) || (except && strchr(except, ch)))) {
- /* ok */
- } else if (util_utf8_enable && ch >= 194 && ch <= 254
- && cp[1] >= 128 && cp[1] < 192) {
- /* UTF8; skip the rest of the bytes in the character. */
- while (cp[1] >= 128 && cp[1] < 192)
- cp++;
- } else {
- /* Not ASCII and not UTF8. */
- *cp = replacement;
+#define PRINT_OR_EXCEPT(ch) (ISPRINT(ch) || (except && strchr(except, ch)))
+
+ for (cp = string; (ch = *(unsigned char *) cp) != 0; cp++) {
+ if (util_utf8_enable == 0) {
+ if (ISASCII(ch) && PRINT_OR_EXCEPT(ch))
+ continue;
+ } else if ((last = parse_utf8_char(cp, 0)) == cp) { /* ASCII */
+ if (PRINT_OR_EXCEPT(ch))
+ continue;
+ } else if (last != 0) { /* Other UTF8 */
+ cp = last;
+ continue;
}
- cp++;
+ *cp = replacement;
}
return (string);
}
+
+#ifdef TEST
+
+#include <stdlib.h>
+#include <string.h>
+#include <msg.h>
+#include <msg_vstream.h>
+#include <mymalloc.h>
+#include <vstream.h>
+
+ /*
+ * Test cases for 1-, 2-, and 3-byte encodings. Originally contributed by
+ * Viktor Dukhovni, and annotated using translate.google.com.
+ *
+ * See valid_utf8_string.c for single-error tests.
+ *
+ * XXX Need a test for 4-byte encodings, preferably with strings that can be
+ * displayed.
+ */
+struct testcase {
+ const char *name;
+ const char *input;
+ const char *expected;;
+};
+static const struct testcase testcases[] = {
+ {"Printable ASCII",
+ "printable", "printable"
+ },
+ {"ASCII with control character",
+ "non\bn-printable", "non?n-printable"
+ },
+ {"Latin accented text, no error",
+ "na\303\257ve", "na\303\257ve"
+ },
+ {"Latin text, with error",
+ "na\303ve", "na?ve"
+ },
+ {"Viktor, Cyrillic, no error",
+ "\320\262\320\270\320\272\321\202\320\276\321\200",
+ "\320\262\320\270\320\272\321\202\320\276\321\200"
+ },
+ {"Viktor, Cyrillic, two errors",
+ "\320\262\320\320\272\272\321\202\320\276\321\200",
+ "\320\262?\320\272?\321\202\320\276\321\200"
+ },
+ {"Viktor, Hebrew, no error",
+ "\327\225\327\231\327\247\327\230\327\225\326\274\327\250",
+ "\327\225\327\231\327\247\327\230\327\225\326\274\327\250"
+ },
+ {"Viktor, Hebrew, with error",
+ "\327\225\231\327\247\327\230\327\225\326\274\327\250",
+ "\327\225?\327\247\327\230\327\225\326\274\327\250"
+ },
+ {"Chinese (Simplified), no error",
+ "\344\270\255\345\233\275\344\272\222\350\201\224\347\275\221\347"
+ "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+ "\237\350\256\241\346\212\245\345\221\212",
+ "\344\270\255\345\233\275\344\272\222\350\201\224\347\275\221\347"
+ "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+ "\237\350\256\241\346\212\245\345\221\212"
+ },
+ {"Chinese (Simplified), with errors",
+ "\344\270\255\345\344\272\222\350\224\347\275\221\347"
+ "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+ "\237\350\256\241\346\212\245\345",
+ "\344\270\255?\344\272\222??\347\275\221\347"
+ "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+ "\237\350\256\241\346\212\245?"
+ },
+};
+
+int main(int argc, char **argv)
+{
+ const struct testcase *tp;
+ int pass;
+ int fail;
+
+#define NUM_TESTS sizeof(testcases)/sizeof(testcases[0])
+
+ msg_vstream_init(basename(argv[0]), VSTREAM_ERR);
+ util_utf8_enable = 1;
+
+ for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) {
+ char *input;
+ char *actual;
+ int ok = 0;
+
+ /*
+ * Notes:
+ *
+ * - The input is modified, therefore it must be copied.
+ *
+ * - The msg(3) functions use printable() which interferes when logging
+ * inputs and outputs. Use vstream_fprintf() instead.
+ */
+ vstream_fprintf(VSTREAM_ERR, "RUN %s\n", tp->name);
+ input = mystrdup(tp->input);
+ actual = printable(input, '?');
+
+ if (strcmp(actual, tp->expected) != 0) {
+ vstream_fprintf(VSTREAM_ERR, "input: >%s<, got: >%s<, want: >%s<\n",
+ tp->input, actual, tp->expected);
+ } else {
+ vstream_fprintf(VSTREAM_ERR, "input: >%s<, got and want: >%s<\n",
+ tp->input, actual);
+ ok = 1;
+ }
+ if (ok) {
+ vstream_fprintf(VSTREAM_ERR, "PASS %s\n", tp->name);
+ pass++;
+ } else {
+ vstream_fprintf(VSTREAM_ERR, "FAIL %s\n", tp->name);
+ fail++;
+ }
+ myfree(input);
+ }
+ msg_info("PASS=%d FAIL=%d", pass, fail);
+ return (fail > 0);
+}
+
+#endif
diff --git a/src/util/quote_for_json.c b/src/util/quote_for_json.c
new file mode 100644
index 0000000..f54af3f
--- /dev/null
+++ b/src/util/quote_for_json.c
@@ -0,0 +1,218 @@
+/*++
+/* NAME
+/* quote_for_json 3
+/* SUMMARY
+/* quote UTF-8 string value for JSON
+/* SYNOPSIS
+/* #include <quote_for_json.h>
+/*
+/* char *quote_for_json(
+/* VSTRING *result,
+/* const char *in,
+/* ssize_t len)
+/*
+/* char *quote_for_json_append(
+/* VSTRING *result,
+/* const char *in,
+/* ssize_t len)
+/* DESCRIPTION
+/* quote_for_json() takes well-formed UTF-8 encoded text,
+/* quotes that text compliant with RFC 4627, and returns a
+/* pointer to the resulting text. The input may contain null
+/* bytes, but the output will not.
+/*
+/* quote_for_json() produces short (two-letter) escape sequences
+/* for common control characters, double quote and backslash.
+/* It will not quote "/" (0x2F), and will quote DEL (0x7f) as
+/* \u007F to make it printable. The input byte sequence "\uXXXX"
+/* is quoted like any other text (the "\" is escaped as "\\").
+/*
+/* quote_for_json() does not perform UTF-8 validation. The caller
+/* should use valid_utf8_string() or printable() as appropriate.
+/*
+/* quote_for_json_append() appends the output to the result buffer.
+/*
+/* Arguments:
+/* .IP result
+/* Storage for the result, resized automatically.
+/* .IP in
+/* Pointer to the input byte sequence.
+/* .IP len
+/* The length of the input byte sequence, or a negative number
+/* when the byte sequence is null-terminated.
+/* DIAGNOSTICS
+/* Fatal error: memory allocation error.
+/* LICENSE
+/* .ad
+/* .fi
+/* The Secure Mailer license must be distributed with this software.
+/* AUTHOR(S)
+/* Wietse Venema
+/* Google, Inc.
+/* 111 8th Avenue
+/* New York, NY 10011, USA
+/*
+/* Wietse Venema
+/* porcupine.org
+/*--*/
+
+ /*
+ * System library.
+ */
+#include <sys_defs.h>
+#include <ctype.h>
+#include <string.h>
+
+ /*
+ * Utility library.
+ */
+#include <stringops.h>
+#include <vstring.h>
+
+#define STR(x) vstring_str(x)
+
+/* quote_for_json_append - quote JSON string, append result */
+
+char *quote_for_json_append(VSTRING *result, const char *text, ssize_t len)
+{
+ const char *cp;
+ int ch;
+
+ if (len < 0)
+ len = strlen(text);
+
+ for (cp = text; len > 0; len--, cp++) {
+ ch = *(const unsigned char *) cp;
+ if (UNEXPECTED(ISCNTRL(ch))) {
+ switch (ch) {
+ case '\b':
+ VSTRING_ADDCH(result, '\\');
+ VSTRING_ADDCH(result, 'b');
+ break;
+ case '\f':
+ VSTRING_ADDCH(result, '\\');
+ VSTRING_ADDCH(result, 'f');
+ break;
+ case '\n':
+ VSTRING_ADDCH(result, '\\');
+ VSTRING_ADDCH(result, 'n');
+ break;
+ case '\r':
+ VSTRING_ADDCH(result, '\\');
+ VSTRING_ADDCH(result, 'r');
+ break;
+ case '\t':
+ VSTRING_ADDCH(result, '\\');
+ VSTRING_ADDCH(result, 't');
+ break;
+ default:
+ /* All other controls including DEL and NUL. */
+ vstring_sprintf_append(result, "\\u%04X", ch);
+ break;
+ }
+ } else {
+ switch (ch) {
+ case '\\':
+ case '"':
+ VSTRING_ADDCH(result, '\\');
+ /* FALLTHROUGH */
+ default:
+ /* Includes malformed UTF-8. */
+ VSTRING_ADDCH(result, ch);
+ break;
+ }
+ }
+ }
+ VSTRING_TERMINATE(result);
+ return (STR(result));
+}
+
+/* quote_for_json - quote JSON string */
+
+char *quote_for_json(VSTRING *result, const char *text, ssize_t len)
+{
+ VSTRING_RESET(result);
+ return (quote_for_json_append(result, text, len));
+}
+
+#ifdef TEST
+
+ /*
+ * System library.
+ */
+#include <stdlib.h>
+
+ /*
+ * Utility library.
+ */
+#include <msg.h>
+#include <msg_vstream.h>
+
+typedef struct TEST_CASE {
+ const char *label; /* identifies test case */
+ char *(*fn) (VSTRING *, const char *, ssize_t);
+ const char *input; /* input string */
+ ssize_t input_len; /* -1 or input length */
+ const char *exp_res; /* expected result */
+} TEST_CASE;
+
+#define PASS (0)
+#define FAIL (1)
+
+ /*
+ * The test cases.
+ */
+static const TEST_CASE test_cases[] = {
+ {"ordinary ASCII text", quote_for_json,
+ " abcABC012.,[]{}/", -1, " abcABC012.,[]{}/",
+ },
+ {"quote_for_json_append", quote_for_json_append,
+ "foo", -1, " abcABC012.,[]{}/foo",
+ },
+ {"common control characters", quote_for_json,
+ "\b\f\r\n\t", -1, "\\b\\f\\r\\n\\t",
+ },
+ {"uncommon control characters and DEL", quote_for_json,
+ "\0\01\037\040\176\177", 6, "\\u0000\\u0001\\u001F ~\\u007F",
+ },
+ {"malformed UTF-8", quote_for_json,
+ "\\*\\uasd\\u007F\x80", -1, "\\\\*\\\\uasd\\\\u007F\x80",
+ },
+ 0,
+};
+
+int main(int argc, char **argv)
+{
+ const TEST_CASE *tp;
+ int pass = 0;
+ int fail = 0;
+ VSTRING *res_buf = vstring_alloc(100);
+
+ msg_vstream_init(sane_basename((VSTRING *) 0, argv[0]), VSTREAM_ERR);
+
+ for (tp = test_cases; tp->label != 0; tp++) {
+ int test_fail = 0;
+ char *res;
+
+ msg_info("RUN %s", tp->label);
+ res = tp->fn(res_buf, tp->input, tp->input_len);
+ if (strcmp(res, tp->exp_res) != 0) {
+ msg_warn("test case '%s': got '%s', want '%s'",
+ tp->label, res, tp->exp_res);
+ test_fail = 1;
+ }
+ if (test_fail) {
+ fail++;
+ msg_info("FAIL %s", tp->label);
+ test_fail = 1;
+ } else {
+ msg_info("PASS %s", tp->label);
+ pass++;
+ }
+ }
+ msg_info("PASS=%d FAIL=%d", pass, fail);
+ vstring_free(res_buf);
+ exit(fail != 0);
+}
+
+#endif
diff --git a/src/util/readlline.c b/src/util/readlline.c
index 015877a..721b75f 100644
--- a/src/util/readlline.c
+++ b/src/util/readlline.c
@@ -85,9 +85,15 @@ VSTRING *readllines(VSTRING *buf, VSTREAM *fp, int *lineno, int *first_line)
int next;
ssize_t start;
char *cp;
+ int my_lineno = 0, my_first_line, got_null = 0;
VSTRING_RESET(buf);
+ if (lineno == 0)
+ lineno = &my_lineno;
+ if (first_line == 0)
+ first_line = &my_first_line;
+
/*
* Ignore comment lines, all whitespace lines, and empty lines. Terminate
* at EOF or at the beginning of the next logical line.
@@ -95,16 +101,19 @@ VSTRING *readllines(VSTRING *buf, VSTREAM *fp, int *lineno, int *first_line)
for (;;) {
/* Read one line, possibly not newline terminated. */
start = LEN(buf);
- while ((ch = VSTREAM_GETC(fp)) != VSTREAM_EOF && ch != '\n')
+ while ((ch = VSTREAM_GETC(fp)) != VSTREAM_EOF && ch != '\n') {
VSTRING_ADDCH(buf, ch);
- if (lineno != 0 && (ch == '\n' || LEN(buf) > start))
+ if (ch == 0)
+ got_null = 1;
+ }
+ if (ch == '\n' || LEN(buf) > start)
*lineno += 1;
/* Ignore comment line, all whitespace line, or empty line. */
for (cp = STR(buf) + start; cp < END(buf) && ISSPACE(*cp); cp++)
/* void */ ;
if (cp == END(buf) || *cp == '#')
vstring_truncate(buf, start);
- else if (start == 0 && lineno != 0 && first_line != 0)
+ if (start == 0)
*first_line = *lineno;
/* Terminate at EOF or at the beginning of the next logical line. */
if (ch == VSTREAM_EOF)
@@ -119,6 +128,20 @@ VSTRING *readllines(VSTRING *buf, VSTREAM *fp, int *lineno, int *first_line)
VSTRING_TERMINATE(buf);
/*
+ * This code does not care about embedded null bytes, but callers do.
+ */
+ if (got_null) {
+ const char *why = "text after null byte may be ignored";
+
+ if (*first_line == *lineno)
+ msg_warn("%s, line %d: %s",
+ VSTREAM_PATH(fp), *lineno, why);
+ else
+ msg_warn("%s, line %d-%d: %s",
+ VSTREAM_PATH(fp), *first_line, *lineno, why);
+ }
+
+ /*
* Invalid input: continuing text without preceding text. Allowing this
* would complicate "postconf -e", which implements its own multi-line
* parsing routine. Do not abort, just warn, so that critical programs
@@ -136,3 +159,205 @@ VSTRING *readllines(VSTRING *buf, VSTREAM *fp, int *lineno, int *first_line)
*/
return (LEN(buf) > 0 ? buf : 0);
}
+
+ /*
+ * Stand-alone test program.
+ */
+#ifdef TEST
+#include <stdlib.h>
+#include <string.h>
+#include <msg.h>
+#include <msg_vstream.h>
+#include <stringops.h>
+#include <vstream.h>
+#include <vstring.h>
+
+ /*
+ * Test cases. Note: the input and exp_output fields are converted with
+ * unescape(). Embedded null bytes must be specified as \\0.
+ */
+struct testcase {
+ const char *name;
+ const char *input;
+ const char *exp_output;
+ int exp_first_line;
+ int exp_last_line;
+};
+
+static const struct testcase testcases[] = {
+ {"leading space before non-comment",
+ " abcde\nfghij\n",
+ "fghij",
+ 2, 2
+ /* Expect "logical line must not start with whitespace" */
+ },
+ {"leading space before leading comment",
+ " #abcde\nfghij\n",
+ "fghij",
+ 2, 2
+ },
+ {"leading #comment at beginning of line",
+ "#abc\ndef",
+ "def",
+ 2, 2,
+ },
+ {"empty line before non-comment",
+ "\nabc\n",
+ "abc",
+ 2, 2,
+ },
+ {"whitespace line before non-comment",
+ " \nabc\n",
+ "abc",
+ 2, 2,
+ },
+ {"missing newline at end of non-comment",
+ "abc def",
+ "abc def",
+ 1, 1,
+ },
+ {"missing newline at end of comment",
+ "#abc def",
+ "",
+ 1, 1,
+ },
+ {"embedded null, single-line",
+ "abc\\0def",
+ "abc\\0def",
+ 1, 1,
+ /* Expect "line 1: text after null byte may be ignored" */
+ },
+ {"embedded null, multiline",
+ "abc\\0\n def",
+ "abc\\0 def",
+ 1, 2,
+ /* Expect "line 1-2: text after null byte may be ignored" */
+ },
+ {"embedded null in comment",
+ "#abc\\0\ndef",
+ "def",
+ 2, 2,
+ /* Expect "line 2: text after null byte may be ignored" */
+ },
+ {"multiline input",
+ "abc\n def\n",
+ "abc def",
+ 1, 2,
+ },
+ {"multiline input with embedded #comment after space",
+ "abc\n #def\n ghi",
+ "abc ghi",
+ 1, 3,
+ },
+ {"multiline input with embedded #comment flush left",
+ "abc\n#def\n ghi",
+ "abc ghi",
+ 1, 3,
+ },
+ {"multiline input with embedded whitespace line",
+ "abc\n \n ghi",
+ "abc ghi",
+ 1, 3,
+ },
+ {"multiline input with embedded empty line",
+ "abc\n\n ghi",
+ "abc ghi",
+ 1, 3,
+ },
+ {"multiline input with embedded #comment after space",
+ "abc\n #def\n",
+ "abc",
+ 1, 2,
+ },
+ {"multiline input with embedded #comment flush left",
+ "abc\n#def\n",
+ "abc",
+ 1, 2,
+ },
+ {"empty line at end of file",
+ "\n",
+ "",
+ 1, 1,
+ },
+ {"whitespace line at end of file",
+ "\n \n",
+ "",
+ 2, 2,
+ },
+ {"whitespace at end of file",
+ "abc\n ",
+ "abc",
+ 1, 2,
+ },
+};
+
+int main(int argc, char **argv)
+{
+ const struct testcase *tp;
+ VSTRING *inp_buf = vstring_alloc(100);
+ VSTRING *exp_buf = vstring_alloc(100);
+ VSTRING *out_buf = vstring_alloc(100);
+ VSTRING *esc_buf = vstring_alloc(100);
+ VSTREAM *fp;
+ int last_line;
+ int first_line;
+ int pass;
+ int fail;
+
+#define NUM_TESTS sizeof(testcases)/sizeof(testcases[0])
+
+ msg_vstream_init(basename(argv[0]), VSTREAM_ERR);
+ util_utf8_enable = 1;
+
+ for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) {
+ int ok = 0;
+
+ vstream_fprintf(VSTREAM_ERR, "RUN %s\n", tp->name);
+ unescape(inp_buf, tp->input);
+ unescape(exp_buf, tp->exp_output);
+ if ((fp = vstream_memopen(inp_buf, O_RDONLY)) == 0)
+ msg_panic("open memory stream for reading: %m");
+ vstream_control(fp, CA_VSTREAM_CTL_PATH("memory buffer"),
+ CA_VSTREAM_CTL_END);
+ last_line = 0;
+ if (readllines(out_buf, fp, &last_line, &first_line) == 0) {
+ VSTRING_RESET(out_buf);
+ VSTRING_TERMINATE(out_buf);
+ }
+ if (LEN(out_buf) != LEN(exp_buf)) {
+ msg_warn("unexpected output length, got: %ld, want: %ld",
+ (long) LEN(out_buf), (long) LEN(exp_buf));
+ } else if (memcmp(STR(out_buf), STR(exp_buf), LEN(out_buf)) != 0) {
+ msg_warn("unexpected output: got: >%s<, want: >%s<",
+ STR(escape(esc_buf, STR(out_buf), LEN(out_buf))),
+ tp->exp_output);
+ } else if (first_line != tp->exp_first_line) {
+ msg_warn("unexpected first_line: got: %d, want: %d",
+ first_line, tp->exp_first_line);
+ } else if (last_line != tp->exp_last_line) {
+ msg_warn("unexpected last_line: got: %d, want: %d",
+ last_line, tp->exp_last_line);
+ } else {
+ vstream_fprintf(VSTREAM_ERR, "got and want: >%s<\n",
+ tp->exp_output);
+ ok = 1;
+ }
+ if (ok) {
+ vstream_fprintf(VSTREAM_ERR, "PASS %s\n", tp->name);
+ pass++;
+ } else {
+ vstream_fprintf(VSTREAM_ERR, "FAIL %s\n", tp->name);
+ fail++;
+ }
+ vstream_fclose(fp);
+ }
+ vstring_free(inp_buf);
+ vstring_free(exp_buf);
+ vstring_free(out_buf);
+ vstring_free(esc_buf);
+
+ msg_info("PASS=%d FAIL=%d", pass, fail);
+ return (fail > 0);
+}
+
+#endif
diff --git a/src/util/stringops.h b/src/util/stringops.h
index 97aa597..db56f23 100644
--- a/src/util/stringops.h
+++ b/src/util/stringops.h
@@ -60,10 +60,13 @@ extern int allascii_len(const char *, ssize_t);
extern const char *WARN_UNUSED_RESULT split_nameval(char *, char **, char **);
extern const char *WARN_UNUSED_RESULT split_qnameval(char *, char **, char **);
extern int valid_utf8_string(const char *, ssize_t);
+extern int valid_utf8_stringz(const char *);
extern size_t balpar(const char *, const char *);
extern char *WARN_UNUSED_RESULT extpar(char **, const char *, int);
extern int strcasecmp_utf8x(int, const char *, const char *);
extern int strncasecmp_utf8x(int, const char *, const char *, ssize_t);
+extern char *quote_for_json(VSTRING *, const char *, ssize_t);
+extern char *quote_for_json_append(VSTRING *, const char *, ssize_t);
#define EXTPAR_FLAG_NONE (0)
#define EXTPAR_FLAG_STRIP (1<<0) /* "{ text }" -> "text" */
diff --git a/src/util/sys_defs.h b/src/util/sys_defs.h
index 9247185..62749ab 100644
--- a/src/util/sys_defs.h
+++ b/src/util/sys_defs.h
@@ -1332,6 +1332,13 @@ extern int dup2_pass_on_exec(int oldd, int newd);
#endif
/*
+ * The RFC 5322 Date and Time Specification recommends single space between
+ * date-time tokens. To avoid breaking change, format all numerical days as
+ * two-digit days (i.e. days 1-9 now have a leading zero instead of space).
+ */
+#define TWO_DIGIT_DAY_IN_DATE_TIME
+
+ /*
* Check for required but missing definitions.
*/
#if !defined(HAS_FCNTL_LOCK) && !defined(HAS_FLOCK_LOCK)
diff --git a/src/util/valid_hostname.c b/src/util/valid_hostname.c
index 8b234c4..457d1f1 100644
--- a/src/util/valid_hostname.c
+++ b/src/util/valid_hostname.c
@@ -6,9 +6,9 @@
/* SYNOPSIS
/* #include <valid_hostname.h>
/*
-/* int valid_hostname(name, gripe)
+/* int valid_hostname(name, flags)
/* const char *name;
-/* int gripe;
+/* int flags;
/*
/* int valid_hostaddr(addr, gripe)
/* const char *addr;
@@ -32,6 +32,10 @@
/* dots, no leading or trailing dots or hyphens, no labels
/* longer than VALID_LABEL_LEN characters, and it should not
/* be all numeric.
+/* The flags argument is the bit-wise or of zero or more of
+/* DO_GRIPE or DO_WILDCARD (the latter allows the "*." name
+/* prefix, which is rare but valid in some DNS responses and
+/* queries).
/*
/* valid_hostaddr() requires that the input is a valid string
/* representation of an IPv4 or IPv6 network address as
@@ -403,8 +407,9 @@ int main(int unused_argc, char **argv)
while (vstring_fgets_nonl(buffer, VSTREAM_IN)) {
msg_info("testing: \"%s\"", vstring_str(buffer));
- valid_hostname(vstring_str(buffer), DO_GRIPE);
- valid_hostaddr(vstring_str(buffer), DO_GRIPE);
+ valid_hostname(vstring_str(buffer), DO_GRIPE | DO_WILDCARD);
+ if (strchr(vstring_str(buffer), '*') == 0)
+ valid_hostaddr(vstring_str(buffer), DO_GRIPE);
}
exit(0);
}
diff --git a/src/util/valid_hostname.in b/src/util/valid_hostname.in
index 608c0d1..4cdf019 100644
--- a/src/util/valid_hostname.in
+++ b/src/util/valid_hostname.in
@@ -53,3 +53,9 @@ g:a:a:a:a:a:a:a
a::b
:a::b
a::b:
+*.foo.bar
+*foo.bar
+foo.*.bar
+foo*bar
+foo.bar*
+*
diff --git a/src/util/valid_hostname.ref b/src/util/valid_hostname.ref
index 08b23b8..eccc558 100644
--- a/src/util/valid_hostname.ref
+++ b/src/util/valid_hostname.ref
@@ -141,3 +141,13 @@
./valid_hostname: testing: "a::b:"
./valid_hostname: warning: valid_hostname: invalid character 58(decimal): a::b:
./valid_hostname: warning: valid_ipv6_hostaddr: bad null last field in IPv6 address: a::b:
+./valid_hostname: testing: "*.foo.bar"
+./valid_hostname: testing: "*foo.bar"
+./valid_hostname: warning: valid_hostname: '*' can be the first label only: *foo.bar
+./valid_hostname: testing: "foo.*.bar"
+./valid_hostname: warning: valid_hostname: '*' can be the first label only: foo.*.bar
+./valid_hostname: testing: "foo*bar"
+./valid_hostname: warning: valid_hostname: '*' can be the first label only: foo*bar
+./valid_hostname: testing: "foo.bar*"
+./valid_hostname: warning: valid_hostname: '*' can be the first label only: foo.bar*
+./valid_hostname: testing: "*"
diff --git a/src/util/valid_utf8_string.c b/src/util/valid_utf8_string.c
index 96b5b4d..f5b4ff4 100644
--- a/src/util/valid_utf8_string.c
+++ b/src/util/valid_utf8_string.c
@@ -9,24 +9,24 @@
/* int valid_utf8_string(str, len)
/* const char *str;
/* ssize_t len;
+/*
+/* int valid_utf8_stringz(str)
+/* const char *str;
+/* ssize_t len;
/* DESCRIPTION
-/* valid_utf8_string() determines if a string satisfies the UTF-8
-/* definition in RFC 3629. That is, it contains proper encodings
-/* of code points U+0000..U+10FFFF, excluding over-long encodings
-/* and excluding U+D800..U+DFFF surrogates.
+/* valid_utf8_string() determines if all bytes in a string
+/* satisfy parse_utf8_char(3h) checks. See there for any
+/* implementation limitations.
+/*
+/* valid_utf8_stringz() determines the same for zero-terminated
+/* strings.
/*
/* A zero-length string is considered valid.
/* DIAGNOSTICS
/* The result value is zero when the caller specifies a negative
-/* length, or a string that violates RFC 3629, for example a
-/* string that is truncated in the middle of a multi-byte
-/* sequence.
-/* BUGS
-/* But wait, there is more. Code points in the range U+FDD0..U+FDEF
-/* and ending in FFFE or FFFF are non-characters in UNICODE. This
-/* function does not block these.
+/* length, or a string that does not pass parse_utf8_char(3h) checks.
/* SEE ALSO
-/* RFC 3629
+/* parse_utf8_char(3h), parse one UTF-8 multibyte character
/* LICENSE
/* .ad
/* .fi
@@ -36,6 +36,10 @@
/* IBM T.J. Watson Research
/* P.O. Box 704
/* Yorktown Heights, NY 10598, USA
+/*
+/* Wietse Venema
+/* porcupine.org
+/* Amawalk, NY 10501, USA
/*--*/
/* System library. */
@@ -45,66 +49,50 @@
/* Utility library. */
#include <stringops.h>
+#include <parse_utf8_char.h>
/* valid_utf8_string - validate string according to RFC 3629 */
int valid_utf8_string(const char *str, ssize_t len)
{
- const unsigned char *end = (const unsigned char *) str + len;
- const unsigned char *cp;
- unsigned char c0, ch;
+ const char *ep = str + len;
+ const char *cp;
+ const char *last;
if (len < 0)
return (0);
- if (len <= 0)
+ if (len == 0)
return (1);
/*
- * Optimized for correct input, time, space, and for CPUs that have a
- * decent number of registers.
+ * Ideally, the compiler will inline parse_utf8_char().
*/
- for (cp = (const unsigned char *) str; cp < end; cp++) {
- /* Single-byte encodings. */
- if (EXPECTED((c0 = *cp) <= 0x7f) /* we know that c0 >= 0x0 */ ) {
- /* void */ ;
- }
- /* Two-byte encodings. */
- else if (EXPECTED(c0 <= 0xdf) /* we know that c0 >= 0x80 */ ) {
- /* Exclude over-long encodings. */
- if (UNEXPECTED(c0 < 0xc2)
- || UNEXPECTED(cp + 1 >= end)
- /* Require UTF-8 tail byte. */
- || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80))
- return (0);
- }
- /* Three-byte encodings. */
- else if (EXPECTED(c0 <= 0xef) /* we know that c0 >= 0xe0 */ ) {
- if (UNEXPECTED(cp + 2 >= end)
- /* Exclude over-long encodings. */
- || UNEXPECTED((ch = *++cp) < (c0 == 0xe0 ? 0xa0 : 0x80))
- /* Exclude U+D800..U+DFFF. */
- || UNEXPECTED(ch > (c0 == 0xed ? 0x9f : 0xbf))
- /* Require UTF-8 tail byte. */
- || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80))
- return (0);
- }
- /* Four-byte encodings. */
- else if (EXPECTED(c0 <= 0xf4) /* we know that c0 >= 0xf0 */ ) {
- if (UNEXPECTED(cp + 3 >= end)
- /* Exclude over-long encodings. */
- || UNEXPECTED((ch = *++cp) < (c0 == 0xf0 ? 0x90 : 0x80))
- /* Exclude code points above U+10FFFF. */
- || UNEXPECTED(ch > (c0 == 0xf4 ? 0x8f : 0xbf))
- /* Require UTF-8 tail byte. */
- || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80)
- /* Require UTF-8 tail byte. */
- || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80))
- return (0);
- }
- /* Invalid: c0 >= 0xf5 */
- else {
+ for (cp = str; cp < ep; cp++) {
+ if ((last = parse_utf8_char(cp, ep)) != 0)
+ cp = last;
+ else
+ return (0);
+ }
+ return (1);
+}
+
+/* valid_utf8_stringz - validate string according to RFC 3629 */
+
+int valid_utf8_stringz(const char *str)
+{
+ const char *cp;
+ const char *last;
+
+ /*
+ * Ideally, the compiler will inline parse_utf8_char(), propagate the
+ * null pointer constant value, and eliminate code branches that test
+ * whether 0 != 0.
+ */
+ for (cp = str; *cp; cp++) {
+ if ((last = parse_utf8_char(cp, 0)) != 0)
+ cp = last;
+ else
return (0);
- }
}
return (1);
}
@@ -114,26 +102,139 @@ int valid_utf8_string(const char *str, ssize_t len)
*/
#ifdef TEST
#include <stdlib.h>
+#include <string.h>
+#include <msg.h>
#include <vstream.h>
-#include <vstring.h>
-#include <vstring_vstream.h>
+#include <msg_vstream.h>
+
+ /*
+ * Test cases for 1-, 2-, and 3-byte encodings. See printable.c for UTF8
+ * parser resychronization tests.
+ *
+ * XXX Need a test for 4-byte encodings, preferably with strings that can be
+ * displayed.
+ *
+ * XXX Need tests with hand-crafted over-long encodings and surrogates.
+ */
+struct testcase {
+ const char *name;
+ const char *input;
+ int expected;
+};
-#define STR(x) vstring_str(x)
-#define LEN(x) VSTRING_LEN(x)
+#define T_VALID (1)
+#define T_INVALID (0)
+#define valid_to_str(v) ((v) ? "VALID" : "INVALID")
-int main(void)
+static const struct testcase testcases[] = {
+ {"Printable ASCII",
+ "printable", T_VALID,
+ },
+ {"Latin script, accented, no error",
+ "na\303\257ve", T_VALID,
+ },
+ {"Latin script, accented, missing non-leading byte",
+ "na\303ve", T_INVALID,
+ },
+ {"Latin script, accented, missing leading byte",
+ "na\257ve", T_INVALID,
+ },
+ {"Viktor, Cyrillic, no error",
+ "\320\262\320\270\320\272\321\202\320\276\321\200", T_VALID,
+ },
+ {"Viktor, Cyrillic, missing non-leading byte",
+ "\320\262\320\320\272\321\202\320\276\321\200", T_INVALID,
+ },
+ {"Viktor, Cyrillic, missing leading byte",
+ "\320\262\270\320\272\321\202\320\276\321\200", T_INVALID,
+ },
+ {"Viktor, Cyrillic, truncated",
+ "\320\262\320\270\320\272\321\202\320\276\321", T_INVALID,
+ },
+ {"Viktor, Hebrew, no error",
+ "\327\225\327\231\327\247\327\230\327\225\326\274\327\250", T_VALID,
+ },
+ {"Viktor, Hebrew, missing leading byte",
+ "\327\225\231\327\247\327\230\327\225\326\274\327\250", T_INVALID,
+ },
+ {"Chinese (Simplified), no error",
+ "\344\270\255\345\233\275\344\272\222\350\201\224\347\275\221\347"
+ "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+ "\237\350\256\241\346\212\245\345\221\212", T_VALID,
+ },
+ {"Chinese (Simplified), missing leading byte",
+ "\344\270\255\345\233\275\344\272\222\350\201\224\275\221\347"
+ "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+ "\237\350\256\241\346\212\245\345\221\212", T_INVALID,
+ },
+ {"Chinese (Simplified), missing first non-leading byte",
+ "\344\270\255\345\233\275\344\272\222\350\201\224\347\221\347"
+ "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+ "\237\350\256\241\346\212\245\345\221\212", T_INVALID,
+ },
+ {"Chinese (Simplified), missing second non-leading byte",
+ "\344\270\255\345\233\275\344\272\222\350\201\224\347\275\347"
+ "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+ "\237\350\256\241\346\212\245\345\221\212", T_INVALID,
+ },
+ {"Chinese (Simplified), truncated",
+ "\344\270\255\345\233\275\344\272\222\350\201\224\347\275\221\347"
+ "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+ "\237\350\256\241\346\212\245\345", T_INVALID,
+ },
+};
+
+int main(int argc, char **argv)
{
- VSTRING *buf = vstring_alloc(1);
+ const struct testcase *tp;
+ int pass;
+ int fail;
+
+#define NUM_TESTS sizeof(testcases)/sizeof(testcases[0])
+
+ msg_vstream_init(basename(argv[0]), VSTREAM_ERR);
+ util_utf8_enable = 1;
+
+ for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) {
+ int actual_l;
+ int actual_z;
+ int ok = 0;
- while (vstring_get_nonl(buf, VSTREAM_IN) != VSTREAM_EOF) {
- vstream_printf("%c", (LEN(buf) && !valid_utf8_string(STR(buf), LEN(buf))) ?
- '!' : ' ');
- vstream_fwrite(VSTREAM_OUT, STR(buf), LEN(buf));
- vstream_printf("\n");
+ /*
+ * Notes:
+ *
+ * - The msg(3) functions use printable() which interferes when logging
+ * inputs and outputs. Use vstream_fprintf() instead.
+ */
+ vstream_fprintf(VSTREAM_ERR, "RUN %s\n", tp->name);
+ actual_l = valid_utf8_string(tp->input, strlen(tp->input));
+ actual_z = valid_utf8_stringz(tp->input);
+
+ if (actual_l != tp->expected) {
+ vstream_fprintf(VSTREAM_ERR,
+ "input: >%s<, 'actual_l' got: >%s<, want: >%s<\n",
+ tp->input, valid_to_str(actual_l),
+ valid_to_str(tp->expected));
+ } else if (actual_z != tp->expected) {
+ vstream_fprintf(VSTREAM_ERR,
+ "input: >%s<, 'actual_z' got: >%s<, want: >%s<\n",
+ tp->input, valid_to_str(actual_z),
+ valid_to_str(tp->expected));
+ } else {
+ vstream_fprintf(VSTREAM_ERR, "input: >%s<, got and want: >%s<\n",
+ tp->input, valid_to_str(actual_l));
+ ok = 1;
+ }
+ if (ok) {
+ vstream_fprintf(VSTREAM_ERR, "PASS %s\n", tp->name);
+ pass++;
+ } else {
+ vstream_fprintf(VSTREAM_ERR, "FAIL %s\n", tp->name);
+ fail++;
+ }
}
- vstream_fflush(VSTREAM_OUT);
- vstring_free(buf);
- exit(0);
+ msg_info("PASS=%d FAIL=%d", pass, fail);
+ return (fail > 0);
}
#endif
diff --git a/src/util/vstream.c b/src/util/vstream.c
index b4f9fbb..affbcc0 100644
--- a/src/util/vstream.c
+++ b/src/util/vstream.c
@@ -522,6 +522,7 @@
/* System library. */
#include <sys_defs.h>
+#include <sys/stat.h>
#include <stdlib.h> /* 44BSD stdarg.h uses abort() */
#include <stdarg.h>
#include <stddef.h>
@@ -1386,7 +1387,38 @@ VSTREAM *vstream_fopen(const char *path, int flags, mode_t mode)
VSTREAM *stream;
int fd;
- if ((fd = open(path, flags, mode)) < 0) {
+ /*
+ * To set permissions on new files only, we need to distinguish between
+ * creating a new file and opening an existing one.
+ */
+#define open_create(path, flags, mode) \
+ open((path), (flags) | (O_CREAT | O_EXCL), (mode))
+#define open_exist(path, flags, mode) \
+ open((path), (flags) & ~(O_CREAT | O_EXCL), (mode))
+
+ switch (flags & (O_CREAT | O_EXCL)) {
+ case O_CREAT:
+ fd = open_exist(path, flags, mode);
+ if (fd < 0 && errno == ENOENT) {
+ fd = open_create(path, flags, mode);
+ if (fd >= 0) {
+ if (fchmod(fd, mode) < 0) /* can't uncreate */
+ msg_warn("fchmod %s 0%o: %m", path, (unsigned) mode);
+ } else if ( /* fd < 0 && */ errno == EEXIST)
+ fd = open_exist(path, flags, mode);
+ }
+ break;
+ case O_CREAT | O_EXCL:
+ fd = open(path, flags, mode);
+ if (fd >= 0)
+ if (fchmod(fd, mode) < 0) /* can't uncreate */
+ msg_warn("fchmod %s 0%o: %m", path, (unsigned) mode);
+ break;
+ default:
+ fd = open(path, flags, mode);
+ break;
+ }
+ if (fd < 0) {
return (0);
} else {
stream = vstream_fdopen(fd, flags);