From 3e160e27e4686620d16477a9ea9cf00141e52ce7 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 13 Apr 2024 10:41:51 +0200 Subject: Adding upstream version 3.9.0. Signed-off-by: Daniel Baumann --- src/util/Makefile.in | 47 +++++++- src/util/argv.c | 57 +++++++++- src/util/argv.h | 2 + src/util/casefold.c | 2 +- src/util/dict_inline.c | 2 +- src/util/dict_thash.c | 6 +- src/util/dict_utf8.c | 4 +- src/util/inet_prefix_top.c | 1 + src/util/logwriter.c | 34 +++++- src/util/logwriter.h | 1 + src/util/midna_domain.c | 4 +- src/util/parse_utf8_char.h | 122 +++++++++++++++++++++ src/util/printable.c | 162 +++++++++++++++++++++++++--- src/util/quote_for_json.c | 218 ++++++++++++++++++++++++++++++++++++++ src/util/readlline.c | 231 +++++++++++++++++++++++++++++++++++++++- src/util/stringops.h | 3 + src/util/sys_defs.h | 7 ++ src/util/valid_hostname.c | 13 ++- src/util/valid_hostname.in | 6 ++ src/util/valid_hostname.ref | 10 ++ src/util/valid_utf8_string.c | 247 ++++++++++++++++++++++++++++++------------- src/util/vstream.c | 34 +++++- 22 files changed, 1100 insertions(+), 113 deletions(-) create mode 100644 src/util/parse_utf8_char.h create mode 100644 src/util/quote_for_json.c (limited to 'src/util') diff --git a/src/util/Makefile.in b/src/util/Makefile.in index f69dec5..01211fb 100644 --- a/src/util/Makefile.in +++ b/src/util/Makefile.in @@ -45,7 +45,7 @@ SRCS = alldig.c allprint.c argv.c argv_split.c attr_clnt.c attr_print0.c \ byte_mask.c known_tcp_ports.c argv_split_at.c dict_stream.c \ sane_strtol.c hash_fnv.c ldseed.c mkmap_cdb.c mkmap_db.c mkmap_dbm.c \ mkmap_fail.c mkmap_lmdb.c mkmap_open.c mkmap_sdbm.c inet_prefix_top.c \ - inet_addr_sizes.c + inet_addr_sizes.c quote_for_json.c OBJS = alldig.o allprint.o argv.o argv_split.o attr_clnt.o attr_print0.o \ attr_print64.o attr_print_plain.o attr_scan0.o attr_scan64.o \ attr_scan_plain.o auto_clnt.o base64_code.o basename.o binhash.o \ @@ -91,7 +91,8 @@ OBJS = alldig.o allprint.o argv.o argv_split.o attr_clnt.o 
attr_print0.o \ msg_logger.o logwriter.o unix_dgram_connect.o unix_dgram_listen.o \ byte_mask.o known_tcp_ports.o argv_split_at.o dict_stream.o \ sane_strtol.o hash_fnv.o ldseed.o mkmap_db.o mkmap_dbm.o \ - mkmap_fail.o mkmap_open.o inet_prefix_top.o inet_addr_sizes.o + mkmap_fail.o mkmap_open.o inet_prefix_top.o inet_addr_sizes.o \ + quote_for_json.o # MAP_OBJ is for maps that may be dynamically loaded with dynamicmaps.cf. # When hard-linking these, makedefs sets NON_PLUGIN_MAP_OBJ=$(MAP_OBJ), # otherwise it sets the PLUGIN_* macros. @@ -145,7 +146,7 @@ TESTPROG= dict_open dup2_pass_on_exec events exec_command fifo_open \ vstream timecmp dict_cache midna_domain casefold strcasecmp_utf8 \ vbuf_print split_qnameval vstream msg_logger byte_mask \ known_tcp_ports dict_stream find_inet binhash hash_fnv argv \ - clean_env inet_prefix_top + clean_env inet_prefix_top printable readlline quote_for_json PLUGIN_MAP_SO = $(LIB_PREFIX)pcre$(LIB_SUFFIX) $(LIB_PREFIX)lmdb$(LIB_SUFFIX) \ $(LIB_PREFIX)cdb$(LIB_SUFFIX) $(LIB_PREFIX)sdbm$(LIB_SUFFIX) HTABLE_FIX = NORANDOMIZE=1 @@ -365,6 +366,16 @@ unescape: $(LIB) $(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS) mv junk $@.o +printable: $(LIB) + mv $@.o junk + $(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS) + mv junk $@.o + +readlline: $(LIB) + mv $@.o junk + $(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS) + mv junk $@.o + hex_quote: $(LIB) mv $@.o junk $(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS) @@ -609,6 +620,11 @@ inet_prefix_top: $(LIB) $(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS) mv junk $@.o +quote_for_json: $(LIB) + mv $@.o junk + $(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(SYSLIBS) + mv junk $@.o + tests: all valid_hostname_test mac_expand_test dict_test unescape_test \ hex_quote_test ctable_test inet_addr_list_test base64_code_test \ attr_scan64_test attr_scan0_test host_port_test dict_tests \ @@ -618,7 +634,8 @@ tests: all valid_hostname_test mac_expand_test dict_test unescape_test \ 
strcasecmp_utf8_test vbuf_print_test miss_endif_cidr_test \ miss_endif_regexp_test split_qnameval_test vstring_test \ vstream_test byte_mask_tests mystrtok_test known_tcp_ports_test \ - binhash_test argv_test inet_prefix_top_test + binhash_test argv_test inet_prefix_top_test printable_test \ + valid_utf8_string_test readlline_test quote_for_json_test dict_tests: all dict_test \ dict_pcre_tests dict_cidr_test dict_thash_test dict_static_test \ @@ -650,6 +667,15 @@ unescape_test: unescape unescape.in unescape.ref # diff unescape.in unescape.tmp rm -f unescape.tmp +printable_test: printable + $(SHLIB_ENV) ${VALGRIND} ./printable + +readlline_test: readlline + $(SHLIB_ENV) ${VALGRIND} ./readlline + +valid_utf8_string_test: valid_utf8_string + $(SHLIB_ENV) ${VALGRIND} ./valid_utf8_string + hex_quote_test: hex_quote $(SHLIB_ENV) ${VALGRIND} ./hex_quote hex_quote.tmp od -cb hex_quote.ref @@ -1083,6 +1109,9 @@ argv_test: argv inet_prefix_top_test: inet_prefix_top $(SHLIB_ENV) ${VALGRIND} ./inet_prefix_top +quote_for_json_test: quote_for_json + $(SHLIB_ENV) ${VALGRIND} ./quote_for_json + depend: $(MAKES) (sed '1,/^# do not edit/!d' Makefile.in; \ set -e; for i in [a-z][a-z0-9]*.c; do \ @@ -1119,9 +1148,12 @@ allspace.o: vbuf.h allspace.o: vstring.h argv.o: argv.c argv.o: argv.h +argv.o: check_arg.h argv.o: msg.h argv.o: mymalloc.h argv.o: sys_defs.h +argv.o: vbuf.h +argv.o: vstring.h argv_attr_print.o: argv.h argv_attr_print.o: argv_attr.h argv_attr_print.o: argv_attr_print.c @@ -2157,6 +2189,7 @@ logwriter.o: logwriter.c logwriter.o: logwriter.h logwriter.o: msg.h logwriter.o: mymalloc.h +logwriter.o: name_code.h logwriter.o: safe_open.h logwriter.o: sys_defs.h logwriter.o: vbuf.h @@ -2525,11 +2558,16 @@ posix_signals.o: posix_signals.c posix_signals.o: posix_signals.h posix_signals.o: sys_defs.h printable.o: check_arg.h +printable.o: parse_utf8_char.h printable.o: printable.c printable.o: stringops.h printable.o: sys_defs.h printable.o: vbuf.h printable.o: vstring.h 
+quote_for_json.o: quote_for_json.c +quote_for_json.o: stringops.h +quote_for_json.o: sys_defs.h +quote_for_json.o: vstring.h rand_sleep.o: iostuff.h rand_sleep.o: msg.h rand_sleep.o: myrand.h @@ -2848,6 +2886,7 @@ valid_utf8_hostname.o: valid_utf8_hostname.h valid_utf8_hostname.o: vbuf.h valid_utf8_hostname.o: vstring.h valid_utf8_string.o: check_arg.h +valid_utf8_string.o: parse_utf8_char.h valid_utf8_string.o: stringops.h valid_utf8_string.o: sys_defs.h valid_utf8_string.o: valid_utf8_string.c diff --git a/src/util/argv.c b/src/util/argv.c index 4e05fd0..332426e 100644 --- a/src/util/argv.c +++ b/src/util/argv.c @@ -53,6 +53,11 @@ /* ssize_t pos; /* ssize_t how_many; /* +/* char *argv_join(buf, argvp, delim) +/* VSTRING *buf; +/* ARGV *argvp; +/* int delim; +/* /* void ARGV_FAKE_BEGIN(argv, arg) /* const char *arg; /* @@ -109,6 +114,10 @@ /* starting at the specified array position. The result is /* null-terminated. /* +/* argv_join() joins all elements in an array using the +/* specified delimiter value, and appends the result to the +/* specified buffer. +/* /* ARGV_FAKE_BEGIN/END are an optimization for the case where /* a single string needs to be passed into an ARGV-based /* interface. 
ARGV_FAKE_BEGIN() opens a statement block and
@@ -148,6 +157,7 @@
 
 #include "mymalloc.h"
 #include "msg.h"
+#include "vstring.h"
 #include "argv.h"
 
 #ifdef TEST
@@ -379,6 +389,30 @@ void argv_delete(ARGV *argvp, ssize_t first, ssize_t how_many)
     argvp->argc -= how_many;
 }
 
+/* argv_join - concatenate array elements with delimiter */
+
+char *argv_join(VSTRING *buf, ARGV *argv, int delim)
+{
+    char **cpp;
+
+    /*
+     * Append each element to the caller's buffer. The delimiter is placed
+     * between elements only, never after the last one.
+     */
+    for (cpp = argv->argv; *cpp; cpp++) {
+        vstring_strcat(buf, *cpp);
+        if (cpp[1])
+            VSTRING_ADDCH(buf, delim);
+    }
+
+    /*
+     * An empty array appends nothing, so terminate explicitly: the result
+     * must be a valid C string whatever state the caller's buffer was in.
+     */
+    VSTRING_TERMINATE(buf);
+    return (vstring_str(buf));
+}
+
 #ifdef TEST
 
 /*
@@ -402,6 +436,7 @@ typedef struct TEST_CASE {
     const char *exp_panic_msg; /* expected panic */
     int exp_argc; /* expected array length */
     const char *exp_argv[ARRAY_LEN]; /* expected array content */
+    int join_delim; /* argv_join() delimiter */
 } TEST_CASE;
 
 #define TERMINATE_ARRAY (1)
@@ -559,6 +594,24 @@ static ARGV *test_argv_bad_delete3(const TEST_CASE *tp, ARGV *argvp)
     return (argvp);
 }
 
+/* test_argv_join - populate, join, and overwrite */
+
+static ARGV *test_argv_join(const TEST_CASE *tp, ARGV *argvp)
+{
+    VSTRING *buf = vstring_alloc(100);
+
+    /*
+     * Impedance mismatch: argv_join() produces output to VSTRING, but the
+     * test fixture wants output to ARGV.
+     */
+    test_argv_populate(tp, argvp);
+    argv_join(buf, argvp, tp->join_delim);
+    argv_delete(argvp, 0, argvp->argc);
+    argv_add(argvp, vstring_str(buf), ARGV_END);
+    vstring_free(buf);
+    return (argvp);
+}
+
 /* test_argv_verify - verify result */
 
 static int test_argv_verify(const TEST_CASE *tp, ARGV *argvp)
@@ -573,7 +626,7 @@ static int test_argv_verify(const TEST_CASE *tp, ARGV *argvp)
     }
     if (strcmp(vstring_str(test_panic_str), tp->exp_panic_msg) != 0) {
         msg_warn("test case '%s': got '%s', want: '%s'",
-             tp->label, vstring_str(test_panic_str), tp->exp_panic_msg);
+                 tp->label, vstring_str(test_panic_str), tp->exp_panic_msg);
         return (FAIL);
     }
     return (PASS);
@@ -682,6 +735,18 @@ static const TEST_CASE test_cases[] = {
     {"foo", "baz", "bar", 0}, 0, test_argv_bad_delete3,
     "argv_delete bad range: (start=100 count=1)"
     },
+    {"argv_join, multiple strings",
+        {"foo", "baz", "bar", 0}, 0, test_argv_join,
+        0, 1, {"foo:baz:bar", 0}, ':'
+    },
+    {"argv_join, one string",
+        {"foo", 0}, 0, test_argv_join,
+        0, 1, {"foo", 0}, ':'
+    },
+    {"argv_join, empty",
+        {0}, 0, test_argv_join,
+        0, 1, {"", 0}, ':'
+    },
     0,
 };
 
diff --git a/src/util/argv.h b/src/util/argv.h
index b0098ce..f1e746a 100644
--- a/src/util/argv.h
+++ b/src/util/argv.h
@@ -33,6 +33,8 @@ extern void argv_truncate(ARGV *, ssize_t);
 extern void argv_insert_one(ARGV *, ssize_t, const char *);
 extern void argv_replace_one(ARGV *, ssize_t, const char *);
 extern void argv_delete(ARGV *, ssize_t, ssize_t);
+struct VSTRING;
+extern char *argv_join(struct VSTRING *buf, ARGV *, int);
 extern ARGV *argv_free(ARGV *);
 
 extern ARGV *argv_split(const char *, const char *);
diff --git a/src/util/casefold.c b/src/util/casefold.c
index d3ebd4b..94860b8 100644
--- a/src/util/casefold.c
+++ b/src/util/casefold.c
@@ -300,7 +300,7 @@ int main(int argc, char **argv)
             encode_utf8(buffer, codepoint);
             if (msg_verbose)
                 vstream_printf("U+%X -> %s\n", codepoint, STR(buffer));
-            if (valid_utf8_string(STR(buffer), LEN(buffer)) == 0)
+            if
(valid_utf8_stringz(STR(buffer)) == 0) msg_fatal("bad utf-8 encoding for U+%X\n", codepoint); casefold(dest, STR(buffer)); } diff --git a/src/util/dict_inline.c b/src/util/dict_inline.c index 72339b2..d7f9344 100644 --- a/src/util/dict_inline.c +++ b/src/util/dict_inline.c @@ -87,7 +87,7 @@ DICT *dict_inline_open(const char *name, int open_flags, int dict_flags) */ if (DICT_NEED_UTF8_ACTIVATION(util_utf8_enable, dict_flags) && allascii(name) == 0 - && valid_utf8_string(name, strlen(name)) == 0) + && valid_utf8_stringz(name) == 0) DICT_INLINE_RETURN(dict_surrogate(DICT_TYPE_INLINE, name, open_flags, dict_flags, "bad UTF-8 syntax: \"%s:%s\"; " diff --git a/src/util/dict_thash.c b/src/util/dict_thash.c index 69eb17b..bae4a63 100644 --- a/src/util/dict_thash.c +++ b/src/util/dict_thash.c @@ -127,7 +127,7 @@ DICT *dict_thash_open(const char *path, int open_flags, int dict_flags) */ if ((dict->flags & DICT_FLAG_UTF8_ACTIVE) && allascii(STR(line_buffer)) == 0 - && valid_utf8_string(STR(line_buffer), LEN(line_buffer)) == 0) { + && valid_utf8_stringz(STR(line_buffer)) == 0) { msg_warn("%s, line %d: non-UTF-8 input \"%s\"" " -- ignoring this line", VSTREAM_PATH(fp), lineno, STR(line_buffer)); @@ -181,8 +181,8 @@ DICT *dict_thash_open(const char *path, int open_flags, int dict_flags) " is this an alias file?", path, lineno); /* - * Optionally treat the value as a filename, and replace the value - * with the BASE64-encoded content of the named file. + * Optionally treat the value as a filename, and replace the + * value with the BASE64-encoded content of the named file. */ if (dict_flags & DICT_FLAG_SRC_RHS_IS_FILE) { VSTRING *base64_buf; diff --git a/src/util/dict_utf8.c b/src/util/dict_utf8.c index f1fc65a..9bb6b7b 100644 --- a/src/util/dict_utf8.c +++ b/src/util/dict_utf8.c @@ -100,7 +100,7 @@ static char *dict_utf8_check_fold(DICT *dict, const char *string, /* * Validate UTF-8 without casefolding. 
*/ - if (!allascii(string) && valid_utf8_string(string, strlen(string)) == 0) { + if (!allascii(string) && valid_utf8_stringz(string) == 0) { if (err) *err = "malformed UTF-8 or invalid codepoint"; return (0); @@ -123,7 +123,7 @@ static char *dict_utf8_check_fold(DICT *dict, const char *string, static int dict_utf8_check(const char *string, CONST_CHAR_STAR *err) { - if (!allascii(string) && valid_utf8_string(string, strlen(string)) == 0) { + if (!allascii(string) && valid_utf8_stringz(string) == 0) { if (err) *err = "malformed UTF-8 or invalid codepoint"; return (0); diff --git a/src/util/inet_prefix_top.c b/src/util/inet_prefix_top.c index 8d5af00..f35d5f0 100644 --- a/src/util/inet_prefix_top.c +++ b/src/util/inet_prefix_top.c @@ -164,6 +164,7 @@ int main(int argc, char **argv) msg_info("PASS %s/%d", str_name_code(af_map, tp->in_af), tp->in_prefix_len); } + myfree(act_prefix); } msg_info("PASS=%d FAIL=%d", pass, fail); return (fail > 0); diff --git a/src/util/logwriter.c b/src/util/logwriter.c index aea2767..4a18be3 100644 --- a/src/util/logwriter.c +++ b/src/util/logwriter.c @@ -21,6 +21,9 @@ /* const char *path, /* const char *buffer, /* ssize_t buflen) +/* +/* int set_logwriter_create_perms( +/* const char *mode) /* DESCRIPTION /* This module manages a logfile writer. /* @@ -38,6 +41,15 @@ /* logwriter_one_shot() combines all the above operations. The /* result is zero if successful, VSTREAM_EOF if any operation /* failed. +/* +/* set_logwriter_create_perms() sets the file permissions that +/* will be used when creating a logfile. Valid inputs are +/* "644", "640", and "600". Leading zeros are allowed and +/* ignored. +/* DIAGNOSTICS +/* Fatal error: logfile create error; warning: logfile permission +/* change error. set_logwriter_create_perms() returns the file +/* create permission if the request is valid, -1 otherwise. /* LICENSE /* .ad /* .fi @@ -66,10 +78,12 @@ #include #include #include +#include /* * Application-specific. 
*/
+static int logwriter_perms = 0600; /* create mode that logwriter_open_or_die() passes to safe_open() */
 
 /* logwriter_open_or_die - open logfile */
 
@@ -82,7 +96,7 @@ VSTREAM *logwriter_open_or_die(const char *path)
 #define NO_CHOWN (-1)
 #define NO_CHGRP (-1)
 
-    fp = safe_open(path, O_CREAT | O_WRONLY | O_APPEND, 0644,
+    fp = safe_open(path, O_CREAT | O_WRONLY | O_APPEND, logwriter_perms, /* configurable, no longer fixed at 0644 */
                    NO_STATP, NO_CHOWN, NO_CHGRP, why);
     if (fp == 0)
         msg_fatal("open logfile '%s': %s", path, vstring_str(why));
@@ -122,3 +136,21 @@ int logwriter_one_shot(const char *path, const char *buf, ssize_t len)
     err |= logwriter_close(fp);
     return (err ? VSTREAM_EOF : 0);
 }
+
+/* set_logwriter_create_perms - logfile permission control */
+
+int set_logwriter_create_perms(const char *mode_str)
+{
+    static const NAME_CODE sane_perms[] = { /* the only modes that are accepted */
+        "644", 0644,
+        "640", 0640,
+        "600", 0600,
+        0, -1, /* end of table; -1 is also the documented "invalid request" result */
+    };
+    int perms; /* looked-up mode, or -1 */
+
+    if ((perms = name_code(sane_perms, NAME_CODE_FLAG_NONE,
+                           mode_str + strspn(mode_str, "0"))) != -1) /* leading zeros are skipped before lookup */
+        logwriter_perms = perms; /* only a listed mode replaces the current setting */
+    return (perms);
+}
diff --git a/src/util/logwriter.h b/src/util/logwriter.h
index f5266e4..c827d25 100644
--- a/src/util/logwriter.h
+++ b/src/util/logwriter.h
@@ -23,6 +23,7 @@ extern VSTREAM *logwriter_open_or_die(const char *);
 extern int logwriter_write(VSTREAM *, const char *, ssize_t);
 extern int logwriter_close(VSTREAM *);
 extern int logwriter_one_shot(const char *, const char *, ssize_t);
+extern int set_logwriter_create_perms(const char *);
 
 /* LICENSE
 /* .ad
diff --git a/src/util/midna_domain.c b/src/util/midna_domain.c
index 333a5c9..bc016b6 100644
--- a/src/util/midna_domain.c
+++ b/src/util/midna_domain.c
@@ -178,7 +178,7 @@ static void *midna_domain_to_ascii_create(const char *name, void *unused_context
     /*
      * Paranoia: do not expose uidna_*() to unfiltered network data.
*/ - if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) { + if (allascii(name) == 0 && valid_utf8_stringz(name) == 0) { msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s", myname, name, "malformed UTF-8"); return (0); @@ -232,7 +232,7 @@ static void *midna_domain_to_utf8_create(const char *name, void *unused_context) /* * Paranoia: do not expose uidna_*() to unfiltered network data. */ - if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) { + if (allascii(name) == 0 && valid_utf8_stringz(name) == 0) { msg_warn("%s: Problem translating domain \"%.100s\" to UTF-8 form: %s", myname, name, "malformed UTF-8"); return (0); diff --git a/src/util/parse_utf8_char.h b/src/util/parse_utf8_char.h new file mode 100644 index 0000000..b00a1c2 --- /dev/null +++ b/src/util/parse_utf8_char.h @@ -0,0 +1,122 @@ +/*++ +/* NAME +/* parse_utf8_char 3h +/* SUMMARY +/* parse one UTF-8 multibyte character +/* SYNOPSIS +/* #include +/* +/* char *parse_utf8_char(str, end) +/* const char *str; +/* const char *end; +/* DESCRIPTION +/* parse_utf8_char() determines if the byte sequence starting +/* at \fBstr\fR begins with a complete UTF-8 character as +/* defined in RFC 3629. That is, a proper encoding of code +/* points U+0000..U+10FFFF, excluding over-long encodings and +/* excluding U+D800..U+DFFF surrogates. +/* +/* When the byte sequence starting at \fBstr\fR begins with a +/* complete UTF-8 character, this function returns a pointer +/* to the last byte in that character. Otherwise, it returns +/* a null pointer. +/* +/* The \fBend\fR argument is either null (the byte sequence +/* starting at \fBstr\fR must be null terminated), or \fBend +/* - str\fR specifies the length of the byte sequence. +/* BUGS +/* Code points in the range U+FDD0..U+FDEF and ending in FFFE +/* or FFFF are non-characters in UNICODE. This function does +/* not reject these. 
+/* LICENSE
+/* .ad
+/* .fi
+/*      The Secure Mailer license must be distributed with this software.
+/* AUTHOR(S)
+/*      Wietse Venema
+/*      IBM T.J. Watson Research
+/*      P.O. Box 704
+/*      Yorktown Heights, NY 10598, USA
+/*
+/*      Wietse Venema
+/*      porcupine.org
+/*      Amawalk, NY 10501, USA
+/*--*/
+
+ /*
+  * System library.
+  */
+#include /* NOTE(review): header name lost in extraction; presumably the header that defines EXPECTED()/UNEXPECTED() - confirm */
+
+#ifdef NO_INLINE
+#define inline /* */
+#endif
+
+/* parse_utf8_char - parse and validate one UTF8 multibyte sequence */
+
+static inline char *parse_utf8_char(const char *str, const char *end)
+{
+    const unsigned char *cp = (const unsigned char *) str;
+    const unsigned char *ep = (const unsigned char *) end; /* null when the input is null-terminated */
+    unsigned char c0, ch; /* c0: leader byte; ch: most recently read tail byte */
+
+    /*
+     * Optimized for correct input, time, space, and for CPUs that have a
+     * decent number of registers. Other implementation considerations:
+     *
+     * - In the UTF-8 encoding, a non-leading byte is never null. Therefore,
+     * this function will correctly reject a partial UTF-8 character at the
+     * end of a null-terminated string.
+     *
+     * - If the "end" argument is a null constant, and if this function is
+     * inlined, then an optimizing compiler should propagate the constant
+     * through the "ep" variable, and eliminate any code branches that
+     * require ep != 0.
+     */
+    /* Single-byte encodings. */
+    if (EXPECTED((c0 = *cp) <= 0x7f) /* we know that c0 >= 0x0 */ ) {
+        return ((char *) cp); /* the character's last byte is its only byte */
+    }
+    /* Two-byte encodings. */
+    else if (EXPECTED(c0 <= 0xdf) /* we know that c0 >= 0x80 */ ) {
+        /* Exclude over-long encodings. */
+        if (UNEXPECTED(c0 < 0xc2) /* this also rejects 0x80..0xbf used as a leader byte */
+            || UNEXPECTED(ep && cp + 1 >= ep) /* tail byte would lie at or beyond "end" */
+        /* Require UTF-8 tail byte. */
+            || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80))
+            return (0);
+        return ((char *) cp);
+    }
+    /* Three-byte encodings. */
+    else if (EXPECTED(c0 <= 0xef) /* we know that c0 >= 0xe0 */ ) {
+        if (UNEXPECTED(ep && cp + 2 >= ep) /* bounds check comes first, before any tail byte is read */
+        /* Exclude over-long encodings. */
+            || UNEXPECTED((ch = *++cp) < (c0 == 0xe0 ? 0xa0 : 0x80))
+        /* Exclude U+D800..U+DFFF. */
+            || UNEXPECTED(ch > (c0 == 0xed ? 0x9f : 0xbf))
+        /* Require UTF-8 tail byte. */
+            || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80))
+            return (0);
+        return ((char *) cp);
+    }
+    /* Four-byte encodings. */
+    else if (EXPECTED(c0 <= 0xf4) /* we know that c0 >= 0xf0 */ ) {
+        if (UNEXPECTED(ep && cp + 3 >= ep)
+        /* Exclude over-long encodings. */
+            || UNEXPECTED((ch = *++cp) < (c0 == 0xf0 ? 0x90 : 0x80))
+        /* Exclude code points above U+10FFFF. */
+            || UNEXPECTED(ch > (c0 == 0xf4 ? 0x8f : 0xbf))
+        /* Require UTF-8 tail byte. */
+            || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80)
+        /* Require UTF-8 tail byte. */
+            || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80))
+            return (0);
+        return ((char *) cp);
+    }
+    /* Invalid: c0 >= 0xf5 */
+    else {
+        return (0);
+    }
+}
+
+#undef inline /* undo the NO_INLINE override made above, if any */
diff --git a/src/util/printable.c b/src/util/printable.c
index 6c148fd..0e1ae19 100644
--- a/src/util/printable.c
+++ b/src/util/printable.c
@@ -45,6 +45,10 @@
 /*      Google, Inc.
 /*      111 8th Avenue
 /*      New York, NY 10011, USA
+/*
+/*      Wietse Venema
+/*      porcupine.org
+/*      Amawalk, NY 10501, USA
 /*--*/
 
 /* System library. */
@@ -56,8 +60,9 @@
 /* Utility library. */
 
 #include "stringops.h"
+#include "parse_utf8_char.h"
 
-int util_utf8_enable = 0;
+int     util_utf8_enable = 0;
 
 /* printable - binary compatibility */
 
@@ -74,27 +79,150 @@ char *printable(char *string, int replacement)
 
 char *printable_except(char *string, int replacement, const char *except)
 {
-    unsigned char *cp;
+    char *cp; /* current byte; the input is rewritten in place */
+    char *last; /* result of parse_utf8_char(): last byte of a valid character, or null */
     int ch;
 
     /*
-     * XXX Replace invalid UTF8 sequences (too short, over-long encodings,
-     * out-of-range code points, etc). See valid_utf8_string.c.
+     * In case of a non-UTF8 sequence (bad leader byte, bad non-leader byte,
+     * over-long encodings, out-of-range code points, etc), replace the first
+     * byte, and try to resynchronize at the next byte.
+     */
-    cp = (unsigned char *) string;
-    while ((ch = *cp) != 0) {
-        if (ISASCII(ch) && (ISPRINT(ch) || (except && strchr(except, ch)))) {
-            /* ok */
-        } else if (util_utf8_enable && ch >= 194 && ch <= 254
-                   && cp[1] >= 128 && cp[1] < 192) {
-            /* UTF8; skip the rest of the bytes in the character. */
-            while (cp[1] >= 128 && cp[1] < 192)
-                cp++;
-        } else {
-            /* Not ASCII and not UTF8. */
-            *cp = replacement;
+#define PRINT_OR_EXCEPT(ch) (ISPRINT(ch) || (except && strchr(except, ch)))
+
+    for (cp = string; (ch = *(unsigned char *) cp) != 0; cp++) {
+        if (util_utf8_enable == 0) { /* UTF-8 support off: only acceptable ASCII is kept */
+            if (ISASCII(ch) && PRINT_OR_EXCEPT(ch))
+                continue;
+        } else if ((last = parse_utf8_char(cp, 0)) == cp) { /* ASCII */
+            if (PRINT_OR_EXCEPT(ch))
+                continue;
+        } else if (last != 0) { /* Other UTF8 */
+            cp = last; /* jump to the character's last byte; the loop's cp++ steps past it */
+            continue;
         }
-        cp++;
+        *cp = replacement; /* reached by every byte that did not "continue" above */
     }
     return (string);
 }
+
+#ifdef TEST
+
+#include /* NOTE(review): the names of these six headers were lost in extraction */
+#include
+#include
+#include
+#include
+#include
+
+ /*
+  * Test cases for 1-, 2-, and 3-byte encodings. Originally contributed by
+  * Viktor Dukhovni, and annotated using translate.google.com.
+  *
+  * See valid_utf8_string.c for single-error tests.
+  *
+  * XXX Need a test for 4-byte encodings, preferably with strings that can be
+  * displayed.
+  */
+struct testcase {
+    const char *name; /* label printed in the RUN/PASS/FAIL lines */
+    const char *input; /* bytes handed to printable(), after copying */
+    const char *expected; /* wanted result, with '?' as the replacement */
+};
+static const struct testcase testcases[] = {
+    {"Printable ASCII",
+        "printable", "printable"
+    },
+    {"ASCII with control character",
+        "non\bn-printable", "non?n-printable"
+    },
+    {"Latin accented text, no error",
+        "na\303\257ve", "na\303\257ve"
+    },
+    {"Latin text, with error",
+        "na\303ve", "na?ve"
+    },
+    {"Viktor, Cyrillic, no error",
+        "\320\262\320\270\320\272\321\202\320\276\321\200",
+        "\320\262\320\270\320\272\321\202\320\276\321\200"
+    },
+    {"Viktor, Cyrillic, two errors",
+        "\320\262\320\320\272\272\321\202\320\276\321\200",
+        "\320\262?\320\272?\321\202\320\276\321\200"
+    },
+    {"Viktor, Hebrew, no error",
+        "\327\225\327\231\327\247\327\230\327\225\326\274\327\250",
+        "\327\225\327\231\327\247\327\230\327\225\326\274\327\250"
+    },
+    {"Viktor, Hebrew, with error",
+        "\327\225\231\327\247\327\230\327\225\326\274\327\250",
+        "\327\225?\327\247\327\230\327\225\326\274\327\250"
+    },
+    {"Chinese (Simplified), no error",
+        "\344\270\255\345\233\275\344\272\222\350\201\224\347\275\221\347"
+        "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+        "\237\350\256\241\346\212\245\345\221\212",
+        "\344\270\255\345\233\275\344\272\222\350\201\224\347\275\221\347"
+        "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+        "\237\350\256\241\346\212\245\345\221\212"
+    },
+    {"Chinese (Simplified), with errors",
+        "\344\270\255\345\344\272\222\350\224\347\275\221\347"
+        "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+        "\237\350\256\241\346\212\245\345",
+        "\344\270\255?\344\272\222??\347\275\221\347"
+        "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
+        "\237\350\256\241\346\212\245?"
+    },
+};
+
+int main(int argc, char **argv)
+{
+    const struct testcase *tp;
+    int pass;
+    int fail;
+
+#define NUM_TESTS (sizeof(testcases)/sizeof(testcases[0]))
+
+    msg_vstream_init(basename(argv[0]), VSTREAM_ERR);
+    util_utf8_enable = 1; /* exercise the UTF-8 aware branch of printable_except() */
+
+    for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) {
+        char *input;
+        char *actual;
+        int ok = 0;
+
+        /*
+         * Notes:
+         *
+         * - The input is modified, therefore it must be copied.
+         *
+         * - The msg(3) functions use printable() which interferes when logging
+         * inputs and outputs. Use vstream_fprintf() instead.
+         */
+        vstream_fprintf(VSTREAM_ERR, "RUN %s\n", tp->name);
+        input = mystrdup(tp->input);
+        actual = printable(input, '?');
+
+        if (strcmp(actual, tp->expected) != 0) {
+            vstream_fprintf(VSTREAM_ERR, "input: >%s<, got: >%s<, want: >%s<\n",
+                            tp->input, actual, tp->expected);
+        } else {
+            vstream_fprintf(VSTREAM_ERR, "input: >%s<, got and want: >%s<\n",
+                            tp->input, actual);
+            ok = 1;
+        }
+        if (ok) {
+            vstream_fprintf(VSTREAM_ERR, "PASS %s\n", tp->name);
+            pass++;
+        } else {
+            vstream_fprintf(VSTREAM_ERR, "FAIL %s\n", tp->name);
+            fail++;
+        }
+        myfree(input);
+    }
+    msg_info("PASS=%d FAIL=%d", pass, fail);
+    return (fail > 0); /* non-zero exit status when any test case failed */
+}
+
+#endif
diff --git a/src/util/quote_for_json.c b/src/util/quote_for_json.c
new file mode 100644
index 0000000..f54af3f
--- /dev/null
+++ b/src/util/quote_for_json.c
@@ -0,0 +1,218 @@
+/*++
+/* NAME
+/*      quote_for_json 3
+/* SUMMARY
+/*      quote UTF-8 string value for JSON
+/* SYNOPSIS
+/*      #include
+/*
+/*      char *quote_for_json(
+/*      VSTRING *result,
+/*      const char *in,
+/*      ssize_t len)
+/*
+/*      char *quote_for_json_append(
+/*      VSTRING *result,
+/*      const char *in,
+/*      ssize_t len)
+/* DESCRIPTION
+/*      quote_for_json() takes well-formed UTF-8 encoded text,
+/*      quotes that text compliant with RFC 4627, and returns a
+/*      pointer to the resulting text. The input may contain null
+/*      bytes, but the output will not.
+/* +/* quote_for_json() produces short (two-letter) escape sequences +/* for common control characters, double quote and backslash. +/* It will not quote "/" (0x2F), and will quote DEL (0x7f) as +/* \u007F to make it printable. The input byte sequence "\uXXXX" +/* is quoted like any other text (the "\" is escaped as "\\"). +/* +/* quote_for_json() does not perform UTF-8 validation. The caller +/* should use valid_utf8_string() or printable() as appropriate. +/* +/* quote_for_json_append() appends the output to the result buffer. +/* +/* Arguments: +/* .IP result +/* Storage for the result, resized automatically. +/* .IP in +/* Pointer to the input byte sequence. +/* .IP len +/* The length of the input byte sequence, or a negative number +/* when the byte sequence is null-terminated. +/* DIAGNOSTICS +/* Fatal error: memory allocation error. +/* LICENSE +/* .ad +/* .fi +/* The Secure Mailer license must be distributed with this software. +/* AUTHOR(S) +/* Wietse Venema +/* Google, Inc. +/* 111 8th Avenue +/* New York, NY 10011, USA +/* +/* Wietse Venema +/* porcupine.org +/*--*/ + + /* + * System library. + */ +#include +#include +#include + + /* + * Utility library. 
+  */
+#include /* NOTE(review): header names lost in extraction; Makefile.in in this patch lists stringops.h and vstring.h as dependencies */
+#include
+
+#define STR(x) vstring_str(x)
+
+/* quote_for_json_append - quote JSON string, append result */
+
+char *quote_for_json_append(VSTRING *result, const char *text, ssize_t len)
+{
+    const char *cp;
+    int ch;
+
+    if (len < 0) /* negative length: the input is null-terminated */
+        len = strlen(text);
+
+    for (cp = text; len > 0; len--, cp++) { /* length-driven, so embedded null bytes are processed too */
+        ch = *(const unsigned char *) cp; /* read as unsigned so that bytes >= 0x80 stay positive */
+        if (UNEXPECTED(ISCNTRL(ch))) {
+            switch (ch) { /* two-letter escapes for the common controls */
+            case '\b':
+                VSTRING_ADDCH(result, '\\');
+                VSTRING_ADDCH(result, 'b');
+                break;
+            case '\f':
+                VSTRING_ADDCH(result, '\\');
+                VSTRING_ADDCH(result, 'f');
+                break;
+            case '\n':
+                VSTRING_ADDCH(result, '\\');
+                VSTRING_ADDCH(result, 'n');
+                break;
+            case '\r':
+                VSTRING_ADDCH(result, '\\');
+                VSTRING_ADDCH(result, 'r');
+                break;
+            case '\t':
+                VSTRING_ADDCH(result, '\\');
+                VSTRING_ADDCH(result, 't');
+                break;
+            default:
+                /* All other controls including DEL and NUL. */
+                vstring_sprintf_append(result, "\\u%04X", ch);
+                break;
+            }
+        } else {
+            switch (ch) {
+            case '\\':
+            case '"':
+                VSTRING_ADDCH(result, '\\'); /* escape prefix; the character itself is added below */
+                /* FALLTHROUGH */
+            default:
+                /* Includes malformed UTF-8. */
+                VSTRING_ADDCH(result, ch);
+                break;
+            }
+        }
+    }
+    VSTRING_TERMINATE(result); /* terminate even when nothing was appended */
+    return (STR(result));
+}
+
+/* quote_for_json - quote JSON string */
+
+char *quote_for_json(VSTRING *result, const char *text, ssize_t len)
+{
+    VSTRING_RESET(result); /* discard earlier content, then reuse the append variant */
+    return (quote_for_json_append(result, text, len));
+}
+
+#ifdef TEST
+
+ /*
+  * System library.
+  */
+#include
+
+ /*
+  * Utility library.
+  */
+#include
+#include
+
+typedef struct TEST_CASE {
+    const char *label; /* identifies test case */
+    char *(*fn) (VSTRING *, const char *, ssize_t); /* function under test */
+    const char *input; /* input string */
+    ssize_t input_len; /* -1 or input length */
+    const char *exp_res; /* expected result */
+} TEST_CASE;
+
+#define PASS (0)
+#define FAIL (1)
+
+ /*
+  * The test cases.
+  */
+static const TEST_CASE test_cases[] = {
+    {"ordinary ASCII text", quote_for_json,
+        " abcABC012.,[]{}/", -1, " abcABC012.,[]{}/",
+    },
+    {"quote_for_json_append", quote_for_json_append, /* appends to the result that the previous case left in the shared buffer */
+        "foo", -1, " abcABC012.,[]{}/foo",
+    },
+    {"common control characters", quote_for_json,
+        "\b\f\r\n\t", -1, "\\b\\f\\r\\n\\t",
+    },
+    {"uncommon control characters and DEL", quote_for_json,
+        "\0\01\037\040\176\177", 6, "\\u0000\\u0001\\u001F ~\\u007F", /* explicit length: the input starts with a null byte */
+    },
+    {"malformed UTF-8", quote_for_json,
+        "\\*\\uasd\\u007F\x80", -1, "\\\\*\\\\uasd\\\\u007F\x80",
+    },
+    0, /* null label ends the table */
+};
+
+int main(int argc, char **argv)
+{
+    const TEST_CASE *tp;
+    int pass = 0;
+    int fail = 0;
+    VSTRING *res_buf = vstring_alloc(100); /* shared by all test cases */
+
+    msg_vstream_init(sane_basename((VSTRING *) 0, argv[0]), VSTREAM_ERR);
+
+    for (tp = test_cases; tp->label != 0; tp++) {
+        int test_fail = 0;
+        char *res;
+
+        msg_info("RUN %s", tp->label);
+        res = tp->fn(res_buf, tp->input, tp->input_len);
+        if (strcmp(res, tp->exp_res) != 0) {
+            msg_warn("test case '%s': got '%s', want '%s'",
+                     tp->label, res, tp->exp_res);
+            test_fail = 1;
+        }
+        if (test_fail) {
+            fail++;
+            msg_info("FAIL %s", tp->label);
+            test_fail = 1; /* NOTE(review): redundant, test_fail is already non-zero on this path */
+        } else {
+            msg_info("PASS %s", tp->label);
+            pass++;
+        }
+    }
+    msg_info("PASS=%d FAIL=%d", pass, fail);
+    vstring_free(res_buf);
+    exit(fail != 0); /* non-zero exit status when any test case failed */
+}
+
+#endif
diff --git a/src/util/readlline.c b/src/util/readlline.c
index 015877a..721b75f 100644
--- a/src/util/readlline.c
+++ b/src/util/readlline.c
@@ -85,9 +85,15 @@ VSTRING *readllines(VSTRING *buf, VSTREAM *fp, int *lineno, int *first_line)
     int next;
     ssize_t start;
     char *cp;
+    int my_lineno = 0, my_first_line, got_null = 0; /* local stand-ins for null lineno/first_line, plus a null-byte flag */
 
     VSTRING_RESET(buf);
 
+    if (lineno == 0) /* caller passed no line counter: count into a local instead */
+        lineno = &my_lineno;
+    if (first_line == 0) /* likewise for the first-line result */
+        first_line = &my_first_line;
+
     /*
      * Ignore comment lines, all whitespace lines, and empty lines. Terminate
      * at EOF or at the beginning of the next logical line.
@@ -95,16 +101,19 @@ VSTRING *readllines(VSTRING *buf, VSTREAM *fp, int *lineno, int *first_line) for (;;) { /* Read one line, possibly not newline terminated. */ start = LEN(buf); - while ((ch = VSTREAM_GETC(fp)) != VSTREAM_EOF && ch != '\n') + while ((ch = VSTREAM_GETC(fp)) != VSTREAM_EOF && ch != '\n') { VSTRING_ADDCH(buf, ch); - if (lineno != 0 && (ch == '\n' || LEN(buf) > start)) + if (ch == 0) + got_null = 1; + } + if (ch == '\n' || LEN(buf) > start) *lineno += 1; /* Ignore comment line, all whitespace line, or empty line. */ for (cp = STR(buf) + start; cp < END(buf) && ISSPACE(*cp); cp++) /* void */ ; if (cp == END(buf) || *cp == '#') vstring_truncate(buf, start); - else if (start == 0 && lineno != 0 && first_line != 0) + if (start == 0) *first_line = *lineno; /* Terminate at EOF or at the beginning of the next logical line. */ if (ch == VSTREAM_EOF) @@ -118,6 +127,20 @@ VSTRING *readllines(VSTRING *buf, VSTREAM *fp, int *lineno, int *first_line) } VSTRING_TERMINATE(buf); + /* + * This code does not care about embedded null bytes, but callers do. + */ + if (got_null) { + const char *why = "text after null byte may be ignored"; + + if (*first_line == *lineno) + msg_warn("%s, line %d: %s", + VSTREAM_PATH(fp), *lineno, why); + else + msg_warn("%s, line %d-%d: %s", + VSTREAM_PATH(fp), *first_line, *lineno, why); + } + /* * Invalid input: continuing text without preceding text. Allowing this * would complicate "postconf -e", which implements its own multi-line @@ -136,3 +159,205 @@ VSTRING *readllines(VSTRING *buf, VSTREAM *fp, int *lineno, int *first_line) */ return (LEN(buf) > 0 ? buf : 0); } + + /* + * Stand-alone test program. + */ +#ifdef TEST +#include +#include +#include +#include +#include +#include +#include + + /* + * Test cases. Note: the input and exp_output fields are converted with + * unescape(). Embedded null bytes must be specified as \\0. 
+ */ +struct testcase { + const char *name; + const char *input; + const char *exp_output; + int exp_first_line; + int exp_last_line; +}; + +static const struct testcase testcases[] = { + {"leading space before non-comment", + " abcde\nfghij\n", + "fghij", + 2, 2 + /* Expect "logical line must not start with whitespace" */ + }, + {"leading space before leading comment", + " #abcde\nfghij\n", + "fghij", + 2, 2 + }, + {"leading #comment at beginning of line", + "#abc\ndef", + "def", + 2, 2, + }, + {"empty line before non-comment", + "\nabc\n", + "abc", + 2, 2, + }, + {"whitespace line before non-comment", + " \nabc\n", + "abc", + 2, 2, + }, + {"missing newline at end of non-comment", + "abc def", + "abc def", + 1, 1, + }, + {"missing newline at end of comment", + "#abc def", + "", + 1, 1, + }, + {"embedded null, single-line", + "abc\\0def", + "abc\\0def", + 1, 1, + /* Expect "line 1: text after null byte may be ignored" */ + }, + {"embedded null, multiline", + "abc\\0\n def", + "abc\\0 def", + 1, 2, + /* Expect "line 1-2: text after null byte may be ignored" */ + }, + {"embedded null in comment", + "#abc\\0\ndef", + "def", + 2, 2, + /* Expect "line 2: text after null byte may be ignored" */ + }, + {"multiline input", + "abc\n def\n", + "abc def", + 1, 2, + }, + {"multiline input with embedded #comment after space", + "abc\n #def\n ghi", + "abc ghi", + 1, 3, + }, + {"multiline input with embedded #comment flush left", + "abc\n#def\n ghi", + "abc ghi", + 1, 3, + }, + {"multiline input with embedded whitespace line", + "abc\n \n ghi", + "abc ghi", + 1, 3, + }, + {"multiline input with embedded empty line", + "abc\n\n ghi", + "abc ghi", + 1, 3, + }, + {"multiline input with embedded #comment after space", + "abc\n #def\n", + "abc", + 1, 2, + }, + {"multiline input with embedded #comment flush left", + "abc\n#def\n", + "abc", + 1, 2, + }, + {"empty line at end of file", + "\n", + "", + 1, 1, + }, + {"whitespace line at end of file", + "\n \n", + "", + 2, 2, + }, + 
{"whitespace at end of file", + "abc\n ", + "abc", + 1, 2, + }, +}; + +int main(int argc, char **argv) +{ + const struct testcase *tp; + VSTRING *inp_buf = vstring_alloc(100); + VSTRING *exp_buf = vstring_alloc(100); + VSTRING *out_buf = vstring_alloc(100); + VSTRING *esc_buf = vstring_alloc(100); + VSTREAM *fp; + int last_line; + int first_line; + int pass; + int fail; + +#define NUM_TESTS sizeof(testcases)/sizeof(testcases[0]) + + msg_vstream_init(basename(argv[0]), VSTREAM_ERR); + util_utf8_enable = 1; + + for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) { + int ok = 0; + + vstream_fprintf(VSTREAM_ERR, "RUN %s\n", tp->name); + unescape(inp_buf, tp->input); + unescape(exp_buf, tp->exp_output); + if ((fp = vstream_memopen(inp_buf, O_RDONLY)) == 0) + msg_panic("open memory stream for reading: %m"); + vstream_control(fp, CA_VSTREAM_CTL_PATH("memory buffer"), + CA_VSTREAM_CTL_END); + last_line = 0; + if (readllines(out_buf, fp, &last_line, &first_line) == 0) { + VSTRING_RESET(out_buf); + VSTRING_TERMINATE(out_buf); + } + if (LEN(out_buf) != LEN(exp_buf)) { + msg_warn("unexpected output length, got: %ld, want: %ld", + (long) LEN(out_buf), (long) LEN(exp_buf)); + } else if (memcmp(STR(out_buf), STR(exp_buf), LEN(out_buf)) != 0) { + msg_warn("unexpected output: got: >%s<, want: >%s<", + STR(escape(esc_buf, STR(out_buf), LEN(out_buf))), + tp->exp_output); + } else if (first_line != tp->exp_first_line) { + msg_warn("unexpected first_line: got: %d, want: %d", + first_line, tp->exp_first_line); + } else if (last_line != tp->exp_last_line) { + msg_warn("unexpected last_line: got: %d, want: %d", + last_line, tp->exp_last_line); + } else { + vstream_fprintf(VSTREAM_ERR, "got and want: >%s<\n", + tp->exp_output); + ok = 1; + } + if (ok) { + vstream_fprintf(VSTREAM_ERR, "PASS %s\n", tp->name); + pass++; + } else { + vstream_fprintf(VSTREAM_ERR, "FAIL %s\n", tp->name); + fail++; + } + vstream_fclose(fp); + } + vstring_free(inp_buf); + vstring_free(exp_buf); 
+ vstring_free(out_buf); + vstring_free(esc_buf); + + msg_info("PASS=%d FAIL=%d", pass, fail); + return (fail > 0); +} + +#endif diff --git a/src/util/stringops.h b/src/util/stringops.h index 97aa597..db56f23 100644 --- a/src/util/stringops.h +++ b/src/util/stringops.h @@ -60,10 +60,13 @@ extern int allascii_len(const char *, ssize_t); extern const char *WARN_UNUSED_RESULT split_nameval(char *, char **, char **); extern const char *WARN_UNUSED_RESULT split_qnameval(char *, char **, char **); extern int valid_utf8_string(const char *, ssize_t); +extern int valid_utf8_stringz(const char *); extern size_t balpar(const char *, const char *); extern char *WARN_UNUSED_RESULT extpar(char **, const char *, int); extern int strcasecmp_utf8x(int, const char *, const char *); extern int strncasecmp_utf8x(int, const char *, const char *, ssize_t); +extern char *quote_for_json(VSTRING *, const char *, ssize_t); +extern char *quote_for_json_append(VSTRING *, const char *, ssize_t); #define EXTPAR_FLAG_NONE (0) #define EXTPAR_FLAG_STRIP (1<<0) /* "{ text }" -> "text" */ diff --git a/src/util/sys_defs.h b/src/util/sys_defs.h index 9247185..62749ab 100644 --- a/src/util/sys_defs.h +++ b/src/util/sys_defs.h @@ -1331,6 +1331,13 @@ extern int dup2_pass_on_exec(int oldd, int newd); #undef HAVE_RES_SEND #endif + /* + * The RFC 5322 Date and Time Specification recommends a single space between + * date-time tokens. To avoid a breaking change, format all numerical days as + * two-digit days (i.e. days 1-9 now have a leading zero instead of a space). + */ +#define TWO_DIGIT_DAY_IN_DATE_TIME + /* * Check for required but missing definitions.
*/ diff --git a/src/util/valid_hostname.c b/src/util/valid_hostname.c index 8b234c4..457d1f1 100644 --- a/src/util/valid_hostname.c +++ b/src/util/valid_hostname.c @@ -6,9 +6,9 @@ /* SYNOPSIS /* #include /* -/* int valid_hostname(name, gripe) +/* int valid_hostname(name, flags) /* const char *name; -/* int gripe; +/* int flags; /* /* int valid_hostaddr(addr, gripe) /* const char *addr; @@ -32,6 +32,10 @@ /* dots, no leading or trailing dots or hyphens, no labels /* longer than VALID_LABEL_LEN characters, and it should not /* be all numeric. +/* The flags argument is the bit-wise or of zero or more of +/* DO_GRIPE or DO_WILDCARD (the latter allows the "*." name +/* prefix, which is rare but valid in some DNS responses and +/* queries). /* /* valid_hostaddr() requires that the input is a valid string /* representation of an IPv4 or IPv6 network address as @@ -403,8 +407,9 @@ int main(int unused_argc, char **argv) while (vstring_fgets_nonl(buffer, VSTREAM_IN)) { msg_info("testing: \"%s\"", vstring_str(buffer)); - valid_hostname(vstring_str(buffer), DO_GRIPE); - valid_hostaddr(vstring_str(buffer), DO_GRIPE); + valid_hostname(vstring_str(buffer), DO_GRIPE | DO_WILDCARD); + if (strchr(vstring_str(buffer), '*') == 0) + valid_hostaddr(vstring_str(buffer), DO_GRIPE); } exit(0); } diff --git a/src/util/valid_hostname.in b/src/util/valid_hostname.in index 608c0d1..4cdf019 100644 --- a/src/util/valid_hostname.in +++ b/src/util/valid_hostname.in @@ -53,3 +53,9 @@ g:a:a:a:a:a:a:a a::b :a::b a::b: +*.foo.bar +*foo.bar +foo.*.bar +foo*bar +foo.bar* +* diff --git a/src/util/valid_hostname.ref b/src/util/valid_hostname.ref index 08b23b8..eccc558 100644 --- a/src/util/valid_hostname.ref +++ b/src/util/valid_hostname.ref @@ -141,3 +141,13 @@ ./valid_hostname: testing: "a::b:" ./valid_hostname: warning: valid_hostname: invalid character 58(decimal): a::b: ./valid_hostname: warning: valid_ipv6_hostaddr: bad null last field in IPv6 address: a::b: +./valid_hostname: testing: "*.foo.bar" 
+./valid_hostname: testing: "*foo.bar" +./valid_hostname: warning: valid_hostname: '*' can be the first label only: *foo.bar +./valid_hostname: testing: "foo.*.bar" +./valid_hostname: warning: valid_hostname: '*' can be the first label only: foo.*.bar +./valid_hostname: testing: "foo*bar" +./valid_hostname: warning: valid_hostname: '*' can be the first label only: foo*bar +./valid_hostname: testing: "foo.bar*" +./valid_hostname: warning: valid_hostname: '*' can be the first label only: foo.bar* +./valid_hostname: testing: "*" diff --git a/src/util/valid_utf8_string.c b/src/util/valid_utf8_string.c index 96b5b4d..f5b4ff4 100644 --- a/src/util/valid_utf8_string.c +++ b/src/util/valid_utf8_string.c @@ -9,24 +9,24 @@ /* int valid_utf8_string(str, len) /* const char *str; /* ssize_t len; +/* +/* int valid_utf8_stringz(str) +/* const char *str; +/* ssize_t len; /* DESCRIPTION -/* valid_utf8_string() determines if a string satisfies the UTF-8 -/* definition in RFC 3629. That is, it contains proper encodings -/* of code points U+0000..U+10FFFF, excluding over-long encodings -/* and excluding U+D800..U+DFFF surrogates. +/* valid_utf8_string() determines if all bytes in a string +/* satisfy parse_utf8_char(3h) checks. See there for any +/* implementation limitations. +/* +/* valid_utf8_stringz() determines the same for zero-terminated +/* strings. /* /* A zero-length string is considered valid. /* DIAGNOSTICS /* The result value is zero when the caller specifies a negative -/* length, or a string that violates RFC 3629, for example a -/* string that is truncated in the middle of a multi-byte -/* sequence. -/* BUGS -/* But wait, there is more. Code points in the range U+FDD0..U+FDEF -/* and ending in FFFE or FFFF are non-characters in UNICODE. This -/* function does not block these. +/* length, or a string that does not pass parse_utf8_char(3h) checks. 
/* SEE ALSO -/* RFC 3629 +/* parse_utf8_char(3h), parse one UTF-8 multibyte character /* LICENSE /* .ad /* .fi @@ -36,6 +36,10 @@ /* IBM T.J. Watson Research /* P.O. Box 704 /* Yorktown Heights, NY 10598, USA +/* +/* Wietse Venema +/* porcupine.org +/* Amawalk, NY 10501, USA /*--*/ /* System library. */ @@ -45,66 +49,50 @@ /* Utility library. */ #include +#include /* valid_utf8_string - validate string according to RFC 3629 */ int valid_utf8_string(const char *str, ssize_t len) { - const unsigned char *end = (const unsigned char *) str + len; - const unsigned char *cp; - unsigned char c0, ch; + const char *ep = str + len; + const char *cp; + const char *last; if (len < 0) return (0); - if (len <= 0) + if (len == 0) return (1); /* - * Optimized for correct input, time, space, and for CPUs that have a - * decent number of registers. + * Ideally, the compiler will inline parse_utf8_char(). */ - for (cp = (const unsigned char *) str; cp < end; cp++) { - /* Single-byte encodings. */ - if (EXPECTED((c0 = *cp) <= 0x7f) /* we know that c0 >= 0x0 */ ) { - /* void */ ; - } - /* Two-byte encodings. */ - else if (EXPECTED(c0 <= 0xdf) /* we know that c0 >= 0x80 */ ) { - /* Exclude over-long encodings. */ - if (UNEXPECTED(c0 < 0xc2) - || UNEXPECTED(cp + 1 >= end) - /* Require UTF-8 tail byte. */ - || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80)) - return (0); - } - /* Three-byte encodings. */ - else if (EXPECTED(c0 <= 0xef) /* we know that c0 >= 0xe0 */ ) { - if (UNEXPECTED(cp + 2 >= end) - /* Exclude over-long encodings. */ - || UNEXPECTED((ch = *++cp) < (c0 == 0xe0 ? 0xa0 : 0x80)) - /* Exclude U+D800..U+DFFF. */ - || UNEXPECTED(ch > (c0 == 0xed ? 0x9f : 0xbf)) - /* Require UTF-8 tail byte. */ - || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80)) - return (0); - } - /* Four-byte encodings. */ - else if (EXPECTED(c0 <= 0xf4) /* we know that c0 >= 0xf0 */ ) { - if (UNEXPECTED(cp + 3 >= end) - /* Exclude over-long encodings. */ - || UNEXPECTED((ch = *++cp) < (c0 == 0xf0 ? 
0x90 : 0x80)) - /* Exclude code points above U+10FFFF. */ - || UNEXPECTED(ch > (c0 == 0xf4 ? 0x8f : 0xbf)) - /* Require UTF-8 tail byte. */ - || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80) - /* Require UTF-8 tail byte. */ - || UNEXPECTED(((ch = *++cp) & 0xc0) != 0x80)) - return (0); - } - /* Invalid: c0 >= 0xf5 */ - else { + for (cp = str; cp < ep; cp++) { + if ((last = parse_utf8_char(cp, ep)) != 0) + cp = last; + else + return (0); + } + return (1); +} + +/* valid_utf8_stringz - validate string according to RFC 3629 */ + +int valid_utf8_stringz(const char *str) +{ + const char *cp; + const char *last; + + /* + * Ideally, the compiler will inline parse_utf8_char(), propagate the + * null pointer constant value, and eliminate code branches that test + * whether 0 != 0. + */ + for (cp = str; *cp; cp++) { + if ((last = parse_utf8_char(cp, 0)) != 0) + cp = last; + else return (0); - } } return (1); } @@ -114,26 +102,139 @@ int valid_utf8_string(const char *str, ssize_t len) */ #ifdef TEST #include +#include +#include #include -#include -#include +#include + + /* + * Test cases for 1-, 2-, and 3-byte encodings. See printable.c for UTF8 + * parser resynchronization tests. + * + * XXX Need a test for 4-byte encodings, preferably with strings that can be + * displayed. + * + * XXX Need tests with hand-crafted over-long encodings and surrogates. + */ +struct testcase { + const char *name; + const char *input; + int expected; +}; -#define STR(x) vstring_str(x) -#define LEN(x) VSTRING_LEN(x) +#define T_VALID (1) +#define T_INVALID (0) +#define valid_to_str(v) ((v) ?
"VALID" : "INVALID") -int main(void) +static const struct testcase testcases[] = { + {"Printable ASCII", + "printable", T_VALID, + }, + {"Latin script, accented, no error", + "na\303\257ve", T_VALID, + }, + {"Latin script, accented, missing non-leading byte", + "na\303ve", T_INVALID, + }, + {"Latin script, accented, missing leading byte", + "na\257ve", T_INVALID, + }, + {"Viktor, Cyrillic, no error", + "\320\262\320\270\320\272\321\202\320\276\321\200", T_VALID, + }, + {"Viktor, Cyrillic, missing non-leading byte", + "\320\262\320\320\272\321\202\320\276\321\200", T_INVALID, + }, + {"Viktor, Cyrillic, missing leading byte", + "\320\262\270\320\272\321\202\320\276\321\200", T_INVALID, + }, + {"Viktor, Cyrillic, truncated", + "\320\262\320\270\320\272\321\202\320\276\321", T_INVALID, + }, + {"Viktor, Hebrew, no error", + "\327\225\327\231\327\247\327\230\327\225\326\274\327\250", T_VALID, + }, + {"Viktor, Hebrew, missing leading byte", + "\327\225\231\327\247\327\230\327\225\326\274\327\250", T_INVALID, + }, + {"Chinese (Simplified), no error", + "\344\270\255\345\233\275\344\272\222\350\201\224\347\275\221\347" + "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273" + "\237\350\256\241\346\212\245\345\221\212", T_VALID, + }, + {"Chinese (Simplified), missing leading byte", + "\344\270\255\345\233\275\344\272\222\350\201\224\275\221\347" + "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273" + "\237\350\256\241\346\212\245\345\221\212", T_INVALID, + }, + {"Chinese (Simplified), missing first non-leading byte", + "\344\270\255\345\233\275\344\272\222\350\201\224\347\221\347" + "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273" + "\237\350\256\241\346\212\245\345\221\212", T_INVALID, + }, + {"Chinese (Simplified), missing second non-leading byte", + "\344\270\255\345\233\275\344\272\222\350\201\224\347\275\347" + "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273" + "\237\350\256\241\346\212\245\345\221\212", 
T_INVALID, + }, + {"Chinese (Simplified), truncated", + "\344\270\255\345\233\275\344\272\222\350\201\224\347\275\221\347" + "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273" + "\237\350\256\241\346\212\245\345", T_INVALID, + }, +}; + +int main(int argc, char **argv) { - VSTRING *buf = vstring_alloc(1); + const struct testcase *tp; + int pass; + int fail; + +#define NUM_TESTS sizeof(testcases)/sizeof(testcases[0]) + + msg_vstream_init(basename(argv[0]), VSTREAM_ERR); + util_utf8_enable = 1; + + for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) { + int actual_l; + int actual_z; + int ok = 0; - while (vstring_get_nonl(buf, VSTREAM_IN) != VSTREAM_EOF) { - vstream_printf("%c", (LEN(buf) && !valid_utf8_string(STR(buf), LEN(buf))) ? - '!' : ' '); - vstream_fwrite(VSTREAM_OUT, STR(buf), LEN(buf)); - vstream_printf("\n"); + /* + * Notes: + * + * - The msg(3) functions use printable() which interferes when logging + * inputs and outputs. Use vstream_fprintf() instead. 
+ */ + vstream_fprintf(VSTREAM_ERR, "RUN %s\n", tp->name); + actual_l = valid_utf8_string(tp->input, strlen(tp->input)); + actual_z = valid_utf8_stringz(tp->input); + + if (actual_l != tp->expected) { + vstream_fprintf(VSTREAM_ERR, + "input: >%s<, 'actual_l' got: >%s<, want: >%s<\n", + tp->input, valid_to_str(actual_l), + valid_to_str(tp->expected)); + } else if (actual_z != tp->expected) { + vstream_fprintf(VSTREAM_ERR, + "input: >%s<, 'actual_z' got: >%s<, want: >%s<\n", + tp->input, valid_to_str(actual_z), + valid_to_str(tp->expected)); + } else { + vstream_fprintf(VSTREAM_ERR, "input: >%s<, got and want: >%s<\n", + tp->input, valid_to_str(actual_l)); + ok = 1; + } + if (ok) { + vstream_fprintf(VSTREAM_ERR, "PASS %s\n", tp->name); + pass++; + } else { + vstream_fprintf(VSTREAM_ERR, "FAIL %s\n", tp->name); + fail++; + } } - vstream_fflush(VSTREAM_OUT); - vstring_free(buf); - exit(0); + msg_info("PASS=%d FAIL=%d", pass, fail); + return (fail > 0); } #endif diff --git a/src/util/vstream.c b/src/util/vstream.c index b4f9fbb..affbcc0 100644 --- a/src/util/vstream.c +++ b/src/util/vstream.c @@ -522,6 +522,7 @@ /* System library. */ #include +#include #include /* 44BSD stdarg.h uses abort() */ #include #include @@ -1386,7 +1387,38 @@ VSTREAM *vstream_fopen(const char *path, int flags, mode_t mode) VSTREAM *stream; int fd; - if ((fd = open(path, flags, mode)) < 0) { + /* + * To set permissions on new files only, we need to distinguish between + * creating a new file and opening an existing one. 
+ */ +#define open_create(path, flags, mode) \ + open((path), (flags) | (O_CREAT | O_EXCL), (mode)) +#define open_exist(path, flags, mode) \ + open((path), (flags) & ~(O_CREAT | O_EXCL), (mode)) + + switch (flags & (O_CREAT | O_EXCL)) { + case O_CREAT: + fd = open_exist(path, flags, mode); + if (fd < 0 && errno == ENOENT) { + fd = open_create(path, flags, mode); + if (fd >= 0) { + if (fchmod(fd, mode) < 0) /* can't uncreate */ + msg_warn("fchmod %s 0%o: %m", path, (unsigned) mode); + } else if ( /* fd < 0 && */ errno == EEXIST) + fd = open_exist(path, flags, mode); + } + break; + case O_CREAT | O_EXCL: + fd = open(path, flags, mode); + if (fd >= 0) + if (fchmod(fd, mode) < 0) /* can't uncreate */ + msg_warn("fchmod %s 0%o: %m", path, (unsigned) mode); + break; + default: + fd = open(path, flags, mode); + break; + } + if (fd < 0) { return (0); } else { stream = vstream_fdopen(fd, flags); -- cgit v1.2.3