From b5896ba9f6047e7031e2bdee0622d543e11a6734 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Mon, 6 May 2024 03:46:30 +0200
Subject: Adding upstream version 3.4.23.

Signed-off-by: Daniel Baumann
---
 src/util/dict_utf8.c | 300 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 300 insertions(+)
 create mode 100644 src/util/dict_utf8.c

(limited to 'src/util/dict_utf8.c')

diff --git a/src/util/dict_utf8.c b/src/util/dict_utf8.c
new file mode 100644
index 0000000..f1fc65a
--- /dev/null
+++ b/src/util/dict_utf8.c
@@ -0,0 +1,300 @@
+/*++
+/* NAME
+/*      dict_utf8 3
+/* SUMMARY
+/*      dictionary UTF-8 helpers
+/* SYNOPSIS
+/*      #include <dict.h>
+/*
+/*      DICT    *dict_utf8_activate(
+/*      DICT    *dict)
+/* DESCRIPTION
+/*      dict_utf8_activate() wraps a dictionary's lookup/update/delete
+/*      methods with code that enforces UTF-8 checks on keys and
+/*      values, and that logs a warning when incorrect UTF-8 is
+/*      encountered. The original dictionary handle becomes invalid.
+/*
+/*      The wrapper code enforces a policy that maximizes application
+/*      robustness (it avoids the need for new error-handling code
+/*      paths in application code). Attempts to store non-UTF-8
+/*      keys or values are skipped while reporting a non-error
+/*      status, attempts to look up or delete non-UTF-8 keys are
+/*      skipped while reporting a non-error status, and lookup
+/*      results that contain a non-UTF-8 value are blocked while
+/*      reporting a configuration error.
+/* BUGS
+/*      dict_utf8_activate() does not nest.
+/* LICENSE
+/* .ad
+/* .fi
+/*      The Secure Mailer license must be distributed with this software.
+/* AUTHOR(S)
+/*      Wietse Venema
+/*      IBM T.J. Watson Research
+/*      P.O. Box 704
+/*      Yorktown Heights, NY 10598, USA
+/*
+/*      Wietse Venema
+/*      Google, Inc.
+/*      111 8th Avenue
+/*      New York, NY 10011, USA
+/*--*/
+
+ /*
+  * System library.
+  */
+#include <sys_defs.h>
+#include <string.h>
+
+ /*
+  * Utility library.
+  */
+#include <msg.h>
+#include <stringops.h>
+#include <dict.h>
+#include <mymalloc.h>
+#include <vstring.h>
+
+ /*
+  * The goal is to maximize robustness: bad UTF-8 should not appear in keys,
+  * because those are derived from controlled inputs, and values should be
+  * printable before they are stored. But if we failed to check something
+  * then it should not result in fatal errors and thus open up the system for
+  * a denial-of-service attack.
+  *
+  * Proposed over-all policy: skip attempts to store invalid UTF-8 lookup keys
+  * or values. Rationale: some storage may not permit malformed UTF-8. This
+  * maximizes program robustness. If we get an invalid lookup result, report
+  * a configuration error.
+  *
+  * LOOKUP
+  *
+  * If the key is invalid, log a warning and skip the request. Rationale: the
+  * item cannot exist.
+  *
+  * If the lookup result is invalid, log a warning and return a configuration
+  * error.
+  *
+  * UPDATE
+  *
+  * If the key is invalid, then log a warning and skip the request. Rationale:
+  * the item cannot exist.
+  *
+  * If the value is invalid, log a warning and skip the request. Rationale:
+  * storage may not permit malformed UTF-8. This maximizes program
+  * robustness.
+  *
+  * DELETE
+  *
+  * If the key is invalid, then skip the request. Rationale: the item cannot
+  * exist.
+  */
+
+/* dict_utf8_check_fold - casefold or validate string */
+
+static char *dict_utf8_check_fold(DICT *dict, const char *string,
+                                  CONST_CHAR_STAR *err)
+{
+    int     fold_flag = (dict->flags & DICT_FLAG_FOLD_ANY);
+
+    /*
+     * Validate UTF-8 without casefolding.
+     */
+    if (!allascii(string) && valid_utf8_string(string, strlen(string)) == 0) {
+        if (err)
+            *err = "malformed UTF-8 or invalid codepoint";
+        return (0);
+    }
+
+    /*
+     * Casefold UTF-8.
+     */
+    if (fold_flag != 0
+        && (fold_flag & ((dict->flags & DICT_FLAG_FIXED) ?
+                         DICT_FLAG_FOLD_FIX : DICT_FLAG_FOLD_MUL))) {
+        if (dict->fold_buf == 0)
+            dict->fold_buf = vstring_alloc(10);
+        return (casefold(dict->fold_buf, string));
+    }
+    return ((char *) string);
+}
+
+/* dict_utf8_check - validate UTF-8 string */
+
+static int dict_utf8_check(const char *string, CONST_CHAR_STAR *err)
+{
+    if (!allascii(string) && valid_utf8_string(string, strlen(string)) == 0) {
+        if (err)
+            *err = "malformed UTF-8 or invalid codepoint";
+        return (0);
+    }
+    return (1);
+}
+
+/* dict_utf8_lookup - UTF-8 lookup method wrapper */
+
+static const char *dict_utf8_lookup(DICT *dict, const char *key)
+{
+    DICT_UTF8_BACKUP *backup;
+    const char *utf8_err;
+    const char *fold_res;
+    const char *value;
+    int     saved_flags;
+
+    /*
+     * Validate and optionally fold the key, and if invalid skip the request.
+     */
+    if ((fold_res = dict_utf8_check_fold(dict, key, &utf8_err)) == 0) {
+        msg_warn("%s:%s: non-UTF-8 key \"%s\": %s",
+                 dict->type, dict->name, key, utf8_err);
+        dict->error = DICT_ERR_NONE;
+        return (0);
+    }
+
+    /*
+     * Proxy the request with casefolding turned off.
+     */
+    saved_flags = (dict->flags & DICT_FLAG_FOLD_ANY);
+    dict->flags &= ~DICT_FLAG_FOLD_ANY;
+    backup = dict->utf8_backup;
+    value = backup->lookup(dict, fold_res);
+    dict->flags |= saved_flags;
+
+    /*
+     * Validate the result, and if invalid fail the request.
+     */
+    if (value != 0 && dict_utf8_check(value, &utf8_err) == 0) {
+        msg_warn("%s:%s: key \"%s\": non-UTF-8 value \"%s\": %s",
+                 dict->type, dict->name, key, value, utf8_err);
+        dict->error = DICT_ERR_CONFIG;
+        return (0);
+    } else {
+        return (value);
+    }
+}
+
+/* dict_utf8_update - UTF-8 update method wrapper */
+
+static int dict_utf8_update(DICT *dict, const char *key, const char *value)
+{
+    DICT_UTF8_BACKUP *backup;
+    const char *utf8_err;
+    const char *fold_res;
+    int     saved_flags;
+    int     status;
+
+    /*
+     * Validate or fold the key, and if invalid skip the request.
+     */
+    if ((fold_res = dict_utf8_check_fold(dict, key, &utf8_err)) == 0) {
+        msg_warn("%s:%s: non-UTF-8 key \"%s\": %s",
+                 dict->type, dict->name, key, utf8_err);
+        dict->error = DICT_ERR_NONE;
+        return (DICT_STAT_SUCCESS);
+    }
+
+    /*
+     * Validate the value, and if invalid skip the request.
+     */
+    else if (dict_utf8_check(value, &utf8_err) == 0) {
+        msg_warn("%s:%s: key \"%s\": non-UTF-8 value \"%s\": %s",
+                 dict->type, dict->name, key, value, utf8_err);
+        dict->error = DICT_ERR_NONE;
+        return (DICT_STAT_SUCCESS);
+    }
+
+    /*
+     * Proxy the request with casefolding turned off.
+     */
+    else {
+        saved_flags = (dict->flags & DICT_FLAG_FOLD_ANY);
+        dict->flags &= ~DICT_FLAG_FOLD_ANY;
+        backup = dict->utf8_backup;
+        status = backup->update(dict, fold_res, value);
+        dict->flags |= saved_flags;
+        return (status);
+    }
+}
+
+/* dict_utf8_delete - UTF-8 delete method wrapper */
+
+static int dict_utf8_delete(DICT *dict, const char *key)
+{
+    DICT_UTF8_BACKUP *backup;
+    const char *utf8_err;
+    const char *fold_res;
+    int     saved_flags;
+    int     status;
+
+    /*
+     * Validate and optionally fold the key, and if invalid skip the request.
+     */
+    if ((fold_res = dict_utf8_check_fold(dict, key, &utf8_err)) == 0) {
+        msg_warn("%s:%s: non-UTF-8 key \"%s\": %s",
+                 dict->type, dict->name, key, utf8_err);
+        dict->error = DICT_ERR_NONE;
+        return (DICT_STAT_SUCCESS);
+    }
+
+    /*
+     * Proxy the request with casefolding turned off.
+     */
+    else {
+        saved_flags = (dict->flags & DICT_FLAG_FOLD_ANY);
+        dict->flags &= ~DICT_FLAG_FOLD_ANY;
+        backup = dict->utf8_backup;
+        status = backup->delete(dict, fold_res);
+        dict->flags |= saved_flags;
+        return (status);
+    }
+}
+
+/* dict_utf8_activate - wrap a legacy dict object for UTF-8 processing */
+
+DICT   *dict_utf8_activate(DICT *dict)
+{
+    const char myname[] = "dict_utf8_activate";
+    DICT_UTF8_BACKUP *backup;
+
+    /*
+     * Sanity check.
+     */
+    if (util_utf8_enable == 0)
+        msg_panic("%s: Unicode support is not available", myname);
+    if ((dict->flags & DICT_FLAG_UTF8_REQUEST) == 0)
+        msg_panic("%s: %s:%s does not request Unicode support",
+                  myname, dict->type, dict->name);
+    if ((dict->flags & DICT_FLAG_UTF8_ACTIVE) || dict->utf8_backup != 0)
+        msg_panic("%s: %s:%s Unicode support is already activated",
+                  myname, dict->type, dict->name);
+
+    /*
+     * Unlike dict_debug(3) we do not put a proxy dict object in front of the
+     * encapsulated object, because then we would have to bidirectionally
+     * propagate changes in the data members (errors, flags, jbuf, and so on)
+     * between proxy object and encapsulated object.
+     *
+     * Instead we attach ourselves behind the encapsulated dict object, and
+     * redirect some function pointers to ourselves.
+     */
+    backup = dict->utf8_backup = (DICT_UTF8_BACKUP *) mymalloc(sizeof(*backup));
+
+    /*
+     * Interpose on the lookup/update/delete methods. It is a conscious
+     * decision not to tinker with the iterator or destructor.
+     */
+    backup->lookup = dict->lookup;
+    backup->update = dict->update;
+    backup->delete = dict->delete;
+
+    dict->lookup = dict_utf8_lookup;
+    dict->update = dict_utf8_update;
+    dict->delete = dict_utf8_delete;
+
+    /*
+     * Leave our mark. See sanity check above.
+     */
+    dict->flags |= DICT_FLAG_UTF8_ACTIVE;
+
+    return (dict);
+}
-- 
cgit v1.2.3