summaryrefslogtreecommitdiffstats
path: root/src/util/dict_utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/util/dict_utf8.c')
-rw-r--r--src/util/dict_utf8.c300
1 files changed, 300 insertions, 0 deletions
diff --git a/src/util/dict_utf8.c b/src/util/dict_utf8.c
new file mode 100644
index 0000000..f1fc65a
--- /dev/null
+++ b/src/util/dict_utf8.c
@@ -0,0 +1,300 @@
+/*++
+/* NAME
+/* dict_utf8 3
+/* SUMMARY
+/* dictionary UTF-8 helpers
+/* SYNOPSIS
+/* #include <dict.h>
+/*
+/* DICT *dict_utf8_activate(
+/* DICT *dict)
+/* DESCRIPTION
+/* dict_utf8_activate() wraps a dictionary's lookup/update/delete
+/* methods with code that enforces UTF-8 checks on keys and
+/* values, and that logs a warning when incorrect UTF-8 is
+/* encountered. The original dictionary handle becomes invalid.
+/*
+/* The wrapper code enforces a policy that maximizes application
+/* robustness (it avoids the need for new error-handling code
+/* paths in application code). Attempts to store non-UTF-8
+/* keys or values are skipped while reporting a non-error
+/* status, attempts to look up or delete non-UTF-8 keys are
+/* skipped while reporting a non-error status, and lookup
+/* results that contain a non-UTF-8 value are blocked while
+/* reporting a configuration error.
+/* BUGS
+/* dict_utf8_activate() does not nest.
+/* LICENSE
+/* .ad
+/* .fi
+/* The Secure Mailer license must be distributed with this software.
+/* AUTHOR(S)
+/* Wietse Venema
+/* IBM T.J. Watson Research
+/* P.O. Box 704
+/* Yorktown Heights, NY 10598, USA
+/*
+/* Wietse Venema
+/* Google, Inc.
+/* 111 8th Avenue
+/* New York, NY 10011, USA
+/*--*/
+
+ /*
+ * System library.
+ */
+#include <sys_defs.h>
+#include <string.h>
+
+ /*
+ * Utility library.
+ */
+#include <msg.h>
+#include <stringops.h>
+#include <dict.h>
+#include <mymalloc.h>
+#include <msg.h>
+
+ /*
+ * The goal is to maximize robustness: bad UTF-8 should not appear in keys,
+ * because those are derived from controlled inputs, and values should be
+ * printable before they are stored. But if we failed to check something
+ * then it should not result in fatal errors and thus open up the system for
+ * a denial-of-service attack.
+ *
+ * Proposed over-all policy: skip attempts to store invalid UTF-8 lookup keys
+ * or values. Rationale: some storage may not permit malformed UTF-8. This
+ * maximizes program robustness. If we get an invalid lookup result, report
+ * a configuration error.
+ *
+ * LOOKUP
+ *
+ * If the key is invalid, log a warning and skip the request. Rationale: the
+ * item cannot exist.
+ *
+ * If the lookup result is invalid, log a warning and return a configuration
+ * error.
+ *
+ * UPDATE
+ *
+ * If the key is invalid, then log a warning and skip the request. Rationale:
+ * the item cannot exist.
+ *
+ * If the value is invalid, log a warning and skip the request. Rationale:
+ * storage may not permit malformed UTF-8. This maximizes program
+ * robustness.
+ *
+ * DELETE
+ *
+ * If the key is invalid, then log a warning and skip the request. Rationale:
+ * the item cannot exist.
+ */
+
+/* dict_utf8_check_fold - casefold or validate string */
+
+static char *dict_utf8_check_fold(DICT *dict, const char *string,
+                                  CONST_CHAR_STAR *err)
+{
+    int     wanted_fold;
+    int     usable_fold;
+
+    /*
+     * Contract: the result is a null pointer when the string fails
+     * validation, the result of casefold() when casefolding applies to this
+     * dictionary, and otherwise the caller's own string with the const
+     * qualifier cast away.
+     */
+
+    /*
+     * Reject input that is neither all-ASCII nor valid UTF-8. The reason is
+     * reported through err only when the caller supplied a place for it.
+     */
+    if (allascii(string) == 0
+        && valid_utf8_string(string, strlen(string)) == 0) {
+        if (err != 0)
+            *err = "malformed UTF-8 or invalid codepoint";
+        return (0);
+    }
+
+    /*
+     * Of the casefold bits that are set on this dictionary, only the one
+     * that corresponds with the DICT_FLAG_FIXED setting is honored. When
+     * that bit is not set, hand the caller's string back unchanged.
+     */
+    wanted_fold = (dict->flags & DICT_FLAG_FOLD_ANY);
+    usable_fold = ((dict->flags & DICT_FLAG_FIXED) != 0 ?
+                   DICT_FLAG_FOLD_FIX : DICT_FLAG_FOLD_MUL);
+    if ((wanted_fold & usable_fold) == 0)
+        return ((char *) string);
+
+    /*
+     * Casefold into the dictionary's fold buffer, creating the buffer on
+     * first use.
+     */
+    if (dict->fold_buf == 0)
+        dict->fold_buf = vstring_alloc(10);
+    return (casefold(dict->fold_buf, string));
+}
+
+/* dict_utf8_check - validate UTF-8 string */
+
+static int dict_utf8_check(const char *string, CONST_CHAR_STAR *err)
+{
+    int     acceptable;
+
+    /*
+     * Contract: the result is 1 when the string is all-ASCII or valid
+     * UTF-8, and 0 otherwise. In the latter case a static explanation is
+     * stored through err, provided that err is not a null pointer.
+     */
+    acceptable = (allascii(string)
+                  || valid_utf8_string(string, strlen(string)) != 0);
+    if (acceptable)
+        return (1);
+
+    if (err != 0)
+        *err = "malformed UTF-8 or invalid codepoint";
+    return (0);
+}
+
+/* dict_utf8_lookup - UTF-8 lookup method wrapper */
+
+static const char *dict_utf8_lookup(DICT *dict, const char *key)
+{
+    DICT_UTF8_BACKUP *saved_methods;
+    const char *why;
+    const char *safe_key;
+    const char *result;
+    int     fold_bits;
+
+    /*
+     * A key that fails validation is not looked up at all: log a warning
+     * and report "not found" without an error.
+     */
+    safe_key = dict_utf8_check_fold(dict, key, &why);
+    if (safe_key == 0) {
+        msg_warn("%s:%s: non-UTF-8 key \"%s\": %s",
+                 dict->type, dict->name, key, why);
+        dict->error = DICT_ERR_NONE;
+        return (0);
+    }
+
+    /*
+     * Call the saved lookup method with the casefold flags temporarily
+     * cleared, then put back the flags that were set before the call.
+     */
+    fold_bits = (dict->flags & DICT_FLAG_FOLD_ANY);
+    dict->flags &= ~DICT_FLAG_FOLD_ANY;
+    saved_methods = dict->utf8_backup;
+    result = saved_methods->lookup(dict, safe_key);
+    dict->flags |= fold_bits;
+
+    /*
+     * Pass along "not found" and valid results as they are.
+     */
+    if (result == 0 || dict_utf8_check(result, &why) != 0)
+        return (result);
+
+    /*
+     * The table returned a value that is not valid UTF-8: log a warning,
+     * flag a configuration error, and withhold the value.
+     */
+    msg_warn("%s:%s: key \"%s\": non-UTF-8 value \"%s\": %s",
+             dict->type, dict->name, key, result, why);
+    dict->error = DICT_ERR_CONFIG;
+    return (0);
+}
+
+/* dict_utf8_update - UTF-8 update method wrapper */
+
+static int dict_utf8_update(DICT *dict, const char *key, const char *value)
+{
+    DICT_UTF8_BACKUP *saved_methods;
+    const char *why;
+    const char *safe_key;
+    int     fold_bits;
+    int     result;
+
+    /*
+     * A key that fails validation is not stored: log a warning and report
+     * success without touching the table.
+     */
+    safe_key = dict_utf8_check_fold(dict, key, &why);
+    if (safe_key == 0) {
+        msg_warn("%s:%s: non-UTF-8 key \"%s\": %s",
+                 dict->type, dict->name, key, why);
+        dict->error = DICT_ERR_NONE;
+        return (DICT_STAT_SUCCESS);
+    }
+
+    /*
+     * The same policy applies to a value that fails validation. The value
+     * is validated only; it is never casefolded.
+     */
+    if (dict_utf8_check(value, &why) == 0) {
+        msg_warn("%s:%s: key \"%s\": non-UTF-8 value \"%s\": %s",
+                 dict->type, dict->name, key, value, why);
+        dict->error = DICT_ERR_NONE;
+        return (DICT_STAT_SUCCESS);
+    }
+
+    /*
+     * Call the saved update method with the casefold flags temporarily
+     * cleared, then put back the flags that were set before the call.
+     */
+    fold_bits = (dict->flags & DICT_FLAG_FOLD_ANY);
+    dict->flags &= ~DICT_FLAG_FOLD_ANY;
+    saved_methods = dict->utf8_backup;
+    result = saved_methods->update(dict, safe_key, value);
+    dict->flags |= fold_bits;
+    return (result);
+}
+
+/* dict_utf8_delete - UTF-8 delete method wrapper */
+
+static int dict_utf8_delete(DICT *dict, const char *key)
+{
+    DICT_UTF8_BACKUP *saved_methods;
+    const char *why;
+    const char *safe_key;
+    int     fold_bits;
+    int     result;
+
+    /*
+     * A key that fails validation is not deleted: log a warning and report
+     * success without touching the table.
+     */
+    safe_key = dict_utf8_check_fold(dict, key, &why);
+    if (safe_key == 0) {
+        msg_warn("%s:%s: non-UTF-8 key \"%s\": %s",
+                 dict->type, dict->name, key, why);
+        dict->error = DICT_ERR_NONE;
+        return (DICT_STAT_SUCCESS);
+    }
+
+    /*
+     * Call the saved delete method with the casefold flags temporarily
+     * cleared, then put back the flags that were set before the call.
+     */
+    fold_bits = (dict->flags & DICT_FLAG_FOLD_ANY);
+    dict->flags &= ~DICT_FLAG_FOLD_ANY;
+    saved_methods = dict->utf8_backup;
+    result = saved_methods->delete(dict, safe_key);
+    dict->flags |= fold_bits;
+    return (result);
+}
+
+/* dict_utf8_activate - wrap a legacy dict object for UTF-8 processing */
+
+DICT *dict_utf8_activate(DICT *dict)
+{
+    const char myname[] = "dict_utf8_activate";
+    DICT_UTF8_BACKUP *saved;
+
+    /*
+     * Preconditions, each enforced with a panic: Unicode support is
+     * enabled, this dictionary requests it, and the wrapper is not already
+     * installed on this dictionary.
+     */
+    if (util_utf8_enable == 0)
+        msg_panic("%s: Unicode support is not available", myname);
+    if ((dict->flags & DICT_FLAG_UTF8_REQUEST) == 0)
+        msg_panic("%s: %s:%s does not request Unicode support",
+                  myname, dict->type, dict->name);
+    if ((dict->flags & DICT_FLAG_UTF8_ACTIVE) != 0 || dict->utf8_backup != 0)
+        msg_panic("%s: %s:%s Unicode support is already activated",
+                  myname, dict->type, dict->name);
+
+    /*
+     * No separate proxy object is placed in front of the dictionary,
+     * because that would require propagating data member changes (errors,
+     * flags, and so on) in both directions between the two objects.
+     * Instead, the original method pointers are saved in a backup record
+     * that hangs off the dictionary itself.
+     */
+    saved = (DICT_UTF8_BACKUP *) mymalloc(sizeof(*saved));
+    dict->utf8_backup = saved;
+
+    /*
+     * Save and redirect the lookup, update and delete methods, one at a
+     * time. The iterator and the destructor are deliberately left alone.
+     */
+    saved->lookup = dict->lookup;
+    dict->lookup = dict_utf8_lookup;
+
+    saved->update = dict->update;
+    dict->update = dict_utf8_update;
+
+    saved->delete = dict->delete;
+    dict->delete = dict_utf8_delete;
+
+    /*
+     * Record that the wrapper is installed; the "already activated" check
+     * above tests for this flag.
+     */
+    dict->flags |= DICT_FLAG_UTF8_ACTIVE;
+
+    return (dict);
+}