diff options
Diffstat (limited to 'src/util/dict_thash.c')
-rw-r--r-- | src/util/dict_thash.c | 255 |
1 files changed, 255 insertions, 0 deletions
diff --git a/src/util/dict_thash.c b/src/util/dict_thash.c new file mode 100644 index 0000000..69eb17b --- /dev/null +++ b/src/util/dict_thash.c @@ -0,0 +1,255 @@ +/*++ +/* NAME +/* dict_thash 3 +/* SUMMARY +/* dictionary manager interface to hashed flat text files +/* SYNOPSIS +/* #include <dict_thash.h> +/* +/* DICT *dict_thash_open(path, open_flags, dict_flags) +/* const char *name; +/* const char *path; +/* int open_flags; +/* int dict_flags; +/* DESCRIPTION +/* dict_thash_open() opens the named flat text file, creates +/* an in-memory hash table, and makes it available via the +/* generic interface described in dict_open(3). The input +/* format is as with postmap(1). +/* DIAGNOSTICS +/* Fatal errors: cannot open file, out of memory. +/* SEE ALSO +/* dict(3) generic dictionary manager +/* LICENSE +/* .ad +/* .fi +/* The Secure Mailer license must be distributed with this software. +/* AUTHOR(S) +/* Wietse Venema +/* IBM T.J. Watson Research +/* P.O. Box 704 +/* Yorktown Heights, NY 10598, USA +/* +/* Wietse Venema +/* Google, Inc. +/* 111 8th Avenue +/* New York, NY 10011, USA +/*--*/ + +/* System library. */ + +#include <sys_defs.h> +#include <sys/stat.h> +#include <ctype.h> +#include <string.h> + +/* Utility library. */ + +#include <msg.h> +#include <mymalloc.h> +#include <iostuff.h> +#include <vstring.h> +#include <stringops.h> +#include <readlline.h> +#include <dict.h> +#include <dict_ht.h> +#include <dict_thash.h> + +/* Application-specific. */ + +#define STR vstring_str +#define LEN VSTRING_LEN + +/* dict_thash_open - open flat text data base */ + +DICT *dict_thash_open(const char *path, int open_flags, int dict_flags) +{ + DICT *dict; + VSTREAM *fp = 0; /* DICT_THASH_OPEN_RETURN() */ + struct stat st; + time_t before; + time_t after; + VSTRING *line_buffer = 0; /* DICT_THASH_OPEN_RETURN() */ + int lineno; + int last_line; + char *key; + char *value; + + /* + * Let the optimizer worry about eliminating redundant code. + */ +#define DICT_THASH_OPEN_RETURN(d) do { \ + DICT *__d = (d); \ + if (fp != 0) \ + vstream_fclose(fp); \ + if (line_buffer != 0) \ + vstring_free(line_buffer); \ + return (__d); \ + } while (0) + + /* + * Sanity checks. + */ + if (open_flags != O_RDONLY) + DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path, + open_flags, dict_flags, + "%s:%s map requires O_RDONLY access mode", + DICT_TYPE_THASH, path)); + + /* + * Read the flat text file into in-memory hash. Read the file again if it + * may have changed while we were reading. + */ + for (before = time((time_t *) 0); /* see below */ ; before = after) { + if ((fp = vstream_fopen(path, open_flags, 0644)) == 0) { + DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path, + open_flags, dict_flags, + "open database %s: %m", path)); + } + + /* + * Reuse the "internal" dictionary type. + */ + dict = dict_open3(DICT_TYPE_HT, path, open_flags, dict_flags); + dict_type_override(dict, DICT_TYPE_THASH); + + /* + * XXX This duplicates the parser in postmap.c. + */ + if (line_buffer == 0) + line_buffer = vstring_alloc(100); + last_line = 0; + while (readllines(line_buffer, fp, &last_line, &lineno)) { + int in_quotes = 0; + + /* + * First some UTF-8 checks sans casefolding. + */ + if ((dict->flags & DICT_FLAG_UTF8_ACTIVE) + && allascii(STR(line_buffer)) == 0 + && valid_utf8_string(STR(line_buffer), LEN(line_buffer)) == 0) { + msg_warn("%s, line %d: non-UTF-8 input \"%s\"" + " -- ignoring this line", + VSTREAM_PATH(fp), lineno, STR(line_buffer)); + continue; + } + + /* + * Split on the first whitespace character, then trim leading and + * trailing whitespace from key and value. + */ + for (value = STR(line_buffer); *value; value++) { + if (*value == '\\') { + if (*++value == 0) + break; + } else if (ISSPACE(*value)) { + if (!in_quotes) + break; + } else if (*value == '"') { + in_quotes = !in_quotes; + } + } + if (in_quotes) { + msg_warn("%s, line %d: unbalanced '\"' in '%s'" + " -- ignoring this line", + VSTREAM_PATH(fp), lineno, STR(line_buffer)); + continue; + } + if (*value) + *value++ = 0; + while (ISSPACE(*value)) + value++; + trimblanks(value, 0)[0] = 0; + + /* + * Leave the key in quoted form, for consistency with postmap.c + * and dict_inline.c. + */ + key = STR(line_buffer); + + /* + * Enforce the "key whitespace value" format. Disallow missing + * keys or missing values. + */ + if (*key == 0 || *value == 0) { + msg_warn("%s, line %d: expected format: key whitespace value" + " -- ignoring this line", path, lineno); + continue; + } + if (key[strlen(key) - 1] == ':') + msg_warn("%s, line %d: record is in \"key: value\" format;" + " is this an alias file?", path, lineno); + + /* + * Optionally treat the value as a filename, and replace the value + * with the BASE64-encoded content of the named file. + */ + if (dict_flags & DICT_FLAG_SRC_RHS_IS_FILE) { + VSTRING *base64_buf; + char *err; + + if ((base64_buf = dict_file_to_b64(dict, value)) == 0) { + err = dict_file_get_error(dict); + msg_warn("%s, line %d: %s: skipping this entry", + VSTREAM_PATH(fp), lineno, err); + myfree(err); + continue; + } + value = vstring_str(base64_buf); + } + + /* + * Store the value under the key. Handle duplicates + * appropriately. XXX Move this into dict_ht, but 1) that map + * ignores duplicates by default and we would have to check that + * we won't break existing code that depends on such behavior; 2) + * by inlining the checks here we can degrade gracefully instead + * of terminating with a fatal error. See comment in + * dict_inline.c. + */ + if (dict->lookup(dict, key) != 0) { + if (dict_flags & DICT_FLAG_DUP_IGNORE) { + /* void */ ; + } else if (dict_flags & DICT_FLAG_DUP_REPLACE) { + dict->update(dict, key, value); + } else if (dict_flags & DICT_FLAG_DUP_WARN) { + msg_warn("%s, line %d: duplicate entry: \"%s\"", + path, lineno, key); + } else { + dict->close(dict); + DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path, + open_flags, dict_flags, + "%s, line %d: duplicate entry: \"%s\"", + path, lineno, key)); + } + } else { + dict->update(dict, key, value); + } + } + + /* + * See if the source file is hot. + */ + if (fstat(vstream_fileno(fp), &st) < 0) + msg_fatal("fstat %s: %m", path); + if (vstream_fclose(fp)) + msg_fatal("read %s: %m", path); + fp = 0; /* DICT_THASH_OPEN_RETURN() */ + after = time((time_t *) 0); + if (st.st_mtime < before - 1 || st.st_mtime > after) + break; + + /* + * Yes, it is hot. Discard the result and read the file again. + */ + dict->close(dict); + if (msg_verbose > 1) + msg_info("pausing to let file %s cool down", path); + doze(300000); + } + + dict->owner.uid = st.st_uid; + dict->owner.status = (st.st_uid != 0); + + DICT_THASH_OPEN_RETURN(DICT_DEBUG (dict)); +} |