summaryrefslogtreecommitdiffstats
path: root/src/util/dict_thash.c
blob: 69eb17b6ff52f76378d1928aaaaf945646c63cb6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
/*++
/* NAME
/*	dict_thash 3
/* SUMMARY
/*	dictionary manager interface to hashed flat text files
/* SYNOPSIS
/*	#include <dict_thash.h>
/*
/*	DICT	*dict_thash_open(path, open_flags, dict_flags)
/*	const char *path;
/*	int	open_flags;
/*	int	dict_flags;
/* DESCRIPTION
/*	dict_thash_open() opens the named flat text file, creates
/*	an in-memory hash table, and makes it available via the
/*	generic interface described in dict_open(3). The input
/*	format is as with postmap(1).
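/* DIAGNOSTICS
/*	Fatal errors: fstat or read error on the open file, out of memory.
/*
/*	Other errors (access mode other than O_RDONLY, file cannot be
/*	opened, duplicate key without DICT_FLAG_DUP_IGNORE,
/*	DICT_FLAG_DUP_REPLACE or DICT_FLAG_DUP_WARN): the result is
/*	the dictionary returned by dict_surrogate().
/*
/*	Warnings: malformed input lines are logged and skipped.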
/* SEE ALSO
/*	dict(3) generic dictionary manager
/* LICENSE
/* .ad
/* .fi
/*	The Secure Mailer license must be distributed with this software.
/* AUTHOR(S)
/*	Wietse Venema
/*	IBM T.J. Watson Research
/*	P.O. Box 704
/*	Yorktown Heights, NY 10598, USA
/*
/*	Wietse Venema
/*	Google, Inc.
/*	111 8th Avenue
/*	New York, NY 10011, USA
/*--*/

/* System library. */

#include <sys_defs.h>
#include <sys/stat.h>
#include <ctype.h>
#include <string.h>

/* Utility library. */

#include <msg.h>
#include <mymalloc.h>
#include <iostuff.h>
#include <vstring.h>
#include <stringops.h>
#include <readlline.h>
#include <dict.h>
#include <dict_ht.h>
#include <dict_thash.h>

/* Application-specific. */

/* Shorthand aliases for the VSTRING accessors used throughout this file. */
#define STR	vstring_str
#define LEN	VSTRING_LEN

/* dict_thash_split - split one logical line into key and value, in place */

/*
 * The key is left at the start of "line" (null-terminated, still in quoted
 * form). The result is a pointer to the trimmed value inside the same
 * buffer, or a null pointer when the line contains an unbalanced double
 * quote. In the latter case the buffer is not modified.
 */
static char *dict_thash_split(char *line)
{
    char   *value;
    int     in_quotes = 0;

    /*
     * Find the first whitespace character that is neither inside double
     * quotes nor preceded by a backslash. A backslash protects the next
     * character, whatever it is.
     */
    for (value = line; *value; value++) {
	if (*value == '\\') {
	    if (*++value == 0)
		break;
	} else if (ISSPACE(*value)) {
	    if (!in_quotes)
		break;
	} else if (*value == '"') {
	    in_quotes = !in_quotes;
	}
    }

    /*
     * Report unbalanced quotes before touching the buffer, so that the
     * caller can still log the complete line.
     */
    if (in_quotes)
	return (0);

    /*
     * Terminate the key, then trim leading and trailing whitespace from the
     * value.
     */
    if (*value)
	*value++ = 0;
    while (ISSPACE(*value))
	value++;
    trimblanks(value, 0)[0] = 0;
    return (value);
}

/* dict_thash_open - open flat text data base */

/*
 * Arguments:
 *
 * path		the flat text file to load.
 *
 * open_flags	must be O_RDONLY; any other value produces a surrogate
 *		dictionary.
 *
 * dict_flags	passed on to the underlying DICT_TYPE_HT dictionary. This
 *		function itself looks at DICT_FLAG_SRC_RHS_IS_FILE and at the
 *		DICT_FLAG_DUP_IGNORE, DICT_FLAG_DUP_REPLACE and
 *		DICT_FLAG_DUP_WARN duplicate-handling flags.
 *
 * Result: DICT_DEBUG(dict) for the populated in-memory table, or the result
 * of dict_surrogate() when the access mode is wrong, the file cannot be
 * opened, or a duplicate key is found and no duplicate-handling flag is set.
 * Terminates with msg_fatal() when fstat() fails or vstream_fclose() reports
 * an error.
 */
DICT   *dict_thash_open(const char *path, int open_flags, int dict_flags)
{
    DICT   *dict;
    VSTREAM *fp = 0;			/* DICT_THASH_OPEN_RETURN() */
    struct stat st;
    time_t  before;
    time_t  after;
    VSTRING *line_buffer = 0;		/* DICT_THASH_OPEN_RETURN() */
    int     lineno;
    int     last_line;
    char   *key;
    char   *value;

    /*
     * Let the optimizer worry about eliminating redundant code.
     * 
     * The argument is evaluated before the cleanup runs, because it may
     * reference text that lives in line_buffer (for example the key in the
     * duplicate-entry error below).
     */
#define DICT_THASH_OPEN_RETURN(d) do { \
	DICT *thash_open_result = (d); \
	if (fp != 0) \
	    vstream_fclose(fp); \
	if (line_buffer != 0) \
	    vstring_free(line_buffer); \
	return (thash_open_result); \
    } while (0)

    /*
     * Sanity checks.
     */
    if (open_flags != O_RDONLY)
	DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
					      open_flags, dict_flags,
				  "%s:%s map requires O_RDONLY access mode",
					      DICT_TYPE_THASH, path));

    /*
     * Read the flat text file into in-memory hash. Read the file again if it
     * may have changed while we were reading. Each pass starts with a fresh
     * stream and a fresh hash table; "before" carries the time at which the
     * current pass started.
     */
    for (before = time((time_t *) 0); /* see below */ ; before = after) {
	if ((fp = vstream_fopen(path, open_flags, 0644)) == 0) {
	    DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
						  open_flags, dict_flags,
					     "open database %s: %m", path));
	}

	/*
	 * Reuse the "internal" dictionary type.
	 */
	dict = dict_open3(DICT_TYPE_HT, path, open_flags, dict_flags);
	dict_type_override(dict, DICT_TYPE_THASH);

	/*
	 * XXX This duplicates the parser in postmap.c.
	 */
	if (line_buffer == 0)
	    line_buffer = vstring_alloc(100);
	last_line = 0;
	while (readllines(line_buffer, fp, &last_line, &lineno)) {

	    /*
	     * First some UTF-8 checks sans casefolding.
	     */
	    if ((dict->flags & DICT_FLAG_UTF8_ACTIVE)
		&& allascii(STR(line_buffer)) == 0
	    && valid_utf8_string(STR(line_buffer), LEN(line_buffer)) == 0) {
		msg_warn("%s, line %d: non-UTF-8 input \"%s\""
			 " -- ignoring this line",
			 VSTREAM_PATH(fp), lineno, STR(line_buffer));
		continue;
	    }

	    /*
	     * Split on the first unquoted whitespace character, then trim
	     * leading and trailing whitespace from the value. The buffer is
	     * left intact when the quotes are unbalanced, so the warning
	     * shows the whole line.
	     */
	    if ((value = dict_thash_split(STR(line_buffer))) == 0) {
		msg_warn("%s, line %d: unbalanced '\"' in '%s'"
			 " -- ignoring this line",
			 VSTREAM_PATH(fp), lineno, STR(line_buffer));
		continue;
	    }

	    /*
	     * Leave the key in quoted form, for consistency with postmap.c
	     * and dict_inline.c.
	     */
	    key = STR(line_buffer);

	    /*
	     * Enforce the "key whitespace value" format. Disallow missing
	     * keys or missing values.
	     */
	    if (*key == 0 || *value == 0) {
		msg_warn("%s, line %d: expected format: key whitespace value"
			 " -- ignoring this line", path, lineno);
		continue;
	    }
	    /* The key is non-empty here, so strlen(key) - 1 is a valid index. */
	    if (key[strlen(key) - 1] == ':')
		msg_warn("%s, line %d: record is in \"key: value\" format;"
			 " is this an alias file?", path, lineno);

	    /*
	     * Optionally treat the value as a filename, and replace the value
	     * with the BASE64-encoded content of the named file.
	     */
	    if (dict_flags & DICT_FLAG_SRC_RHS_IS_FILE) {
		VSTRING *base64_buf;
		char   *err;

		if ((base64_buf = dict_file_to_b64(dict, value)) == 0) {
		    err = dict_file_get_error(dict);
		    msg_warn("%s, line %d: %s: skipping this entry",
			     VSTREAM_PATH(fp), lineno, err);
		    myfree(err);
		    continue;
		}
		value = vstring_str(base64_buf);
	    }

	    /*
	     * Store the value under the key. Handle duplicates
	     * appropriately. XXX Move this into dict_ht, but 1) that map
	     * ignores duplicates by default and we would have to check that
	     * we won't break existing code that depends on such behavior; 2)
	     * by inlining the checks here we can degrade gracefully instead
	     * of terminating with a fatal error. See comment in
	     * dict_inline.c.
	     * 
	     * With DICT_FLAG_DUP_IGNORE or DICT_FLAG_DUP_WARN the value that
	     * was stored first stays in place; with DICT_FLAG_DUP_REPLACE
	     * the later value wins. Without any of these flags the table is
	     * discarded and a surrogate dictionary is returned.
	     */
	    if (dict->lookup(dict, key) != 0) {
		if (dict_flags & DICT_FLAG_DUP_IGNORE) {
		     /* void */ ;
		} else if (dict_flags & DICT_FLAG_DUP_REPLACE) {
		    dict->update(dict, key, value);
		} else if (dict_flags & DICT_FLAG_DUP_WARN) {
		    msg_warn("%s, line %d: duplicate entry: \"%s\"",
			     path, lineno, key);
		} else {
		    dict->close(dict);
		    DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
						     open_flags, dict_flags,
				     "%s, line %d: duplicate entry: \"%s\"",
							path, lineno, key));
		}
	    } else {
		dict->update(dict, key, value);
	    }
	}

	/*
	 * See if the source file is hot. The file status must be taken
	 * while the stream is still open; "after" is sampled once the stream
	 * has been closed.
	 */
	if (fstat(vstream_fileno(fp), &st) < 0)
	    msg_fatal("fstat %s: %m", path);
	if (vstream_fclose(fp))
	    msg_fatal("read %s: %m", path);
	fp = 0;					/* DICT_THASH_OPEN_RETURN() */
	after = time((time_t *) 0);

	/*
	 * Accept the result when the file was last modified more than one
	 * second before this pass started, or when its modification time
	 * lies beyond the end of this pass.
	 */
	if (st.st_mtime < before - 1 || st.st_mtime > after)
	    break;

	/*
	 * Yes, it is hot. Discard the result and read the file again.
	 */
	dict->close(dict);
	if (msg_verbose > 1)
	    msg_info("pausing to let file %s cool down", path);
	doze(300000);
    }

    /*
     * Record the file owner. The status field is non-zero when the file is
     * not owned by uid 0.
     */
    dict->owner.uid = st.st_uid;
    dict->owner.status = (st.st_uid != 0);

    DICT_THASH_OPEN_RETURN(DICT_DEBUG (dict));
}