summaryrefslogtreecommitdiffstats
path: root/src/hash_table.h
blob: 91ae63dfebbba7d4a1e15f10d969a5a5b236777c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
/**
 * @file hash_table.h
 * @author Radek Krejci <rkrejci@cesnet.cz>
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief libyang hash table
 *
 * Copyright (c) 2015 - 2022 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */

#ifndef LY_HASH_TABLE_H_
#define LY_HASH_TABLE_H_

#include <pthread.h>
#include <stddef.h>
#include <stdint.h>

#include "compat.h"
#include "log.h"

/**
 * @brief Compute hash from (several) string(s).
 *
 * Usage:
 * - init hash to 0
 * - repeatedly call ::dict_hash_multi(), provide hash from the last call
 * - call ::dict_hash_multi() with key_part = NULL to finish the hash
 */
uint32_t dict_hash_multi(uint32_t hash, const char *key_part, size_t len);

/**
 * @brief Compute hash from a string.
 */
uint32_t dict_hash(const char *key, size_t len);

/**
 * @brief Callback for checking hash table values equivalence.
 *
 * @param[in] val1_p Pointer to the first value, the one being searched (inserted/removed).
 * @param[in] val2_p Pointer to the second value, the one stored in the hash table.
 * @param[in] mod Whether the operation modifies the hash table (insert or remove) or not (find).
 * @param[in] cb_data User callback data.
 * @return false (non-equal) or true (equal).
 */
typedef ly_bool (*lyht_value_equal_cb)(void *val1_p, void *val2_p, ly_bool mod, void *cb_data);

/** reference value for 100% */
#define LYHT_HUNDRED_PERCENTAGE 100

/** when the table is at least this much percent full, it is enlarged (double the size) */
#define LYHT_ENLARGE_PERCENTAGE 75

/** only once the table is this much percent full, enable shrinking */
#define LYHT_FIRST_SHRINK_PERCENTAGE 50

/** when the table is less than this much percent full, it is shrunk (half the size) */
#define LYHT_SHRINK_PERCENTAGE 25

/** when the table has less than this much percent empty records, it is rehashed to get rid of all the invalid records */
#define LYHT_REHASH_PERCENTAGE 2

/** never shrink beyond this size */
#define LYHT_MIN_SIZE 8

/**
 * @brief Generic hash table record.
 */
struct ht_rec {
    uint32_t hash;        /* hash of the value */
    int32_t hits;         /* collision/overflow value count - 1 (a filled entry has 1 hit,
                           * special value -1 means a deleted record) */
    unsigned char val[1]; /* arbitrary-size value */
} _PACKED;

/**
 * @brief (Very) generic hash table.
 *
 * Hash table with open addressing collision resolution and
 * linear probing of interval 1 (next free record is used).
 * Removal is lazy (removed records are only marked), but
 * if possible, they are fully emptied.
 */
struct hash_table {
    uint32_t used;        /* number of values stored in the hash table (filled records) */
    uint32_t size;        /* always holds 2^x == size (is power of 2), actually number of records allocated */
    uint32_t invalid;     /* number of invalid records (deleted) */
    lyht_value_equal_cb val_equal; /* callback for testing value equivalence */
    void *cb_data;        /* user data callback arbitrary value */
    uint16_t resize;      /* 0 - resizing is disabled, *
                           * 1 - enlarging is enabled, *
                           * 2 - both shrinking and enlarging is enabled */
    uint16_t rec_size;    /* real size (in bytes) of one record for accessing recs array */
    unsigned char *recs;  /* pointer to the hash table itself (array of struct ht_rec) */
};

struct dict_rec {
    char *value;
    uint32_t refcount;
};

/**
 * dictionary to store repeating strings
 */
struct dict_table {
    struct hash_table *hash_tab;
    pthread_mutex_t lock;
};

/**
 * @brief Initiate content (non-zero values) of the dictionary
 *
 * @param[in] dict Dictionary table to initiate
 */
void lydict_init(struct dict_table *dict);

/**
 * @brief Cleanup the dictionary content
 *
 * @param[in] dict Dictionary table to cleanup
 */
void lydict_clean(struct dict_table *dict);

/**
 * @brief Create new hash table.
 *
 * @param[in] size Starting size of the hash table (capacity of values), must be power of 2.
 * @param[in] val_size Size in bytes of value (the stored hashed item).
 * @param[in] val_equal Callback for checking value equivalence.
 * @param[in] cb_data User data always passed to @p val_equal.
 * @param[in] resize Whether to resize the table on too few/too many records taken.
 * @return Empty hash table, NULL on error.
 */
struct hash_table *lyht_new(uint32_t size, uint16_t val_size, lyht_value_equal_cb val_equal, void *cb_data, uint16_t resize);

/**
 * @brief Set hash table value equal callback.
 *
 * @param[in] ht Hash table to modify.
 * @param[in] new_val_equal New callback for checking value equivalence.
 * @return Previous callback for checking value equivalence.
 */
lyht_value_equal_cb lyht_set_cb(struct hash_table *ht, lyht_value_equal_cb new_val_equal);

/**
 * @brief Set hash table value equal callback user data.
 *
 * @param[in] ht Hash table to modify.
 * @param[in] new_cb_data New data for values callback.
 * @return Previous data for values callback.
 */
void *lyht_set_cb_data(struct hash_table *ht, void *new_cb_data);

/**
 * @brief Make a duplicate of an existing hash table.
 *
 * @param[in] orig Original hash table to duplicate.
 * @return Duplicated hash table @p orig, NULL on error.
 */
struct hash_table *lyht_dup(const struct hash_table *orig);

/**
 * @brief Free a hash table.
 *
 * @param[in] ht Hash table to be freed.
 */
void lyht_free(struct hash_table *ht);

/**
 * @brief Find a value in a hash table.
 *
 * @param[in] ht Hash table to search in.
 * @param[in] val_p Pointer to the value to find.
 * @param[in] hash Hash of the stored value.
 * @param[out] match_p Pointer to the matching value, optional.
 * @return LY_SUCCESS if value was found,
 * @return LY_ENOTFOUND if not found.
 */
LY_ERR lyht_find(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p);

/**
 * @brief Find another equal value in the hash table.
 *
 * @param[in] ht Hash table to search in.
 * @param[in] val_p Pointer to the previously found value in @p ht.
 * @param[in] hash Hash of the previously found value.
 * @param[out] match_p Pointer to the matching value, optional.
 * @return LY_SUCCESS if value was found,
 * @return LY_ENOTFOUND if not found.
 */
LY_ERR lyht_find_next(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p);

/**
 * @brief Find another equal value in the hash table. Same functionality as ::lyht_find_next()
 * but allows to specify a collision val equal callback to be used for checking for matching colliding values.
 *
 * @param[in] ht Hash table to search in.
 * @param[in] val_p Pointer to the previously found value in @p ht.
 * @param[in] hash Hash of the previously found value.
 * @param[in] collision_val_equal Val equal callback to use for checking collisions.
 * @param[out] match_p Pointer to the matching value, optional.
 * @return LY_SUCCESS if value was found,
 * @return LY_ENOTFOUND if not found.
 */
LY_ERR lyht_find_next_with_collision_cb(struct hash_table *ht, void *val_p, uint32_t hash,
        lyht_value_equal_cb collision_val_equal, void **match_p);

/**
 * @brief Insert a value into a hash table.
 *
 * @param[in] ht Hash table to insert into.
 * @param[in] val_p Pointer to the value to insert. Be careful, if the values stored in the hash table
 * are pointers, @p val_p must be a pointer to a pointer.
 * @param[in] hash Hash of the stored value.
 * @param[out] match_p Pointer to the stored value, optional
 * @return LY_SUCCESS on success,
 * @return LY_EEXIST in case the value is already present.
 * @return LY_EMEM in case of memory allocation failure.
 */
LY_ERR lyht_insert(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p);

/**
 * @brief Insert a value into hash table. Same functionality as ::lyht_insert()
 * but allows to specify a temporary val equal callback to be used in case the hash table
 * will be resized after successful insertion.
 *
 * @param[in] ht Hash table to insert into.
 * @param[in] val_p Pointer to the value to insert. Be careful, if the values stored in the hash table
 * are pointers, @p val_p must be a pointer to a pointer.
 * @param[in] hash Hash of the stored value.
 * @param[in] resize_val_equal Val equal callback to use for resizing.
 * @param[out] match_p Pointer to the stored value, optional
 * @return LY_SUCCESS on success,
 * @return LY_EEXIST in case the value is already present.
 * @return LY_EMEM in case of memory allocation failure.
 */
LY_ERR lyht_insert_with_resize_cb(struct hash_table *ht, void *val_p, uint32_t hash, lyht_value_equal_cb resize_val_equal,
        void **match_p);

/**
 * @brief Remove a value from a hash table.
 *
 * @param[in] ht Hash table to remove from.
 * @param[in] val_p Pointer to value to be removed. Be careful, if the values stored in the hash table
 * are pointers, @p val_p must be a pointer to a pointer.
 * @param[in] hash Hash of the stored value.
 * @return LY_SUCCESS on success,
 * @return LY_ENOTFOUND if value was not found.
 */
LY_ERR lyht_remove(struct hash_table *ht, void *val_p, uint32_t hash);

/**
 * @brief Remove a value from a hash table. Same functionality as ::lyht_remove()
 * but allows to specify a temporary val equal callback to be used in case the hash table
 * will be resized after successful removal.
 *
 * @param[in] ht Hash table to remove from.
 * @param[in] val_p Pointer to value to be removed. Be careful, if the values stored in the hash table
 * are pointers, @p val_p must be a pointer to a pointer.
 * @param[in] hash Hash of the stored value.
 * @param[in] resize_val_equal Val equal callback to use for resizing.
 * @return LY_SUCCESS on success,
 * @return LY_ENOTFOUND if value was not found.
 */
LY_ERR lyht_remove_with_resize_cb(struct hash_table *ht, void *val_p, uint32_t hash, lyht_value_equal_cb resize_val_equal);

/**
 * @brief Get suitable size of a hash table for a fixed number of items.
 *
 * @param[in] item_count Number of stored items.
 * @return Hash table size.
 */
uint32_t lyht_get_fixed_size(uint32_t item_count);

#endif /* LY_HASH_TABLE_H_ */