summaryrefslogtreecommitdiffstats
path: root/lib/di-set.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/di-set.c')
-rw-r--r--lib/di-set.c260
1 files changed, 260 insertions, 0 deletions
diff --git a/lib/di-set.c b/lib/di-set.c
new file mode 100644
index 0000000..f8d2558
--- /dev/null
+++ b/lib/di-set.c
@@ -0,0 +1,260 @@
+/* Set operations for device-inode pairs stored in a space-efficient manner.
+
+ Copyright 2009-2022 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* written by Paul Eggert and Jim Meyering */
+
+#include <config.h>
+#include "di-set.h"
+
+#include "hash.h"
+#include "ino-map.h"
+
+#include <limits.h>
+#include <stdlib.h>
+
+/* The hash package hashes "void *", but this package wants to hash
+ integers. Use integers that are as large as possible, but no
+ larger than void *, so that they can be cast to void * and back
+ without losing information. Values of this type are what
+ di_set_insert and di_set_lookup cast to "void const *" and hand to
+ the per-device inode sets.
+ NOTE(review): this relies on size_t being no wider than void * on
+ the target platform -- confirm for unusual ABIs. */
+typedef size_t hashint;
+/* Largest hashint value: hashint is the unsigned type size_t, so
+ converting -1 to it yields the maximum. */
+#define HASHINT_MAX ((hashint) -1)
+
+/* Integers represent inode numbers. Integers in the range
+ 1..(LARGE_INO_MIN-1) represent inode numbers directly. (The hash
+ package does not work with null pointers, so inode 0 cannot be used
+ as a key.) To find the representations of other inode numbers, map
+ them through INO_MAP. map_inode_number applies this split, and
+ passes LARGE_INO_MIN to ino_map_alloc when it first creates the
+ map; presumably that makes the map hand out representatives
+ starting at LARGE_INO_MIN, so they cannot collide with directly
+ represented inode numbers -- verify against ino-map.c. */
+#define LARGE_INO_MIN (HASHINT_MAX / 2)
+
+/* Set operations for device-inode pairs stored in a space-efficient
+ manner. Use a two-level hash table. The top level hashes by
+ device number, as there are typically a small number of devices.
+ The lower level hashes by mapped inode numbers. In the typical
+ case where the inode number is positive and small, the inode number
+ maps to itself, masquerading as a void * value; otherwise, its
+ value is the result of hashing the inode value through INO_MAP. */
+
+/* A pair that maps a device number to a set of inode numbers.
+ Entries are the elements of the top-level DEV_MAP table; they are
+ allocated in map_device and released by di_ent_free. */
+struct di_ent
+{
+ /* Device number. This is the only field examined by di_ent_hash
+ and di_ent_compare, so it alone identifies the entry. */
+ dev_t dev;
+ /* Hash table of encoded inode numbers (hashint values cast to
+ void *) recorded for DEV. Created by map_device with
+ hash_initialize, whose result is stored without being checked. */
+ struct hash_table *ino_set;
+};
+
+/* A two-level hash table that manages and indexes these pairs.
+ Create one with di_set_alloc and release it with di_set_free. */
+struct di_set
+{
+ /* Map device numbers to sets of inode number representatives.
+ The elements are struct di_ent objects; the table is created in
+ di_set_alloc with di_ent_free as its element destructor. */
+ struct hash_table *dev_map;
+
+ /* If nonnull, map large inode numbers to their small
+ representatives. If null, there are no large inode numbers in
+ this set. Allocated on demand by map_inode_number. */
+ struct ino_map *ino_map;
+
+ /* Cache of the most recently allocated and otherwise-unused storage
+ for probing this table. map_device uses it both as the key for
+ hash_insert and as a one-entry cache of the last device looked
+ up; it is null when no spare entry is available. Freed by
+ di_set_free. */
+ struct di_ent *probe;
+};
+
+/* Hash function for the device table: reduce the device number of the
+   struct di_ent X to a bucket index below TABLE_SIZE.  */
+static size_t
+di_ent_hash (void const *x, size_t table_size)
+{
+  dev_t const device = ((struct di_ent const *) x)->dev;
+
+  /* Start with the low-order word of DEVICE.  If dev_t is wider than
+     size_t, fold each remaining size_t-sized word in with
+     exclusive-OR, so no bits are ignored and the slower % on the
+     wider type is avoided.  */
+  size_t folded = device;
+  unsigned int word_count
+    = (sizeof device + sizeof folded - 1) / sizeof folded;
+  unsigned int word = 1;
+
+  while (word < word_count)
+    {
+      folded ^= device >> CHAR_BIT * sizeof folded * word;
+      word++;
+    }
+
+  return folded % table_size;
+}
+
+/* Comparison function for the device table: X and Y, both struct
+   di_ent, are considered equal exactly when their device numbers
+   match.  The inode sets are not compared.  */
+static bool
+di_ent_compare (void const *x, void const *y)
+{
+  dev_t lhs_dev = ((struct di_ent const *) x)->dev;
+  dev_t rhs_dev = ((struct di_ent const *) y)->dev;
+
+  return lhs_dev == rhs_dev;
+}
+
+/* Free the device-inode-set entry V (a struct di_ent), together with
+   the inode set it owns.  Installed as the element destructor of the
+   device table by di_set_alloc.  */
+static void
+di_ent_free (void *v)
+{
+  struct di_ent *a = v;
+
+  /* map_device stores the result of hash_initialize without checking
+     it, so an entry whose inode set could not be allocated is left in
+     the table with a null INO_SET.  hash_free is not documented to
+     accept a null table, so skip it for such entries.  */
+  if (a->ino_set)
+    hash_free (a->ino_set);
+  free (a);
+}
+
+/* Allocate and return a new, empty set of device-inode pairs.
+   Return NULL if either the set itself or its device table cannot be
+   allocated.  The large-inode map and the probe cache start out null
+   and are created on demand.  */
+struct di_set *
+di_set_alloc (void)
+{
+  enum { INITIAL_DEV_MAP_SIZE = 11 };
+  struct di_set *set = malloc (sizeof *set);
+
+  if (! set)
+    return NULL;
+
+  set->dev_map = hash_initialize (INITIAL_DEV_MAP_SIZE, NULL,
+                                  di_ent_hash, di_ent_compare,
+                                  di_ent_free);
+  if (! set->dev_map)
+    {
+      /* Do not leak the partially built set.  */
+      free (set);
+      return NULL;
+    }
+
+  set->ino_map = NULL;
+  set->probe = NULL;
+  return set;
+}
+
+/* Release the set DIS and everything it owns: the device table (whose
+   entries are destroyed through di_ent_free), the large-inode map if
+   one was ever created, the cached probe entry, and DIS itself.  */
+void
+di_set_free (struct di_set *dis)
+{
+  struct ino_map *large_inodes = dis->ino_map;
+
+  hash_free (dis->dev_map);
+  if (large_inodes != NULL)
+    ino_map_free (large_inodes);
+  free (dis->probe);
+  free (dis);
+}
+
+/* Hash function for the per-device inode sets.  I is not a real
+   pointer but an encoded inode number stored in a "void *"; convert
+   it back to an integer and reduce it modulo TABLE_SIZE.  */
+static size_t
+di_ino_hash (void const *i, size_t table_size)
+{
+  hashint encoded = (hashint) i;
+
+  return encoded % table_size;
+}
+
+/* Using the DIS table, map device DEV to the hash table that
+   represents the set of inode numbers recorded for DEV, creating the
+   device's entry and inode set on first use.  Return NULL on
+   allocation failure; a later call for the same device tries again.
+
+   DIS->probe serves both as the key handed to hash_insert and as a
+   one-entry cache of the most recent result.  */
+static struct hash_table *
+map_device (struct di_set *dis, dev_t dev)
+{
+  enum { INITIAL_INO_SET_SIZE = 1021 };
+
+  /* Find space for the probe, reusing the cache if available.  */
+  struct di_ent *ent;
+  struct di_ent *probe = dis->probe;
+  if (probe)
+    {
+      /* If repeating a recent successful query, return the cached
+         result.  A null cached set records a failed attempt, so do
+         not trust it; fall through and retry instead.  */
+      if (probe->dev == dev && probe->ino_set)
+        return probe->ino_set;
+    }
+  else
+    {
+      dis->probe = probe = malloc (sizeof *probe);
+      if (! probe)
+        return NULL;
+    }
+
+  /* Probe for the device.  Clear INO_SET first: if hash_insert fails,
+     the probe stays cached with DEV already stored in it, and it must
+     not pair DEV with the previous device's set or with
+     uninitialized memory.  */
+  probe->dev = dev;
+  probe->ino_set = NULL;
+  ent = hash_insert (dis->dev_map, probe);
+  if (! ent)
+    return NULL;
+
+  if (ent == probe)
+    {
+      /* DEV is new and the table now owns PROBE.  Prepare to allocate
+         a new probe next time; this one is in use.  */
+      dis->probe = NULL;
+    }
+
+  /* Allocate the inode set for a new device, or for an existing entry
+     whose earlier allocation failed.  */
+  if (! ent->ino_set)
+    ent->ino_set = hash_initialize (INITIAL_INO_SET_SIZE, NULL,
+                                    di_ino_hash, NULL, NULL);
+
+  /* Record the result in the probe so that a repeated query can be
+     answered from the cache.  This is a no-op when ENT is PROBE.  */
+  probe->ino_set = ent->ino_set;
+
+  return ent->ino_set;
+}
+
+/* Return the representative under which inode number INO is stored
+   in the inode sets of DIS.  Positive inode numbers below
+   LARGE_INO_MIN represent themselves; any other value (including 0)
+   is translated through DIS->ino_map, which is created on first
+   need.  Return INO_MAP_INSERT_FAILURE on error.  */
+static hashint
+map_inode_number (struct di_set *dis, ino_t ino)
+{
+  if (! (0 < ino && ino < LARGE_INO_MIN))
+    {
+      /* Create the large-inode map lazily; most sets never need it.  */
+      if (dis->ino_map == NULL)
+        {
+          dis->ino_map = ino_map_alloc (LARGE_INO_MIN);
+          if (dis->ino_map == NULL)
+            return INO_MAP_INSERT_FAILURE;
+        }
+
+      return ino_map_insert (dis->ino_map, ino);
+    }
+
+  /* Small positive inode numbers are their own representatives.  */
+  return ino;
+}
+
+/* Add the pair DEV,INO to the set DIS.
+   Return 1 if the pair was newly inserted, 0 if an equal pair was
+   already present (the set is left unchanged), and -1 if any step
+   fails.  */
+int
+di_set_insert (struct di_set *dis, dev_t dev, ino_t ino)
+{
+  int status = -1;
+
+  /* First level: find (or create) the inode set for DEV.  */
+  struct hash_table *inodes = map_device (dis, dev);
+  if (inodes)
+    {
+      /* Second level: reduce INO to its small representative, then
+         store that representative, disguised as a pointer.  */
+      hashint key = map_inode_number (dis, ino);
+      if (key != INO_MAP_INSERT_FAILURE)
+        status = hash_insert_if_absent (inodes, (void const *) key, NULL);
+    }
+
+  return status;
+}
+
+/* Test whether the pair DEV,INO is a member of the set DIS.
+   Return 1 if it is, 0 if it is not, and -1 if any step fails.
+   The lookup goes through map_device and map_inode_number, the same
+   helpers that di_set_insert uses.  */
+int
+di_set_lookup (struct di_set *dis, dev_t dev, ino_t ino)
+{
+  int status = -1;
+
+  /* First level: find the inode set for DEV.  */
+  struct hash_table *inodes = map_device (dis, dev);
+  if (inodes)
+    {
+      /* Second level: reduce INO to its small representative and
+         check whether that representative is in the set.  */
+      hashint key = map_inode_number (dis, ino);
+      if (key != INO_MAP_INSERT_FAILURE)
+        status = hash_lookup (inodes, (void const *) key) != NULL;
+    }
+
+  return status;
+}