summaryrefslogtreecommitdiffstats
path: root/src/include/lib
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:19:15 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:19:15 +0000
commit6eb9c5a5657d1fe77b55cc261450f3538d35a94d (patch)
tree657d8194422a5daccecfd42d654b8a245ef7b4c8 /src/include/lib
parentInitial commit. (diff)
downloadpostgresql-13-upstream.tar.xz
postgresql-13-upstream.zip
Adding upstream version 13.4.upstream/13.4upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/include/lib')
-rw-r--r--src/include/lib/binaryheap.h54
-rw-r--r--src/include/lib/bipartite_match.h46
-rw-r--r--src/include/lib/bloomfilter.h27
-rw-r--r--src/include/lib/dshash.h90
-rw-r--r--src/include/lib/hyperloglog.h68
-rw-r--r--src/include/lib/ilist.h727
-rw-r--r--src/include/lib/integerset.h24
-rw-r--r--src/include/lib/knapsack.h16
-rw-r--r--src/include/lib/pairingheap.h102
-rw-r--r--src/include/lib/qunique.h67
-rw-r--r--src/include/lib/rbtree.h79
-rw-r--r--src/include/lib/simplehash.h1059
-rw-r--r--src/include/lib/stringinfo.h161
13 files changed, 2520 insertions, 0 deletions
diff --git a/src/include/lib/binaryheap.h b/src/include/lib/binaryheap.h
new file mode 100644
index 0000000..8d5db56
--- /dev/null
+++ b/src/include/lib/binaryheap.h
@@ -0,0 +1,54 @@
+/*
+ * binaryheap.h
+ *
+ * A simple binary heap implementation
+ *
+ * Portions Copyright (c) 2012-2020, PostgreSQL Global Development Group
+ *
+ * src/include/lib/binaryheap.h
+ */
+
+#ifndef BINARYHEAP_H
+#define BINARYHEAP_H
+
+/*
+ * For a max-heap, the comparator must return <0 iff a < b, 0 iff a == b,
+ * and >0 iff a > b. For a min-heap, the conditions are reversed.
+ */
+typedef int (*binaryheap_comparator) (Datum a, Datum b, void *arg);
+
+/*
+ * binaryheap
+ *
+ * bh_size how many nodes are currently in "nodes"
+ * bh_space how many nodes can be stored in "nodes"
+ * bh_has_heap_property no unordered operations since last heap build
+ * bh_compare comparison function to define the heap property
+ * bh_arg user data for comparison function
+ * bh_nodes variable-length array of "space" nodes
+ */
+typedef struct binaryheap
+{
+ int bh_size;
+ int bh_space;
+ bool bh_has_heap_property; /* debugging cross-check */
+ binaryheap_comparator bh_compare;
+ void *bh_arg;
+ Datum bh_nodes[FLEXIBLE_ARRAY_MEMBER];
+} binaryheap;
+
+extern binaryheap *binaryheap_allocate(int capacity,
+ binaryheap_comparator compare,
+ void *arg);
+extern void binaryheap_reset(binaryheap *heap);
+extern void binaryheap_free(binaryheap *heap);
+extern void binaryheap_add_unordered(binaryheap *heap, Datum d);
+extern void binaryheap_build(binaryheap *heap);
+extern void binaryheap_add(binaryheap *heap, Datum d);
+extern Datum binaryheap_first(binaryheap *heap);
+extern Datum binaryheap_remove_first(binaryheap *heap);
+extern void binaryheap_replace_first(binaryheap *heap, Datum d);
+
+#define binaryheap_empty(h) ((h)->bh_size == 0)
+
+#endif /* BINARYHEAP_H */
diff --git a/src/include/lib/bipartite_match.h b/src/include/lib/bipartite_match.h
new file mode 100644
index 0000000..a4c3e7d
--- /dev/null
+++ b/src/include/lib/bipartite_match.h
@@ -0,0 +1,46 @@
+/*
+ * bipartite_match.h
+ *
+ * Copyright (c) 2015-2020, PostgreSQL Global Development Group
+ *
+ * src/include/lib/bipartite_match.h
+ */
+#ifndef BIPARTITE_MATCH_H
+#define BIPARTITE_MATCH_H
+
+/*
+ * Given a bipartite graph consisting of nodes U numbered 1..nU, nodes V
+ * numbered 1..nV, and an adjacency map of undirected edges in the form
+ * adjacency[u] = [k, v1, v2, v3, ... vk], we wish to find a "maximum
+ * cardinality matching", which is defined as follows: a matching is a subset
+ * of the original edges such that no node has more than one edge, and a
+ * matching has maximum cardinality if there exists no other matching with a
+ * greater number of edges.
+ *
+ * This matching has various applications in graph theory, but the motivating
+ * example here is Dilworth's theorem: a partially-ordered set can be divided
+ * into the minimum number of chains (i.e. subsets X where x1 < x2 < x3 ...) by
+ * a bipartite graph construction. This gives us a polynomial-time solution to
+ * the problem of planning a collection of grouping sets with the provably
+ * minimal number of sort operations.
+ */
+typedef struct BipartiteMatchState
+{
+ /* inputs: */
+ int u_size; /* size of U */
+ int v_size; /* size of V */
+ short **adjacency; /* adjacency[u] = [k, v1,v2,v3,...,vk] */
+ /* outputs: */
+ int matching; /* number of edges in matching */
+ short *pair_uv; /* pair_uv[u] -> v */
+ short *pair_vu; /* pair_vu[v] -> u */
+ /* private state for matching algorithm: */
+ short *distance; /* distance[u] */
+ short *queue; /* queue storage for breadth search */
+} BipartiteMatchState;
+
+extern BipartiteMatchState *BipartiteMatch(int u_size, int v_size, short **adjacency);
+
+extern void BipartiteMatchFree(BipartiteMatchState *state);
+
+#endif /* BIPARTITE_MATCH_H */
diff --git a/src/include/lib/bloomfilter.h b/src/include/lib/bloomfilter.h
new file mode 100644
index 0000000..52343ba
--- /dev/null
+++ b/src/include/lib/bloomfilter.h
@@ -0,0 +1,27 @@
+/*-------------------------------------------------------------------------
+ *
+ * bloomfilter.h
+ * Space-efficient set membership testing
+ *
+ * Copyright (c) 2018-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/include/lib/bloomfilter.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef BLOOMFILTER_H
+#define BLOOMFILTER_H
+
+typedef struct bloom_filter bloom_filter;
+
+extern bloom_filter *bloom_create(int64 total_elems, int bloom_work_mem,
+ uint64 seed);
+extern void bloom_free(bloom_filter *filter);
+extern void bloom_add_element(bloom_filter *filter, unsigned char *elem,
+ size_t len);
+extern bool bloom_lacks_element(bloom_filter *filter, unsigned char *elem,
+ size_t len);
+extern double bloom_prop_bits_set(bloom_filter *filter);
+
+#endif /* BLOOMFILTER_H */
diff --git a/src/include/lib/dshash.h b/src/include/lib/dshash.h
new file mode 100644
index 0000000..b86df68
--- /dev/null
+++ b/src/include/lib/dshash.h
@@ -0,0 +1,90 @@
+/*-------------------------------------------------------------------------
+ *
+ * dshash.h
+ * Concurrent hash tables backed by dynamic shared memory areas.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/include/lib/dshash.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef DSHASH_H
+#define DSHASH_H
+
+#include "utils/dsa.h"
+
+/* The opaque type representing a hash table. */
+struct dshash_table;
+typedef struct dshash_table dshash_table;
+
+/* A handle for a dshash_table which can be shared with other processes. */
+typedef dsa_pointer dshash_table_handle;
+
+/* The type for hash values. */
+typedef uint32 dshash_hash;
+
+/* A function type for comparing keys. */
+typedef int (*dshash_compare_function) (const void *a, const void *b,
+ size_t size, void *arg);
+
+/* A function type for computing hash values for keys. */
+typedef dshash_hash (*dshash_hash_function) (const void *v, size_t size,
+ void *arg);
+
+/*
+ * The set of parameters needed to create or attach to a hash table. The
+ * members tranche_id and tranche_name do not need to be initialized when
+ * attaching to an existing hash table.
+ *
+ * Compare and hash functions must be supplied even when attaching, because we
+ * can't safely share function pointers between backends in general. Either
+ * the arg variants or the non-arg variants should be supplied; the other
+ * function pointers should be NULL. If the arg variants are supplied then the
+ * user data pointer supplied to the create and attach functions will be
+ * passed to the hash and compare functions.
+ */
+typedef struct dshash_parameters
+{
+ size_t key_size; /* Size of the key (initial bytes of entry) */
+ size_t entry_size; /* Total size of entry */
+ dshash_compare_function compare_function; /* Compare function */
+ dshash_hash_function hash_function; /* Hash function */
+ int tranche_id; /* The tranche ID to use for locks */
+} dshash_parameters;
+
+/* Forward declaration of private types for use only by dshash.c. */
+struct dshash_table_item;
+typedef struct dshash_table_item dshash_table_item;
+
+/* Creating, sharing and destroying from hash tables. */
+extern dshash_table *dshash_create(dsa_area *area,
+ const dshash_parameters *params,
+ void *arg);
+extern dshash_table *dshash_attach(dsa_area *area,
+ const dshash_parameters *params,
+ dshash_table_handle handle,
+ void *arg);
+extern void dshash_detach(dshash_table *hash_table);
+extern dshash_table_handle dshash_get_hash_table_handle(dshash_table *hash_table);
+extern void dshash_destroy(dshash_table *hash_table);
+
+/* Finding, creating, deleting entries. */
+extern void *dshash_find(dshash_table *hash_table,
+ const void *key, bool exclusive);
+extern void *dshash_find_or_insert(dshash_table *hash_table,
+ const void *key, bool *found);
+extern bool dshash_delete_key(dshash_table *hash_table, const void *key);
+extern void dshash_delete_entry(dshash_table *hash_table, void *entry);
+extern void dshash_release_lock(dshash_table *hash_table, void *entry);
+
+/* Convenience hash and compare functions wrapping memcmp and tag_hash. */
+extern int dshash_memcmp(const void *a, const void *b, size_t size, void *arg);
+extern dshash_hash dshash_memhash(const void *v, size_t size, void *arg);
+
+/* Debugging support. */
+extern void dshash_dump(dshash_table *hash_table);
+
+#endif /* DSHASH_H */
diff --git a/src/include/lib/hyperloglog.h b/src/include/lib/hyperloglog.h
new file mode 100644
index 0000000..dea4138
--- /dev/null
+++ b/src/include/lib/hyperloglog.h
@@ -0,0 +1,68 @@
+/*
+ * hyperloglog.h
+ *
+ * A simple HyperLogLog cardinality estimator implementation
+ *
+ * Portions Copyright (c) 2014-2020, PostgreSQL Global Development Group
+ *
+ * Based on Hideaki Ohno's C++ implementation. The copyright terms of Ohno's
+ * original version (the MIT license) follow.
+ *
+ * src/include/lib/hyperloglog.h
+ */
+
+/*
+ * Copyright (c) 2013 Hideaki Ohno <hide.o.j55{at}gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the 'Software'), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef HYPERLOGLOG_H
+#define HYPERLOGLOG_H
+
+/*
+ * HyperLogLog is an approximate technique for computing the number of distinct
+ * entries in a set. Importantly, it does this by using a fixed amount of
+ * memory. See the 2007 paper "HyperLogLog: the analysis of a near-optimal
+ * cardinality estimation algorithm" for more.
+ *
+ * hyperLogLogState
+ *
+ * registerWidth register width, in bits ("k")
+ * nRegisters number of registers
+ * alphaMM alpha * m ^ 2 (see initHyperLogLog())
+ * hashesArr array of hashes
+ * arrSize size of hashesArr
+ */
+typedef struct hyperLogLogState
+{
+ uint8 registerWidth;
+ Size nRegisters;
+ double alphaMM;
+ uint8 *hashesArr;
+ Size arrSize;
+} hyperLogLogState;
+
+extern void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth);
+extern void initHyperLogLogError(hyperLogLogState *cState, double error);
+extern void addHyperLogLog(hyperLogLogState *cState, uint32 hash);
+extern double estimateHyperLogLog(hyperLogLogState *cState);
+extern void freeHyperLogLog(hyperLogLogState *cState);
+
+#endif /* HYPERLOGLOG_H */
diff --git a/src/include/lib/ilist.h b/src/include/lib/ilist.h
new file mode 100644
index 0000000..98db885
--- /dev/null
+++ b/src/include/lib/ilist.h
@@ -0,0 +1,727 @@
+/*-------------------------------------------------------------------------
+ *
+ * ilist.h
+ * integrated/inline doubly- and singly-linked lists
+ *
+ * These list types are useful when there are only a predetermined set of
+ * lists that an object could be in. List links are embedded directly into
+ * the objects, and thus no extra memory management overhead is required.
+ * (Of course, if only a small proportion of existing objects are in a list,
+ * the link fields in the remainder would be wasted space. But usually,
+ * it saves space to not have separately-allocated list nodes.)
+ *
+ * None of the functions here allocate any memory; they just manipulate
+ * externally managed memory. The APIs for singly and doubly linked lists
+ * are identical as far as capabilities of both allow.
+ *
+ * Each list has a list header, which exists even when the list is empty.
+ * An empty singly-linked list has a NULL pointer in its header.
+ * There are two kinds of empty doubly linked lists: those that have been
+ * initialized to NULL, and those that have been initialized to circularity.
+ * (If a dlist is modified and then all its elements are deleted, it will be
+ * in the circular state.) We prefer circular dlists because there are some
+ * operations that can be done without branches (and thus faster) on lists
+ * that use circular representation. However, it is often convenient to
+ * initialize list headers to zeroes rather than setting them up with an
+ * explicit initialization function, so we also allow the other case.
+ *
+ * EXAMPLES
+ *
+ * Here's a simple example demonstrating how this can be used. Let's assume
+ * we want to store information about the tables contained in a database.
+ *
+ * #include "lib/ilist.h"
+ *
+ * // Define struct for the databases including a list header that will be
+ * // used to access the nodes in the table list later on.
+ * typedef struct my_database
+ * {
+ * char *datname;
+ * dlist_head tables;
+ * // ...
+ * } my_database;
+ *
+ * // Define struct for the tables. Note the list_node element which stores
+ * // prev/next list links. The list_node element need not be first.
+ * typedef struct my_table
+ * {
+ * char *tablename;
+ * dlist_node list_node;
+ * perm_t permissions;
+ * // ...
+ * } my_table;
+ *
+ * // create a database
+ * my_database *db = create_database();
+ *
+ * // and add a few tables to its table list
+ * dlist_push_head(&db->tables, &create_table(db, "a")->list_node);
+ * ...
+ * dlist_push_head(&db->tables, &create_table(db, "b")->list_node);
+ *
+ *
+ * To iterate over the table list, we allocate an iterator variable and use
+ * a specialized looping construct. Inside a dlist_foreach, the iterator's
+ * 'cur' field can be used to access the current element. iter.cur points to
+ * a 'dlist_node', but most of the time what we want is the actual table
+ * information; dlist_container() gives us that, like so:
+ *
+ * dlist_iter iter;
+ * dlist_foreach(iter, &db->tables)
+ * {
+ * my_table *tbl = dlist_container(my_table, list_node, iter.cur);
+ * printf("we have a table: %s in database %s\n",
+ * tbl->tablename, db->datname);
+ * }
+ *
+ *
+ * While a simple iteration is useful, we sometimes also want to manipulate
+ * the list while iterating. There is a different iterator element and looping
+ * construct for that. Suppose we want to delete tables that meet a certain
+ * criterion:
+ *
+ * dlist_mutable_iter miter;
+ * dlist_foreach_modify(miter, &db->tables)
+ * {
+ * my_table *tbl = dlist_container(my_table, list_node, miter.cur);
+ *
+ * if (!tbl->to_be_deleted)
+ * continue; // don't touch this one
+ *
+ * // unlink the current table from the linked list
+ * dlist_delete(miter.cur);
+ * // as these lists never manage memory, we can still access the table
+ * // after it's been unlinked
+ * drop_table(db, tbl);
+ * }
+ *
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/include/lib/ilist.h
+ *-------------------------------------------------------------------------
+ */
+#ifndef ILIST_H
+#define ILIST_H
+
+/*
+ * Enable for extra debugging. This is rather expensive, so it's not enabled by
+ * default even when USE_ASSERT_CHECKING.
+ */
+/* #define ILIST_DEBUG */
+
+/*
+ * Node of a doubly linked list.
+ *
+ * Embed this in structs that need to be part of a doubly linked list.
+ */
+typedef struct dlist_node dlist_node;
+struct dlist_node
+{
+ dlist_node *prev;
+ dlist_node *next;
+};
+
+/*
+ * Head of a doubly linked list.
+ *
+ * Non-empty lists are internally circularly linked. Circular lists have the
+ * advantage of not needing any branches in the most common list manipulations.
+ * An empty list can also be represented as a pair of NULL pointers, making
+ * initialization easier.
+ */
+typedef struct dlist_head
+{
+ /*
+ * head.next either points to the first element of the list; to &head if
+ * it's a circular empty list; or to NULL if empty and not circular.
+ *
+ * head.prev either points to the last element of the list; to &head if
+ * it's a circular empty list; or to NULL if empty and not circular.
+ */
+ dlist_node head;
+} dlist_head;
+
+
+/*
+ * Doubly linked list iterator.
+ *
+ * Used as state in dlist_foreach() and dlist_reverse_foreach(). To get the
+ * current element of the iteration use the 'cur' member.
+ *
+ * Iterations using this are *not* allowed to change the list while iterating!
+ *
+ * NB: We use an extra "end" field here to avoid multiple evaluations of
+ * arguments in the dlist_foreach() macro.
+ */
+typedef struct dlist_iter
+{
+ dlist_node *cur; /* current element */
+ dlist_node *end; /* last node we'll iterate to */
+} dlist_iter;
+
+/*
+ * Doubly linked list iterator allowing some modifications while iterating.
+ *
+ * Used as state in dlist_foreach_modify(). To get the current element of the
+ * iteration use the 'cur' member.
+ *
+ * Iterations using this are only allowed to change the list at the current
+ * point of iteration. It is fine to delete the current node, but it is *not*
+ * fine to insert or delete adjacent nodes.
+ *
+ * NB: We need a separate type for mutable iterations so that we can store
+ * the 'next' node of the current node in case it gets deleted or modified.
+ */
+typedef struct dlist_mutable_iter
+{
+ dlist_node *cur; /* current element */
+ dlist_node *next; /* next node we'll iterate to */
+ dlist_node *end; /* last node we'll iterate to */
+} dlist_mutable_iter;
+
+/*
+ * Node of a singly linked list.
+ *
+ * Embed this in structs that need to be part of a singly linked list.
+ */
+typedef struct slist_node slist_node;
+struct slist_node
+{
+ slist_node *next;
+};
+
+/*
+ * Head of a singly linked list.
+ *
+ * Singly linked lists are not circularly linked, in contrast to doubly linked
+ * lists; we just set head.next to NULL if empty. This doesn't incur any
+ * additional branches in the usual manipulations.
+ */
+typedef struct slist_head
+{
+ slist_node head;
+} slist_head;
+
+/*
+ * Singly linked list iterator.
+ *
+ * Used as state in slist_foreach(). To get the current element of the
+ * iteration use the 'cur' member.
+ *
+ * It's allowed to modify the list while iterating, with the exception of
+ * deleting the iterator's current node; deletion of that node requires
+ * care if the iteration is to be continued afterward. (Doing so and also
+ * deleting or inserting adjacent list elements might misbehave; also, if
+ * the user frees the current node's storage, continuing the iteration is
+ * not safe.)
+ *
+ * NB: this wouldn't really need to be an extra struct, we could use an
+ * slist_node * directly. We prefer a separate type for consistency.
+ */
+typedef struct slist_iter
+{
+ slist_node *cur;
+} slist_iter;
+
+/*
+ * Singly linked list iterator allowing some modifications while iterating.
+ *
+ * Used as state in slist_foreach_modify(). To get the current element of the
+ * iteration use the 'cur' member.
+ *
+ * The only list modification allowed while iterating is to remove the current
+ * node via slist_delete_current() (*not* slist_delete()). Insertion or
+ * deletion of nodes adjacent to the current node would misbehave.
+ */
+typedef struct slist_mutable_iter
+{
+ slist_node *cur; /* current element */
+ slist_node *next; /* next node we'll iterate to */
+ slist_node *prev; /* prev node, for deletions */
+} slist_mutable_iter;
+
+
+/* Static initializers */
+#define DLIST_STATIC_INIT(name) {{&(name).head, &(name).head}}
+#define SLIST_STATIC_INIT(name) {{NULL}}
+
+
+/* Prototypes for functions too big to be inline */
+
+/* Caution: this is O(n); consider using slist_delete_current() instead */
+extern void slist_delete(slist_head *head, slist_node *node);
+
+#ifdef ILIST_DEBUG
+extern void dlist_check(dlist_head *head);
+extern void slist_check(slist_head *head);
+#else
+/*
+ * These seemingly useless casts to void are here to keep the compiler quiet
+ * about the argument being unused in many functions in a non-debug compile,
+ * in which functions the only point of passing the list head pointer is to be
+ * able to run these checks.
+ */
+#define dlist_check(head) ((void) (head))
+#define slist_check(head) ((void) (head))
+#endif /* ILIST_DEBUG */
+
+/* doubly linked list implementation */
+
+/*
+ * Initialize a doubly linked list.
+ * Previous state will be thrown away without any cleanup.
+ */
+static inline void
+dlist_init(dlist_head *head)
+{
+ head->head.next = head->head.prev = &head->head;
+}
+
+/*
+ * Is the list empty?
+ *
+ * An empty list has either its first 'next' pointer set to NULL, or to itself.
+ */
+static inline bool
+dlist_is_empty(dlist_head *head)
+{
+ dlist_check(head);
+
+ return head->head.next == NULL || head->head.next == &(head->head);
+}
+
+/*
+ * Insert a node at the beginning of the list.
+ */
+static inline void
+dlist_push_head(dlist_head *head, dlist_node *node)
+{
+ if (head->head.next == NULL) /* convert NULL header to circular */
+ dlist_init(head);
+
+ node->next = head->head.next;
+ node->prev = &head->head;
+ node->next->prev = node;
+ head->head.next = node;
+
+ dlist_check(head);
+}
+
+/*
+ * Insert a node at the end of the list.
+ */
+static inline void
+dlist_push_tail(dlist_head *head, dlist_node *node)
+{
+ if (head->head.next == NULL) /* convert NULL header to circular */
+ dlist_init(head);
+
+ node->next = &head->head;
+ node->prev = head->head.prev;
+ node->prev->next = node;
+ head->head.prev = node;
+
+ dlist_check(head);
+}
+
+/*
+ * Insert a node after another *in the same list*
+ */
+static inline void
+dlist_insert_after(dlist_node *after, dlist_node *node)
+{
+ node->prev = after;
+ node->next = after->next;
+ after->next = node;
+ node->next->prev = node;
+}
+
+/*
+ * Insert a node before another *in the same list*
+ */
+static inline void
+dlist_insert_before(dlist_node *before, dlist_node *node)
+{
+ node->prev = before->prev;
+ node->next = before;
+ before->prev = node;
+ node->prev->next = node;
+}
+
+/*
+ * Delete 'node' from its list (it must be in one).
+ */
+static inline void
+dlist_delete(dlist_node *node)
+{
+ node->prev->next = node->next;
+ node->next->prev = node->prev;
+}
+
+/*
+ * Remove and return the first node from a list (there must be one).
+ */
+static inline dlist_node *
+dlist_pop_head_node(dlist_head *head)
+{
+ dlist_node *node;
+
+ Assert(!dlist_is_empty(head));
+ node = head->head.next;
+ dlist_delete(node);
+ return node;
+}
+
+/*
+ * Move element from its current position in the list to the head position in
+ * the same list.
+ *
+ * Undefined behaviour if 'node' is not already part of the list.
+ */
+static inline void
+dlist_move_head(dlist_head *head, dlist_node *node)
+{
+ /* fast path if it's already at the head */
+ if (head->head.next == node)
+ return;
+
+ dlist_delete(node);
+ dlist_push_head(head, node);
+
+ dlist_check(head);
+}
+
+/*
+ * Check whether 'node' has a following node.
+ * Caution: unreliable if 'node' is not in the list.
+ */
+static inline bool
+dlist_has_next(dlist_head *head, dlist_node *node)
+{
+ return node->next != &head->head;
+}
+
+/*
+ * Check whether 'node' has a preceding node.
+ * Caution: unreliable if 'node' is not in the list.
+ */
+static inline bool
+dlist_has_prev(dlist_head *head, dlist_node *node)
+{
+ return node->prev != &head->head;
+}
+
+/*
+ * Return the next node in the list (there must be one).
+ */
+static inline dlist_node *
+dlist_next_node(dlist_head *head, dlist_node *node)
+{
+ Assert(dlist_has_next(head, node));
+ return node->next;
+}
+
+/*
+ * Return previous node in the list (there must be one).
+ */
+static inline dlist_node *
+dlist_prev_node(dlist_head *head, dlist_node *node)
+{
+ Assert(dlist_has_prev(head, node));
+ return node->prev;
+}
+
+/* internal support function to get address of head element's struct */
+static inline void *
+dlist_head_element_off(dlist_head *head, size_t off)
+{
+ Assert(!dlist_is_empty(head));
+ return (char *) head->head.next - off;
+}
+
+/*
+ * Return the first node in the list (there must be one).
+ */
+static inline dlist_node *
+dlist_head_node(dlist_head *head)
+{
+ return (dlist_node *) dlist_head_element_off(head, 0);
+}
+
+/* internal support function to get address of tail element's struct */
+static inline void *
+dlist_tail_element_off(dlist_head *head, size_t off)
+{
+ Assert(!dlist_is_empty(head));
+ return (char *) head->head.prev - off;
+}
+
+/*
+ * Return the last node in the list (there must be one).
+ */
+static inline dlist_node *
+dlist_tail_node(dlist_head *head)
+{
+ return (dlist_node *) dlist_tail_element_off(head, 0);
+}
+
+/*
+ * Return the containing struct of 'type' where 'membername' is the dlist_node
+ * pointed at by 'ptr'.
+ *
+ * This is used to convert a dlist_node * back to its containing struct.
+ */
+#define dlist_container(type, membername, ptr) \
+ (AssertVariableIsOfTypeMacro(ptr, dlist_node *), \
+ AssertVariableIsOfTypeMacro(((type *) NULL)->membername, dlist_node), \
+ ((type *) ((char *) (ptr) - offsetof(type, membername))))
+
+/*
+ * Return the address of the first element in the list.
+ *
+ * The list must not be empty.
+ */
+#define dlist_head_element(type, membername, lhead) \
+ (AssertVariableIsOfTypeMacro(((type *) NULL)->membername, dlist_node), \
+ (type *) dlist_head_element_off(lhead, offsetof(type, membername)))
+
+/*
+ * Return the address of the last element in the list.
+ *
+ * The list must not be empty.
+ */
+#define dlist_tail_element(type, membername, lhead) \
+ (AssertVariableIsOfTypeMacro(((type *) NULL)->membername, dlist_node), \
+ ((type *) dlist_tail_element_off(lhead, offsetof(type, membername))))
+
+/*
+ * Iterate through the list pointed at by 'lhead' storing the state in 'iter'.
+ *
+ * Access the current element with iter.cur.
+ *
+ * It is *not* allowed to manipulate the list during iteration.
+ */
+#define dlist_foreach(iter, lhead) \
+ for (AssertVariableIsOfTypeMacro(iter, dlist_iter), \
+ AssertVariableIsOfTypeMacro(lhead, dlist_head *), \
+ (iter).end = &(lhead)->head, \
+ (iter).cur = (iter).end->next ? (iter).end->next : (iter).end; \
+ (iter).cur != (iter).end; \
+ (iter).cur = (iter).cur->next)
+
+/*
+ * Iterate through the list pointed at by 'lhead' storing the state in 'iter'.
+ *
+ * Access the current element with iter.cur.
+ *
+ * Iterations using this are only allowed to change the list at the current
+ * point of iteration. It is fine to delete the current node, but it is *not*
+ * fine to insert or delete adjacent nodes.
+ */
+#define dlist_foreach_modify(iter, lhead) \
+ for (AssertVariableIsOfTypeMacro(iter, dlist_mutable_iter), \
+ AssertVariableIsOfTypeMacro(lhead, dlist_head *), \
+ (iter).end = &(lhead)->head, \
+ (iter).cur = (iter).end->next ? (iter).end->next : (iter).end, \
+ (iter).next = (iter).cur->next; \
+ (iter).cur != (iter).end; \
+ (iter).cur = (iter).next, (iter).next = (iter).cur->next)
+
+/*
+ * Iterate through the list in reverse order.
+ *
+ * It is *not* allowed to manipulate the list during iteration.
+ */
+#define dlist_reverse_foreach(iter, lhead) \
+ for (AssertVariableIsOfTypeMacro(iter, dlist_iter), \
+ AssertVariableIsOfTypeMacro(lhead, dlist_head *), \
+ (iter).end = &(lhead)->head, \
+ (iter).cur = (iter).end->prev ? (iter).end->prev : (iter).end; \
+ (iter).cur != (iter).end; \
+ (iter).cur = (iter).cur->prev)
+
+
+/* singly linked list implementation */
+
+/*
+ * Initialize a singly linked list.
+ * Previous state will be thrown away without any cleanup.
+ */
+static inline void
+slist_init(slist_head *head)
+{
+ head->head.next = NULL;
+}
+
+/*
+ * Is the list empty?
+ */
+static inline bool
+slist_is_empty(slist_head *head)
+{
+ slist_check(head);
+
+ return head->head.next == NULL;
+}
+
+/*
+ * Insert a node at the beginning of the list.
+ */
+static inline void
+slist_push_head(slist_head *head, slist_node *node)
+{
+ node->next = head->head.next;
+ head->head.next = node;
+
+ slist_check(head);
+}
+
+/*
+ * Insert a node after another *in the same list*
+ */
+static inline void
+slist_insert_after(slist_node *after, slist_node *node)
+{
+ node->next = after->next;
+ after->next = node;
+}
+
+/*
+ * Remove and return the first node from a list (there must be one).
+ */
+static inline slist_node *
+slist_pop_head_node(slist_head *head)
+{
+ slist_node *node;
+
+ Assert(!slist_is_empty(head));
+ node = head->head.next;
+ head->head.next = node->next;
+ slist_check(head);
+ return node;
+}
+
+/*
+ * Check whether 'node' has a following node.
+ */
+static inline bool
+slist_has_next(slist_head *head, slist_node *node)
+{
+ slist_check(head);
+
+ return node->next != NULL;
+}
+
+/*
+ * Return the next node in the list (there must be one).
+ */
+static inline slist_node *
+slist_next_node(slist_head *head, slist_node *node)
+{
+ Assert(slist_has_next(head, node));
+ return node->next;
+}
+
+/* internal support function to get address of head element's struct */
+static inline void *
+slist_head_element_off(slist_head *head, size_t off)
+{
+ Assert(!slist_is_empty(head));
+ return (char *) head->head.next - off;
+}
+
+/*
+ * Return the first node in the list (there must be one).
+ */
+static inline slist_node *
+slist_head_node(slist_head *head)
+{
+ return (slist_node *) slist_head_element_off(head, 0);
+}
+
+/*
+ * Delete the list element the iterator currently points to.
+ *
+ * Caution: this modifies iter->cur, so don't use that again in the current
+ * loop iteration.
+ */
+static inline void
+slist_delete_current(slist_mutable_iter *iter)
+{
+ /*
+ * Update previous element's forward link. If the iteration is at the
+ * first list element, iter->prev will point to the list header's "head"
+ * field, so we don't need a special case for that.
+ */
+ iter->prev->next = iter->next;
+
+ /*
+ * Reset cur to prev, so that prev will continue to point to the prior
+ * valid list element after slist_foreach_modify() advances to the next.
+ */
+ iter->cur = iter->prev;
+}
+
+/*
+ * Return the containing struct of 'type' where 'membername' is the slist_node
+ * pointed at by 'ptr'.
+ *
+ * This is used to convert a slist_node * back to its containing struct.
+ */
+#define slist_container(type, membername, ptr) \
+ (AssertVariableIsOfTypeMacro(ptr, slist_node *), \
+ AssertVariableIsOfTypeMacro(((type *) NULL)->membername, slist_node), \
+ ((type *) ((char *) (ptr) - offsetof(type, membername))))
+
+/*
+ * Return the address of the first element in the list.
+ *
+ * The list must not be empty.
+ */
+#define slist_head_element(type, membername, lhead) \
+ (AssertVariableIsOfTypeMacro(((type *) NULL)->membername, slist_node), \
+ (type *) slist_head_element_off(lhead, offsetof(type, membername)))
+
+/*
+ * Iterate through the list pointed at by 'lhead' storing the state in 'iter'.
+ *
+ * Access the current element with iter.cur.
+ *
+ * It's allowed to modify the list while iterating, with the exception of
+ * deleting the iterator's current node; deletion of that node requires
+ * care if the iteration is to be continued afterward. (Doing so and also
+ * deleting or inserting adjacent list elements might misbehave; also, if
+ * the user frees the current node's storage, continuing the iteration is
+ * not safe.)
+ */
+#define slist_foreach(iter, lhead) \
+ for (AssertVariableIsOfTypeMacro(iter, slist_iter), \
+ AssertVariableIsOfTypeMacro(lhead, slist_head *), \
+ (iter).cur = (lhead)->head.next; \
+ (iter).cur != NULL; \
+ (iter).cur = (iter).cur->next)
+
+/*
+ * Iterate through the list pointed at by 'lhead' storing the state in 'iter'.
+ *
+ * Access the current element with iter.cur.
+ *
+ * The only list modification allowed while iterating is to remove the current
+ * node via slist_delete_current() (*not* slist_delete()). Insertion or
+ * deletion of nodes adjacent to the current node would misbehave.
+ */
+#define slist_foreach_modify(iter, lhead) \
+ for (AssertVariableIsOfTypeMacro(iter, slist_mutable_iter), \
+ AssertVariableIsOfTypeMacro(lhead, slist_head *), \
+ (iter).prev = &(lhead)->head, \
+ (iter).cur = (iter).prev->next, \
+ (iter).next = (iter).cur ? (iter).cur->next : NULL; \
+ (iter).cur != NULL; \
+ (iter).prev = (iter).cur, \
+ (iter).cur = (iter).next, \
+ (iter).next = (iter).next ? (iter).next->next : NULL)
+
+#endif /* ILIST_H */
diff --git a/src/include/lib/integerset.h b/src/include/lib/integerset.h
new file mode 100644
index 0000000..67bf5c5
--- /dev/null
+++ b/src/include/lib/integerset.h
@@ -0,0 +1,24 @@
+/*
+ * integerset.h
+ * In-memory data structure to hold a large set of integers efficiently
+ *
+ * Portions Copyright (c) 2012-2020, PostgreSQL Global Development Group
+ *
+ * src/include/lib/integerset.h
+ */
+#ifndef INTEGERSET_H
+#define INTEGERSET_H
+
+typedef struct IntegerSet IntegerSet;
+
+extern IntegerSet *intset_create(void);
+extern void intset_add_member(IntegerSet *intset, uint64 x);
+extern bool intset_is_member(IntegerSet *intset, uint64 x);
+
+extern uint64 intset_num_entries(IntegerSet *intset);
+extern uint64 intset_memory_usage(IntegerSet *intset);
+
+extern void intset_begin_iterate(IntegerSet *intset);
+extern bool intset_iterate_next(IntegerSet *intset, uint64 *next);
+
+#endif /* INTEGERSET_H */
diff --git a/src/include/lib/knapsack.h b/src/include/lib/knapsack.h
new file mode 100644
index 0000000..2d90dc4
--- /dev/null
+++ b/src/include/lib/knapsack.h
@@ -0,0 +1,16 @@
+/*
+ * knapsack.h
+ *
+ * Copyright (c) 2017-2020, PostgreSQL Global Development Group
+ *
+ * src/include/lib/knapsack.h
+ */
+#ifndef KNAPSACK_H
+#define KNAPSACK_H
+
+#include "nodes/bitmapset.h"
+
+extern Bitmapset *DiscreteKnapsack(int max_weight, int num_items,
+ int *item_weights, double *item_values);
+
+#endif /* KNAPSACK_H */
diff --git a/src/include/lib/pairingheap.h b/src/include/lib/pairingheap.h
new file mode 100644
index 0000000..7c5c0f7
--- /dev/null
+++ b/src/include/lib/pairingheap.h
@@ -0,0 +1,102 @@
+/*
+ * pairingheap.h
+ *
+ * A Pairing Heap implementation
+ *
+ * Portions Copyright (c) 2012-2020, PostgreSQL Global Development Group
+ *
+ * src/include/lib/pairingheap.h
+ */
+
+#ifndef PAIRINGHEAP_H
+#define PAIRINGHEAP_H
+
+#include "lib/stringinfo.h"
+
+/* Enable if you need the pairingheap_dump() debug function */
+/* #define PAIRINGHEAP_DEBUG */
+
+/*
+ * This represents an element stored in the heap. Embed this in a larger
+ * struct containing the actual data you're storing.
+ *
+ * A node can have multiple children, which form a double-linked list.
+ * first_child points to the node's first child, and the subsequent children
+ * can be found by following the next_sibling pointers. The last child has
+ * next_sibling == NULL. The prev_or_parent pointer points to the node's
+ * previous sibling, or if the node is its parent's first child, to the
+ * parent.
+ */
+typedef struct pairingheap_node
+{
+ struct pairingheap_node *first_child;
+ struct pairingheap_node *next_sibling;
+ struct pairingheap_node *prev_or_parent;
+} pairingheap_node;
+
+/*
+ * Return the containing struct of 'type' where 'membername' is the
+ * pairingheap_node pointed at by 'ptr'.
+ *
+ * This is used to convert a pairingheap_node * back to its containing struct.
+ */
+#define pairingheap_container(type, membername, ptr) \
+ (AssertVariableIsOfTypeMacro(ptr, pairingheap_node *), \
+ AssertVariableIsOfTypeMacro(((type *) NULL)->membername, pairingheap_node), \
+ ((type *) ((char *) (ptr) - offsetof(type, membername))))
+
+/*
+ * Like pairingheap_container, but used when the pointer is 'const ptr'
+ */
+#define pairingheap_const_container(type, membername, ptr) \
+ (AssertVariableIsOfTypeMacro(ptr, const pairingheap_node *), \
+ AssertVariableIsOfTypeMacro(((type *) NULL)->membername, pairingheap_node), \
+ ((const type *) ((const char *) (ptr) - offsetof(type, membername))))
+
+/*
+ * For a max-heap, the comparator must return <0 iff a < b, 0 iff a == b,
+ * and >0 iff a > b. For a min-heap, the conditions are reversed.
+ */
+typedef int (*pairingheap_comparator) (const pairingheap_node *a,
+ const pairingheap_node *b,
+ void *arg);
+
+/*
+ * A pairing heap.
+ *
+ * You can use pairingheap_allocate() to create a new palloc'd heap, or embed
+ * this in a larger struct, set ph_compare and ph_arg directly and initialize
+ * ph_root to NULL.
+ */
+typedef struct pairingheap
+{
+ pairingheap_comparator ph_compare; /* comparison function */
+ void *ph_arg; /* opaque argument to ph_compare */
+ pairingheap_node *ph_root; /* current root of the heap */
+} pairingheap;
+
+extern pairingheap *pairingheap_allocate(pairingheap_comparator compare,
+ void *arg);
+extern void pairingheap_free(pairingheap *heap);
+extern void pairingheap_add(pairingheap *heap, pairingheap_node *node);
+extern pairingheap_node *pairingheap_first(pairingheap *heap);
+extern pairingheap_node *pairingheap_remove_first(pairingheap *heap);
+extern void pairingheap_remove(pairingheap *heap, pairingheap_node *node);
+
+#ifdef PAIRINGHEAP_DEBUG
+extern char *pairingheap_dump(pairingheap *heap,
+ void (*dumpfunc) (pairingheap_node *node, StringInfo buf, void *opaque),
+ void *opaque);
+#endif
+
+/* Resets the heap to be empty. */
+#define pairingheap_reset(h) ((h)->ph_root = NULL)
+
+/* Is the heap empty? */
+#define pairingheap_is_empty(h) ((h)->ph_root == NULL)
+
+/* Is there exactly one node in the heap? */
+#define pairingheap_is_singular(h) \
+ ((h)->ph_root && (h)->ph_root->first_child == NULL)
+
+#endif /* PAIRINGHEAP_H */
diff --git a/src/include/lib/qunique.h b/src/include/lib/qunique.h
new file mode 100644
index 0000000..95d9fef
--- /dev/null
+++ b/src/include/lib/qunique.h
@@ -0,0 +1,67 @@
+/*-------------------------------------------------------------------------
+ *
+ * qunique.h
+ * inline array unique functions
+ * Portions Copyright (c) 2019-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/include/lib/qunique.h
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef QUNIQUE_H
+#define QUNIQUE_H
+
+/*
+ * Remove duplicates from a pre-sorted array, according to a user-supplied
+ * comparator. Usually the array should have been sorted with qsort() using
+ * the same arguments. Return the new size.
+ */
+static inline size_t
+qunique(void *array, size_t elements, size_t width,
+ int (*compare) (const void *, const void *))
+{
+ char *bytes = (char *) array;
+ size_t i,
+ j;
+
+ if (elements <= 1)
+ return elements;
+
+ for (i = 1, j = 0; i < elements; ++i)
+ {
+ if (compare(bytes + i * width, bytes + j * width) != 0 &&
+ ++j != i)
+ memcpy(bytes + j * width, bytes + i * width, width);
+ }
+
+ return j + 1;
+}
+
+/*
+ * Like qunique(), but takes a comparator with an extra user data argument
+ * which is passed through, for compatibility with qsort_arg().
+ */
+static inline size_t
+qunique_arg(void *array, size_t elements, size_t width,
+ int (*compare) (const void *, const void *, void *),
+ void *arg)
+{
+ char *bytes = (char *) array;
+ size_t i,
+ j;
+
+ if (elements <= 1)
+ return elements;
+
+ for (i = 1, j = 0; i < elements; ++i)
+ {
+ if (compare(bytes + i * width, bytes + j * width, arg) != 0 &&
+ ++j != i)
+ memcpy(bytes + j * width, bytes + i * width, width);
+ }
+
+ return j + 1;
+}
+
+#endif /* QUNIQUE_H */
diff --git a/src/include/lib/rbtree.h b/src/include/lib/rbtree.h
new file mode 100644
index 0000000..cd2c2fd
--- /dev/null
+++ b/src/include/lib/rbtree.h
@@ -0,0 +1,79 @@
+/*-------------------------------------------------------------------------
+ *
+ * rbtree.h
+ * interface for PostgreSQL generic Red-Black binary tree package
+ *
+ * Copyright (c) 2009-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/include/lib/rbtree.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef RBTREE_H
+#define RBTREE_H
+
+/*
+ * RBTNode is intended to be used as the first field of a larger struct,
+ * whose additional fields carry whatever payload data the caller needs
+ * for a tree entry. (The total size of that larger struct is passed to
+ * rbt_create.) RBTNode is declared here to support this usage, but
+ * callers must treat it as an opaque struct.
+ */
+typedef struct RBTNode
+{
+ char color; /* node's current color, red or black */
+ struct RBTNode *left; /* left child, or RBTNIL if none */
+ struct RBTNode *right; /* right child, or RBTNIL if none */
+ struct RBTNode *parent; /* parent, or NULL (not RBTNIL!) if none */
+} RBTNode;
+
+/* Opaque struct representing a whole tree */
+typedef struct RBTree RBTree;
+
+/* Available tree iteration orderings */
+typedef enum RBTOrderControl
+{
+ LeftRightWalk, /* inorder: left child, node, right child */
+ RightLeftWalk /* reverse inorder: right, node, left */
+} RBTOrderControl;
+
+/*
+ * RBTreeIterator holds state while traversing a tree. This is declared
+ * here so that callers can stack-allocate this, but must otherwise be
+ * treated as an opaque struct.
+ */
+typedef struct RBTreeIterator RBTreeIterator;
+
+struct RBTreeIterator
+{
+ RBTree *rbt;
+ RBTNode *(*iterate) (RBTreeIterator *iter);
+ RBTNode *last_visited;
+ bool is_over;
+};
+
+/* Support functions to be provided by caller */
+typedef int (*rbt_comparator) (const RBTNode *a, const RBTNode *b, void *arg);
+typedef void (*rbt_combiner) (RBTNode *existing, const RBTNode *newdata, void *arg);
+typedef RBTNode *(*rbt_allocfunc) (void *arg);
+typedef void (*rbt_freefunc) (RBTNode *x, void *arg);
+
+extern RBTree *rbt_create(Size node_size,
+ rbt_comparator comparator,
+ rbt_combiner combiner,
+ rbt_allocfunc allocfunc,
+ rbt_freefunc freefunc,
+ void *arg);
+
+extern RBTNode *rbt_find(RBTree *rbt, const RBTNode *data);
+extern RBTNode *rbt_leftmost(RBTree *rbt);
+
+extern RBTNode *rbt_insert(RBTree *rbt, const RBTNode *data, bool *isNew);
+extern void rbt_delete(RBTree *rbt, RBTNode *node);
+
+extern void rbt_begin_iterate(RBTree *rbt, RBTOrderControl ctrl,
+ RBTreeIterator *iter);
+extern RBTNode *rbt_iterate(RBTreeIterator *iter);
+
+#endif /* RBTREE_H */
diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h
new file mode 100644
index 0000000..90dfa8a
--- /dev/null
+++ b/src/include/lib/simplehash.h
@@ -0,0 +1,1059 @@
+/*
+ * simplehash.h
+ *
+ * Hash table implementation which will be specialized to user-defined
+ * types, by including this file to generate the required code. It's
+ * probably not worthwhile to do so for hash tables that aren't performance
+ * or space sensitive.
+ *
+ * Usage notes:
+ *
+ * To generate a hash-table and associated functions for a use case several
+ * macros have to be #define'ed before this file is included. Including
+ * the file #undef's all those, so a new hash table can be generated
+ * afterwards.
+ * The relevant parameters are:
+ * - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo'
+ * will result in hash table type 'foo_hash' and functions like
+ * 'foo_insert'/'foo_lookup' and so forth.
+ * - SH_ELEMENT_TYPE - type of the contained elements
+ * - SH_KEY_TYPE - type of the hashtable's key
+ * - SH_DECLARE - if defined function prototypes and type declarations are
+ * generated
+ * - SH_DEFINE - if defined function definitions are generated
+ * - SH_SCOPE - in which scope (e.g. extern, static inline) do function
+ * declarations reside
+ * - SH_RAW_ALLOCATOR - if defined, memory contexts are not used; instead,
+ * use this to allocate bytes
+ * - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions
+ * are defined, so you can supply your own
+ * The following parameters are only relevant when SH_DEFINE is defined:
+ * - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key
+ * - SH_EQUAL(table, a, b) - compare two table keys
+ * - SH_HASH_KEY(table, key) - generate hash for the key
+ * - SH_STORE_HASH - if defined the hash is stored in the elements
+ * - SH_GET_HASH(tb, a) - return the field to store the hash in
+ *
+ * For examples of usage look at tidbitmap.c (file local definition) and
+ * execnodes.h/execGrouping.c (exposed declaration, file local
+ * implementation).
+ *
+ * Hash table design:
+ *
+ * The hash table design chosen is a variant of linear open-addressing. The
+ * reason for doing so is that linear addressing is CPU cache & pipeline
+ * friendly. The biggest disadvantage of simple linear addressing schemes
+ * are highly variable lookup times due to clustering, and deletions
+ * leaving a lot of tombstones around. To address these issues a variant
+ * of "robin hood" hashing is employed. Robin hood hashing optimizes
+ * chaining lengths by moving elements close to their optimal bucket
+ * ("rich" elements), out of the way if a to-be-inserted element is further
+ * away from its optimal position (i.e. it's "poor"). While that can make
+ * insertions slower, the average lookup performance is a lot better, and
+ * higher fill factors can be used in a still performant manner. To avoid
+ * tombstones - which normally solve the issue that a deleted node's
+ * presence is relevant to determine whether a lookup needs to continue
+ * looking or is done - buckets following a deleted element are shifted
+ * backwards, unless they're empty or already at their optimal position.
+ */
+
+#include "port/pg_bitutils.h"
+
+/* helpers */
+#define SH_MAKE_PREFIX(a) CppConcat(a,_)
+#define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
+#define SH_MAKE_NAME_(a,b) CppConcat(a,b)
+
+/* name macros for: */
+
+/* type declarations */
+#define SH_TYPE SH_MAKE_NAME(hash)
+#define SH_STATUS SH_MAKE_NAME(status)
+#define SH_STATUS_EMPTY SH_MAKE_NAME(SH_EMPTY)
+#define SH_STATUS_IN_USE SH_MAKE_NAME(SH_IN_USE)
+#define SH_ITERATOR SH_MAKE_NAME(iterator)
+
+/* function declarations */
+#define SH_CREATE SH_MAKE_NAME(create)
+#define SH_DESTROY SH_MAKE_NAME(destroy)
+#define SH_RESET SH_MAKE_NAME(reset)
+#define SH_INSERT SH_MAKE_NAME(insert)
+#define SH_INSERT_HASH SH_MAKE_NAME(insert_hash)
+#define SH_DELETE SH_MAKE_NAME(delete)
+#define SH_LOOKUP SH_MAKE_NAME(lookup)
+#define SH_LOOKUP_HASH SH_MAKE_NAME(lookup_hash)
+#define SH_GROW SH_MAKE_NAME(grow)
+#define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
+#define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at)
+#define SH_ITERATE SH_MAKE_NAME(iterate)
+#define SH_ALLOCATE SH_MAKE_NAME(allocate)
+#define SH_FREE SH_MAKE_NAME(free)
+#define SH_STAT SH_MAKE_NAME(stat)
+
+/* internal helper functions (no externally visible prototypes) */
+#define SH_COMPUTE_PARAMETERS SH_MAKE_NAME(compute_parameters)
+#define SH_NEXT SH_MAKE_NAME(next)
+#define SH_PREV SH_MAKE_NAME(prev)
+#define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
+#define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
+#define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
+#define SH_INSERT_HASH_INTERNAL SH_MAKE_NAME(insert_hash_internal)
+#define SH_LOOKUP_HASH_INTERNAL SH_MAKE_NAME(lookup_hash_internal)
+
+/* generate forward declarations necessary to use the hash table */
+#ifdef SH_DECLARE
+
+/* type definitions */
+typedef struct SH_TYPE
+{
+ /*
+ * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash
+ * tables. Note that the maximum number of elements is lower
+ * (SH_MAX_FILLFACTOR)
+ */
+ uint64 size;
+
+ /* how many elements have valid contents */
+ uint32 members;
+
+ /* mask for bucket and size calculations, based on size */
+ uint32 sizemask;
+
+ /* boundary after which to grow hashtable */
+ uint32 grow_threshold;
+
+ /* hash buckets */
+ SH_ELEMENT_TYPE *data;
+
+#ifndef SH_RAW_ALLOCATOR
+ /* memory context to use for allocations */
+ MemoryContext ctx;
+#endif
+
+ /* user defined data, useful for callbacks */
+ void *private_data;
+} SH_TYPE;
+
+typedef enum SH_STATUS
+{
+ SH_STATUS_EMPTY = 0x00,
+ SH_STATUS_IN_USE = 0x01
+} SH_STATUS;
+
+typedef struct SH_ITERATOR
+{
+ uint32 cur; /* current element */
+ uint32 end;
+ bool done; /* iterator exhausted? */
+} SH_ITERATOR;
+
+/* externally visible function prototypes */
+#ifdef SH_RAW_ALLOCATOR
+SH_SCOPE SH_TYPE *SH_CREATE(uint32 nelements, void *private_data);
+#else
+SH_SCOPE SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
+ void *private_data);
+#endif
+SH_SCOPE void SH_DESTROY(SH_TYPE * tb);
+SH_SCOPE void SH_RESET(SH_TYPE * tb);
+SH_SCOPE void SH_GROW(SH_TYPE * tb, uint32 newsize);
+SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
+SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
+ uint32 hash, bool *found);
+SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
+SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
+ uint32 hash);
+SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);
+SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
+SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
+SH_SCOPE SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
+SH_SCOPE void SH_STAT(SH_TYPE * tb);
+
+#endif /* SH_DECLARE */
+
+
+/* generate implementation of the hash table */
+#ifdef SH_DEFINE
+
+#ifndef SH_RAW_ALLOCATOR
+#include "utils/memutils.h"
+#endif
+
+/* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */
+#define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
+
+/* normal fillfactor, unless already close to maximum */
+#ifndef SH_FILLFACTOR
+#define SH_FILLFACTOR (0.9)
+#endif
+/* increase fillfactor if we otherwise would error out */
+#define SH_MAX_FILLFACTOR (0.98)
+/* grow if actual and optimal location bigger than */
+#ifndef SH_GROW_MAX_DIB
+#define SH_GROW_MAX_DIB 25
+#endif
+/* grow if more than elements to move when inserting */
+#ifndef SH_GROW_MAX_MOVE
+#define SH_GROW_MAX_MOVE 150
+#endif
+#ifndef SH_GROW_MIN_FILLFACTOR
+/* but do not grow due to SH_GROW_MAX_* if below */
+#define SH_GROW_MIN_FILLFACTOR 0.1
+#endif
+
+#ifdef SH_STORE_HASH
+#define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
+#else
+#define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey))
+#endif
+
+/*
+ * Wrap the following definitions in include guards, to avoid multiple
+ * definition errors if this header is included more than once. The rest of
+ * the file deliberately has no include guards, because it can be included
+ * with different parameters to define functions and types with non-colliding
+ * names.
+ */
+#ifndef SIMPLEHASH_H
+#define SIMPLEHASH_H
+
+#ifdef FRONTEND
+#define sh_error(...) pg_log_error(__VA_ARGS__)
+#define sh_log(...) pg_log_info(__VA_ARGS__)
+#else
+#define sh_error(...) elog(ERROR, __VA_ARGS__)
+#define sh_log(...) elog(LOG, __VA_ARGS__)
+#endif
+
+#endif
+
+/*
+ * Compute sizing parameters for hashtable. Called when creating and growing
+ * the hashtable.
+ */
+static inline void
+SH_COMPUTE_PARAMETERS(SH_TYPE * tb, uint32 newsize)
+{
+ uint64 size;
+
+ /* supporting zero sized hashes would complicate matters */
+ size = Max(newsize, 2);
+
+ /* round up size to the next power of 2, that's how bucketing works */
+ size = pg_nextpower2_64(size);
+ Assert(size <= SH_MAX_SIZE);
+
+ /*
+ * Verify that allocation of ->data is possible on this platform, without
+ * overflowing Size.
+ */
+ if ((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= SIZE_MAX / 2)
+ sh_error("hash table too large");
+
+ /* now set size */
+ tb->size = size;
+
+ if (tb->size == SH_MAX_SIZE)
+ tb->sizemask = 0;
+ else
+ tb->sizemask = tb->size - 1;
+
+ /*
+ * Compute the next threshold at which we need to grow the hash table
+ * again.
+ */
+ if (tb->size == SH_MAX_SIZE)
+ tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR;
+ else
+ tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR;
+}
+
+/* return the optimal bucket for the hash */
+static inline uint32
+SH_INITIAL_BUCKET(SH_TYPE * tb, uint32 hash)
+{
+ return hash & tb->sizemask;
+}
+
+/* return next bucket after the current, handling wraparound */
+static inline uint32
+SH_NEXT(SH_TYPE * tb, uint32 curelem, uint32 startelem)
+{
+ curelem = (curelem + 1) & tb->sizemask;
+
+ Assert(curelem != startelem);
+
+ return curelem;
+}
+
+/* return bucket before the current, handling wraparound */
+static inline uint32
+SH_PREV(SH_TYPE * tb, uint32 curelem, uint32 startelem)
+{
+ curelem = (curelem - 1) & tb->sizemask;
+
+ Assert(curelem != startelem);
+
+ return curelem;
+}
+
+/* return distance between bucket and its optimal position */
+static inline uint32
+SH_DISTANCE_FROM_OPTIMAL(SH_TYPE * tb, uint32 optimal, uint32 bucket)
+{
+ if (optimal <= bucket)
+ return bucket - optimal;
+ else
+ return (tb->size + bucket) - optimal;
+}
+
+static inline uint32
+SH_ENTRY_HASH(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
+{
+#ifdef SH_STORE_HASH
+ return SH_GET_HASH(tb, entry);
+#else
+ return SH_HASH_KEY(tb, entry->SH_KEY);
+#endif
+}
+
+/* default memory allocator function */
+static inline void *SH_ALLOCATE(SH_TYPE * type, Size size);
+static inline void SH_FREE(SH_TYPE * type, void *pointer);
+
+#ifndef SH_USE_NONDEFAULT_ALLOCATOR
+
+/* default memory allocator function */
+static inline void *
+SH_ALLOCATE(SH_TYPE * type, Size size)
+{
+#ifdef SH_RAW_ALLOCATOR
+ return SH_RAW_ALLOCATOR(size);
+#else
+ return MemoryContextAllocExtended(type->ctx, size,
+ MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
+#endif
+}
+
+/* default memory free function */
+static inline void
+SH_FREE(SH_TYPE * type, void *pointer)
+{
+ pfree(pointer);
+}
+
+#endif
+
+/*
+ * Create a hash table with enough space for `nelements` distinct members.
+ * Memory for the hash table is allocated from the passed-in context. If
+ * desired, the array of elements can be allocated using a passed-in allocator;
+ * this could be useful in order to place the array of elements in a shared
+ * memory, or in a context that will outlive the rest of the hash table.
+ * Memory other than for the array of elements will still be allocated from
+ * the passed-in context.
+ */
+#ifdef SH_RAW_ALLOCATOR
+SH_SCOPE SH_TYPE *
+SH_CREATE(uint32 nelements, void *private_data)
+#else
+SH_SCOPE SH_TYPE *
+SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
+#endif
+{
+ SH_TYPE *tb;
+ uint64 size;
+
+#ifdef SH_RAW_ALLOCATOR
+ tb = SH_RAW_ALLOCATOR(sizeof(SH_TYPE));
+#else
+ tb = MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
+ tb->ctx = ctx;
+#endif
+ tb->private_data = private_data;
+
+ /* increase nelements by fillfactor, want to store nelements elements */
+ size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR);
+
+ SH_COMPUTE_PARAMETERS(tb, size);
+
+ tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
+
+ return tb;
+}
+
+/* destroy a previously created hash table */
+SH_SCOPE void
+SH_DESTROY(SH_TYPE * tb)
+{
+ SH_FREE(tb, tb->data);
+ pfree(tb);
+}
+
+/* reset the contents of a previously created hash table */
+SH_SCOPE void
+SH_RESET(SH_TYPE * tb)
+{
+ memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size);
+ tb->members = 0;
+}
+
+/*
+ * Grow a hash table to at least `newsize` buckets.
+ *
+ * Usually this will automatically be called by insertions/deletions, when
+ * necessary. But resizing to the exact input size can be advantageous
+ * performance-wise, when known at some point.
+ */
+SH_SCOPE void
+SH_GROW(SH_TYPE * tb, uint32 newsize)
+{
+ uint64 oldsize = tb->size;
+ SH_ELEMENT_TYPE *olddata = tb->data;
+ SH_ELEMENT_TYPE *newdata;
+ uint32 i;
+ uint32 startelem = 0;
+ uint32 copyelem;
+
+ Assert(oldsize == pg_nextpower2_64(oldsize));
+ Assert(oldsize != SH_MAX_SIZE);
+ Assert(oldsize < newsize);
+
+ /* compute parameters for new table */
+ SH_COMPUTE_PARAMETERS(tb, newsize);
+
+ tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
+
+ newdata = tb->data;
+
+ /*
+ * Copy entries from the old data to newdata. We theoretically could use
+ * SH_INSERT here, to avoid code duplication, but that's more general than
+ * we need. We neither want tb->members increased, nor do we need to do
+ * deal with deleted elements, nor do we need to compare keys. So a
+ * special-cased implementation is lot faster. As resizing can be time
+ * consuming and frequent, that's worthwhile to optimize.
+ *
+ * To be able to simply move entries over, we have to start not at the
+ * first bucket (i.e olddata[0]), but find the first bucket that's either
+ * empty, or is occupied by an entry at its optimal position. Such a
+ * bucket has to exist in any table with a load factor under 1, as not all
+ * buckets are occupied, i.e. there always has to be an empty bucket. By
+ * starting at such a bucket we can move the entries to the larger table,
+ * without having to deal with conflicts.
+ */
+
+ /* search for the first element in the hash that's not wrapped around */
+ for (i = 0; i < oldsize; i++)
+ {
+ SH_ELEMENT_TYPE *oldentry = &olddata[i];
+ uint32 hash;
+ uint32 optimal;
+
+ if (oldentry->status != SH_STATUS_IN_USE)
+ {
+ startelem = i;
+ break;
+ }
+
+ hash = SH_ENTRY_HASH(tb, oldentry);
+ optimal = SH_INITIAL_BUCKET(tb, hash);
+
+ if (optimal == i)
+ {
+ startelem = i;
+ break;
+ }
+ }
+
+ /* and copy all elements in the old table */
+ copyelem = startelem;
+ for (i = 0; i < oldsize; i++)
+ {
+ SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
+
+ if (oldentry->status == SH_STATUS_IN_USE)
+ {
+ uint32 hash;
+ uint32 startelem;
+ uint32 curelem;
+ SH_ELEMENT_TYPE *newentry;
+
+ hash = SH_ENTRY_HASH(tb, oldentry);
+ startelem = SH_INITIAL_BUCKET(tb, hash);
+ curelem = startelem;
+
+ /* find empty element to put data into */
+ while (true)
+ {
+ newentry = &newdata[curelem];
+
+ if (newentry->status == SH_STATUS_EMPTY)
+ {
+ break;
+ }
+
+ curelem = SH_NEXT(tb, curelem, startelem);
+ }
+
+ /* copy entry to new slot */
+ memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
+ }
+
+ /* can't use SH_NEXT here, would use new size */
+ copyelem++;
+ if (copyelem >= oldsize)
+ {
+ copyelem = 0;
+ }
+ }
+
+ SH_FREE(tb, olddata);
+}
+
+/*
+ * This is a separate static inline function, so it can be reliably be inlined
+ * into its wrapper functions even if SH_SCOPE is extern.
+ */
+static inline SH_ELEMENT_TYPE *
+SH_INSERT_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
+{
+ uint32 startelem;
+ uint32 curelem;
+ SH_ELEMENT_TYPE *data;
+ uint32 insertdist;
+
+restart:
+ insertdist = 0;
+
+ /*
+ * We do the grow check even if the key is actually present, to avoid
+ * doing the check inside the loop. This also lets us avoid having to
+ * re-find our position in the hashtable after resizing.
+ *
+ * Note that this also reached when resizing the table due to
+ * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE.
+ */
+ if (unlikely(tb->members >= tb->grow_threshold))
+ {
+ if (tb->size == SH_MAX_SIZE)
+ {
+ sh_error("hash table size exceeded");
+ }
+
+ /*
+ * When optimizing, it can be very useful to print these out.
+ */
+ /* SH_STAT(tb); */
+ SH_GROW(tb, tb->size * 2);
+ /* SH_STAT(tb); */
+ }
+
+ /* perform insert, start bucket search at optimal location */
+ data = tb->data;
+ startelem = SH_INITIAL_BUCKET(tb, hash);
+ curelem = startelem;
+ while (true)
+ {
+ uint32 curdist;
+ uint32 curhash;
+ uint32 curoptimal;
+ SH_ELEMENT_TYPE *entry = &data[curelem];
+
+ /* any empty bucket can directly be used */
+ if (entry->status == SH_STATUS_EMPTY)
+ {
+ tb->members++;
+ entry->SH_KEY = key;
+#ifdef SH_STORE_HASH
+ SH_GET_HASH(tb, entry) = hash;
+#endif
+ entry->status = SH_STATUS_IN_USE;
+ *found = false;
+ return entry;
+ }
+
+ /*
+ * If the bucket is not empty, we either found a match (in which case
+ * we're done), or we have to decide whether to skip over or move the
+ * colliding entry. When the colliding element's distance to its
+ * optimal position is smaller than the to-be-inserted entry's, we
+ * shift the colliding entry (and its followers) forward by one.
+ */
+
+ if (SH_COMPARE_KEYS(tb, hash, key, entry))
+ {
+ Assert(entry->status == SH_STATUS_IN_USE);
+ *found = true;
+ return entry;
+ }
+
+ curhash = SH_ENTRY_HASH(tb, entry);
+ curoptimal = SH_INITIAL_BUCKET(tb, curhash);
+ curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
+
+ if (insertdist > curdist)
+ {
+ SH_ELEMENT_TYPE *lastentry = entry;
+ uint32 emptyelem = curelem;
+ uint32 moveelem;
+ int32 emptydist = 0;
+
+ /* find next empty bucket */
+ while (true)
+ {
+ SH_ELEMENT_TYPE *emptyentry;
+
+ emptyelem = SH_NEXT(tb, emptyelem, startelem);
+ emptyentry = &data[emptyelem];
+
+ if (emptyentry->status == SH_STATUS_EMPTY)
+ {
+ lastentry = emptyentry;
+ break;
+ }
+
+ /*
+ * To avoid negative consequences from overly imbalanced
+ * hashtables, grow the hashtable if collisions would require
+ * us to move a lot of entries. The most likely cause of such
+ * imbalance is filling a (currently) small table, from a
+ * currently big one, in hash-table order. Don't grow if the
+ * hashtable would be too empty, to prevent quick space
+ * explosion for some weird edge cases.
+ */
+ if (unlikely(++emptydist > SH_GROW_MAX_MOVE) &&
+ ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
+ {
+ tb->grow_threshold = 0;
+ goto restart;
+ }
+ }
+
+ /* shift forward, starting at last occupied element */
+
+ /*
+ * TODO: This could be optimized to be one memcpy in many cases,
+ * excepting wrapping around at the end of ->data. Hasn't shown up
+ * in profiles so far though.
+ */
+ moveelem = emptyelem;
+ while (moveelem != curelem)
+ {
+ SH_ELEMENT_TYPE *moveentry;
+
+ moveelem = SH_PREV(tb, moveelem, startelem);
+ moveentry = &data[moveelem];
+
+ memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE));
+ lastentry = moveentry;
+ }
+
+ /* and fill the now empty spot */
+ tb->members++;
+
+ entry->SH_KEY = key;
+#ifdef SH_STORE_HASH
+ SH_GET_HASH(tb, entry) = hash;
+#endif
+ entry->status = SH_STATUS_IN_USE;
+ *found = false;
+ return entry;
+ }
+
+ curelem = SH_NEXT(tb, curelem, startelem);
+ insertdist++;
+
+ /*
+ * To avoid negative consequences from overly imbalanced hashtables,
+ * grow the hashtable if collisions lead to large runs. The most
+ * likely cause of such imbalance is filling a (currently) small
+ * table, from a currently big one, in hash-table order. Don't grow
+ * if the hashtable would be too empty, to prevent quick space
+ * explosion for some weird edge cases.
+ */
+ if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
+ ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
+ {
+ tb->grow_threshold = 0;
+ goto restart;
+ }
+ }
+}
+
+/*
+ * Insert the key key into the hash-table, set *found to true if the key
+ * already exists, false otherwise. Returns the hash-table entry in either
+ * case.
+ */
+SH_SCOPE SH_ELEMENT_TYPE *
+SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
+{
+ uint32 hash = SH_HASH_KEY(tb, key);
+
+ return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
+}
+
+/*
+ * Insert the key key into the hash-table using an already-calculated
+ * hash. Set *found to true if the key already exists, false
+ * otherwise. Returns the hash-table entry in either case.
+ */
+SH_SCOPE SH_ELEMENT_TYPE *
+SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
+{
+ return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
+}
+
+/*
+ * This is a separate static inline function, so it can be reliably be inlined
+ * into its wrapper functions even if SH_SCOPE is extern.
+ */
+static inline SH_ELEMENT_TYPE *
+SH_LOOKUP_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
+{
+ const uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
+ uint32 curelem = startelem;
+
+ while (true)
+ {
+ SH_ELEMENT_TYPE *entry = &tb->data[curelem];
+
+ if (entry->status == SH_STATUS_EMPTY)
+ {
+ return NULL;
+ }
+
+ Assert(entry->status == SH_STATUS_IN_USE);
+
+ if (SH_COMPARE_KEYS(tb, hash, key, entry))
+ return entry;
+
+ /*
+ * TODO: we could stop search based on distance. If the current
+ * buckets's distance-from-optimal is smaller than what we've skipped
+ * already, the entry doesn't exist. Probably only do so if
+ * SH_STORE_HASH is defined, to avoid re-computing hashes?
+ */
+
+ curelem = SH_NEXT(tb, curelem, startelem);
+ }
+}
+
+/*
+ * Lookup up entry in hash table. Returns NULL if key not present.
+ */
+SH_SCOPE SH_ELEMENT_TYPE *
+SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key)
+{
+ uint32 hash = SH_HASH_KEY(tb, key);
+
+ return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
+}
+
+/*
+ * Lookup up entry in hash table using an already-calculated hash.
+ *
+ * Returns NULL if key not present.
+ */
+SH_SCOPE SH_ELEMENT_TYPE *
+SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
+{
+ return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
+}
+
+/*
+ * Delete entry from hash table. Returns whether to-be-deleted key was
+ * present.
+ */
+SH_SCOPE bool
+SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
+{
+ uint32 hash = SH_HASH_KEY(tb, key);
+ uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
+ uint32 curelem = startelem;
+
+ while (true)
+ {
+ SH_ELEMENT_TYPE *entry = &tb->data[curelem];
+
+ if (entry->status == SH_STATUS_EMPTY)
+ return false;
+
+ if (entry->status == SH_STATUS_IN_USE &&
+ SH_COMPARE_KEYS(tb, hash, key, entry))
+ {
+ SH_ELEMENT_TYPE *lastentry = entry;
+
+ tb->members--;
+
+ /*
+ * Backward shift following elements till either an empty element
+ * or an element at its optimal position is encountered.
+ *
+ * While that sounds expensive, the average chain length is short,
+ * and deletions would otherwise require tombstones.
+ */
+ while (true)
+ {
+ SH_ELEMENT_TYPE *curentry;
+ uint32 curhash;
+ uint32 curoptimal;
+
+ curelem = SH_NEXT(tb, curelem, startelem);
+ curentry = &tb->data[curelem];
+
+ if (curentry->status != SH_STATUS_IN_USE)
+ {
+ lastentry->status = SH_STATUS_EMPTY;
+ break;
+ }
+
+ curhash = SH_ENTRY_HASH(tb, curentry);
+ curoptimal = SH_INITIAL_BUCKET(tb, curhash);
+
+ /* current is at optimal position, done */
+ if (curoptimal == curelem)
+ {
+ lastentry->status = SH_STATUS_EMPTY;
+ break;
+ }
+
+ /* shift */
+ memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
+
+ lastentry = curentry;
+ }
+
+ return true;
+ }
+
+ /* TODO: return false; if distance too big */
+
+ curelem = SH_NEXT(tb, curelem, startelem);
+ }
+}
+
+/*
+ * Initialize iterator.
+ */
+SH_SCOPE void
+SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
+{
+ int i;
+ uint64 startelem = PG_UINT64_MAX;
+
+ /*
+ * Search for the first empty element. As deletions during iterations are
+ * supported, we want to start/end at an element that cannot be affected
+ * by elements being shifted.
+ */
+ for (i = 0; i < tb->size; i++)
+ {
+ SH_ELEMENT_TYPE *entry = &tb->data[i];
+
+ if (entry->status != SH_STATUS_IN_USE)
+ {
+ startelem = i;
+ break;
+ }
+ }
+
+ Assert(startelem < SH_MAX_SIZE);
+
+ /*
+ * Iterate backwards, that allows the current element to be deleted, even
+ * if there are backward shifts
+ */
+ iter->cur = startelem;
+ iter->end = iter->cur;
+ iter->done = false;
+}
+
+/*
+ * Initialize iterator to a specific bucket. That's really only useful for
+ * cases where callers are partially iterating over the hashspace, and that
+ * iteration deletes and inserts elements based on visited entries. Doing that
+ * repeatedly could lead to an unbalanced keyspace when always starting at the
+ * same position.
+ */
+SH_SCOPE void
+SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at)
+{
+ /*
+ * Iterate backwards, that allows the current element to be deleted, even
+ * if there are backward shifts.
+ */
+ iter->cur = at & tb->sizemask; /* ensure at is within a valid range */
+ iter->end = iter->cur;
+ iter->done = false;
+}
+
+/*
+ * Iterate over all entries in the hash-table. Return the next occupied entry,
+ * or NULL if done.
+ *
+ * During iteration the current entry in the hash table may be deleted,
+ * without leading to elements being skipped or returned twice. Additionally
+ * the rest of the table may be modified (i.e. there can be insertions or
+ * deletions), but if so, there's neither a guarantee that all nodes are
+ * visited at least once, nor a guarantee that a node is visited at most once.
+ */
+SH_SCOPE SH_ELEMENT_TYPE *
+SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
+{
+ while (!iter->done)
+ {
+ SH_ELEMENT_TYPE *elem;
+
+ elem = &tb->data[iter->cur];
+
+ /* next element in backward direction */
+ iter->cur = (iter->cur - 1) & tb->sizemask;
+
+ if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
+ iter->done = true;
+ if (elem->status == SH_STATUS_IN_USE)
+ {
+ return elem;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Report some statistics about the state of the hashtable. For
+ * debugging/profiling purposes only.
+ */
+SH_SCOPE void
+SH_STAT(SH_TYPE * tb)
+{
+ uint32 max_chain_length = 0;
+ uint32 total_chain_length = 0;
+ double avg_chain_length;
+ double fillfactor;
+ uint32 i;
+
+ uint32 *collisions = palloc0(tb->size * sizeof(uint32));
+ uint32 total_collisions = 0;
+ uint32 max_collisions = 0;
+ double avg_collisions;
+
+ for (i = 0; i < tb->size; i++)
+ {
+ uint32 hash;
+ uint32 optimal;
+ uint32 dist;
+ SH_ELEMENT_TYPE *elem;
+
+ elem = &tb->data[i];
+
+ if (elem->status != SH_STATUS_IN_USE)
+ continue;
+
+ hash = SH_ENTRY_HASH(tb, elem);
+ optimal = SH_INITIAL_BUCKET(tb, hash);
+ dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);
+
+ if (dist > max_chain_length)
+ max_chain_length = dist;
+ total_chain_length += dist;
+
+ collisions[optimal]++;
+ }
+
+ for (i = 0; i < tb->size; i++)
+ {
+ uint32 curcoll = collisions[i];
+
+ if (curcoll == 0)
+ continue;
+
+ /* single contained element is not a collision */
+ curcoll--;
+ total_collisions += curcoll;
+ if (curcoll > max_collisions)
+ max_collisions = curcoll;
+ }
+
+ if (tb->members > 0)
+ {
+ fillfactor = tb->members / ((double) tb->size);
+ avg_chain_length = ((double) total_chain_length) / tb->members;
+ avg_collisions = ((double) total_collisions) / tb->members;
+ }
+ else
+ {
+ fillfactor = 0;
+ avg_chain_length = 0;
+ avg_collisions = 0;
+ }
+
+ sh_log("size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %i, avg_collisions: %f",
+ tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length,
+ total_collisions, max_collisions, avg_collisions);
+}
+
+#endif /* SH_DEFINE */
+
+
+/* undefine external parameters, so next hash table can be defined */
+#undef SH_PREFIX
+#undef SH_KEY_TYPE
+#undef SH_KEY
+#undef SH_ELEMENT_TYPE
+#undef SH_HASH_KEY
+#undef SH_SCOPE
+#undef SH_DECLARE
+#undef SH_DEFINE
+#undef SH_GET_HASH
+#undef SH_STORE_HASH
+#undef SH_USE_NONDEFAULT_ALLOCATOR
+#undef SH_EQUAL
+
+/* undefine locally declared macros */
+#undef SH_MAKE_PREFIX
+#undef SH_MAKE_NAME
+#undef SH_MAKE_NAME_
+#undef SH_FILLFACTOR
+#undef SH_MAX_FILLFACTOR
+#undef SH_GROW_MAX_DIB
+#undef SH_GROW_MAX_MOVE
+#undef SH_GROW_MIN_FILLFACTOR
+#undef SH_MAX_SIZE
+
+/* types */
+#undef SH_TYPE
+#undef SH_STATUS
+#undef SH_STATUS_EMPTY
+#undef SH_STATUS_IN_USE
+#undef SH_ITERATOR
+
+/* external function names */
+#undef SH_CREATE
+#undef SH_DESTROY
+#undef SH_RESET
+#undef SH_INSERT
+#undef SH_INSERT_HASH
+#undef SH_DELETE
+#undef SH_LOOKUP
+#undef SH_LOOKUP_HASH
+#undef SH_GROW
+#undef SH_START_ITERATE
+#undef SH_START_ITERATE_AT
+#undef SH_ITERATE
+#undef SH_ALLOCATE
+#undef SH_FREE
+#undef SH_STAT
+
+/* internal function names */
+#undef SH_COMPUTE_PARAMETERS
+#undef SH_COMPARE_KEYS
+#undef SH_INITIAL_BUCKET
+#undef SH_NEXT
+#undef SH_PREV
+#undef SH_DISTANCE_FROM_OPTIMAL
+#undef SH_ENTRY_HASH
+#undef SH_INSERT_HASH_INTERNAL
+#undef SH_LOOKUP_HASH_INTERNAL
diff --git a/src/include/lib/stringinfo.h b/src/include/lib/stringinfo.h
new file mode 100644
index 0000000..5a2a3db
--- /dev/null
+++ b/src/include/lib/stringinfo.h
@@ -0,0 +1,161 @@
+/*-------------------------------------------------------------------------
+ *
+ * stringinfo.h
+ * Declarations/definitions for "StringInfo" functions.
+ *
+ * StringInfo provides an extensible string data type (currently limited to a
+ * length of 1GB). It can be used to buffer either ordinary C strings
+ * (null-terminated text) or arbitrary binary data. All storage is allocated
+ * with palloc() (falling back to malloc in frontend code).
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/lib/stringinfo.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef STRINGINFO_H
+#define STRINGINFO_H
+
+/*-------------------------
+ * StringInfoData holds information about an extensible string.
+ * data is the current buffer for the string (allocated with palloc).
+ * len is the current string length. There is guaranteed to be
+ * a terminating '\0' at data[len], although this is not very
+ * useful when the string holds binary data rather than text.
+ * maxlen is the allocated size in bytes of 'data', i.e. the maximum
+ * string size (including the terminating '\0' char) that we can
+ * currently store in 'data' without having to reallocate
+ * more space. We must always have maxlen > len.
+ * cursor is initialized to zero by makeStringInfo or initStringInfo,
+ * but is not otherwise touched by the stringinfo.c routines.
+ * Some routines use it to scan through a StringInfo.
+ *-------------------------
+ */
+typedef struct StringInfoData
+{
+ char *data;
+ int len;
+ int maxlen;
+ int cursor;
+} StringInfoData;
+
+typedef StringInfoData *StringInfo;
+
+
+/*------------------------
+ * There are two ways to create a StringInfo object initially:
+ *
+ * StringInfo stringptr = makeStringInfo();
+ * Both the StringInfoData and the data buffer are palloc'd.
+ *
+ * StringInfoData string;
+ * initStringInfo(&string);
+ * The data buffer is palloc'd but the StringInfoData is just local.
+ * This is the easiest approach for a StringInfo object that will
+ * only live as long as the current routine.
+ *
+ * To destroy a StringInfo, pfree() the data buffer, and then pfree() the
+ * StringInfoData if it was palloc'd. There's no special support for this.
+ *
+ * NOTE: some routines build up a string using StringInfo, and then
+ * release the StringInfoData but return the data string itself to their
+ * caller. At that point the data string looks like a plain palloc'd
+ * string.
+ *-------------------------
+ */
+
+/*------------------------
+ * makeStringInfo
+ * Create an empty 'StringInfoData' & return a pointer to it.
+ */
+extern StringInfo makeStringInfo(void);
+
+/*------------------------
+ * initStringInfo
+ * Initialize a StringInfoData struct (with previously undefined contents)
+ * to describe an empty string.
+ */
+extern void initStringInfo(StringInfo str);
+
+/*------------------------
+ * resetStringInfo
+ * Clears the current content of the StringInfo, if any. The
+ * StringInfo remains valid.
+ */
+extern void resetStringInfo(StringInfo str);
+
+/*------------------------
+ * appendStringInfo
+ * Format text data under the control of fmt (an sprintf-style format string)
+ * and append it to whatever is already in str. More space is allocated
+ * to str if necessary. This is sort of like a combination of sprintf and
+ * strcat.
+ */
+extern void appendStringInfo(StringInfo str, const char *fmt,...) pg_attribute_printf(2, 3);
+
+/*------------------------
+ * appendStringInfoVA
+ * Attempt to format text data under the control of fmt (an sprintf-style
+ * format string) and append it to whatever is already in str. If successful
+ * return zero; if not (because there's not enough space), return an estimate
+ * of the space needed, without modifying str. Typically the caller should
+ * pass the return value to enlargeStringInfo() before trying again; see
+ * appendStringInfo for standard usage pattern.
+ */
+extern int appendStringInfoVA(StringInfo str, const char *fmt, va_list args) pg_attribute_printf(2, 0);
+
+/*------------------------
+ * appendStringInfoString
+ * Append a null-terminated string to str.
+ * Like appendStringInfo(str, "%s", s) but faster.
+ */
+extern void appendStringInfoString(StringInfo str, const char *s);
+
+/*------------------------
+ * appendStringInfoChar
+ * Append a single byte to str.
+ * Like appendStringInfo(str, "%c", ch) but much faster.
+ */
+extern void appendStringInfoChar(StringInfo str, char ch);
+
+/*------------------------
+ * appendStringInfoCharMacro
+ * As above, but a macro for even more speed where it matters.
+ * Caution: str argument will be evaluated multiple times.
+ */
+#define appendStringInfoCharMacro(str,ch) \
+ (((str)->len + 1 >= (str)->maxlen) ? \
+ appendStringInfoChar(str, ch) : \
+ (void)((str)->data[(str)->len] = (ch), (str)->data[++(str)->len] = '\0'))
+
+/*------------------------
+ * appendStringInfoSpaces
+ * Append a given number of spaces to str.
+ */
+extern void appendStringInfoSpaces(StringInfo str, int count);
+
+/*------------------------
+ * appendBinaryStringInfo
+ * Append arbitrary binary data to a StringInfo, allocating more space
+ * if necessary.
+ */
+extern void appendBinaryStringInfo(StringInfo str,
+ const char *data, int datalen);
+
+/*------------------------
+ * appendBinaryStringInfoNT
+ * Append arbitrary binary data to a StringInfo, allocating more space
+ * if necessary. Does not ensure a trailing null-byte exists.
+ */
+extern void appendBinaryStringInfoNT(StringInfo str,
+ const char *data, int datalen);
+
+/*------------------------
+ * enlargeStringInfo
+ * Make sure a StringInfo's buffer can hold at least 'needed' more bytes.
+ */
+extern void enlargeStringInfo(StringInfo str, int needed);
+
+#endif /* STRINGINFO_H */