diff options
Diffstat (limited to 'src/test/modules/test_integerset')
8 files changed, 707 insertions, 0 deletions
diff --git a/src/test/modules/test_integerset/.gitignore b/src/test/modules/test_integerset/.gitignore new file mode 100644 index 0000000..5dcb3ff --- /dev/null +++ b/src/test/modules/test_integerset/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/src/test/modules/test_integerset/Makefile b/src/test/modules/test_integerset/Makefile new file mode 100644 index 0000000..799c17c --- /dev/null +++ b/src/test/modules/test_integerset/Makefile @@ -0,0 +1,23 @@ +# src/test/modules/test_integerset/Makefile + +MODULE_big = test_integerset +OBJS = \ + $(WIN32RES) \ + test_integerset.o +PGFILEDESC = "test_integerset - test code for src/backend/lib/integerset.c" + +EXTENSION = test_integerset +DATA = test_integerset--1.0.sql + +REGRESS = test_integerset + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_integerset +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_integerset/README b/src/test/modules/test_integerset/README new file mode 100644 index 0000000..a8b2718 --- /dev/null +++ b/src/test/modules/test_integerset/README @@ -0,0 +1,7 @@ +test_integerset contains unit tests for testing the integer set implementation +in src/backend/lib/integerset.c. + +The tests verify the correctness of the implementation, but they can also be +used as a micro-benchmark. If you set the 'intset_test_stats' flag in +test_integerset.c, the tests will print extra information about execution time +and memory usage. diff --git a/src/test/modules/test_integerset/expected/test_integerset.out b/src/test/modules/test_integerset/expected/test_integerset.out new file mode 100644 index 0000000..822dd03 --- /dev/null +++ b/src/test/modules/test_integerset/expected/test_integerset.out @@ -0,0 +1,31 @@ +CREATE EXTENSION test_integerset; +-- +-- All the logic is in the test_integerset() function. It will throw +-- an error if something fails. +-- +SELECT test_integerset(); +NOTICE: testing intset with empty set +NOTICE: testing intset with distances > 2^60 between values +NOTICE: testing intset with single value 0 +NOTICE: testing intset with single value 1 +NOTICE: testing intset with single value 18446744073709551614 +NOTICE: testing intset with single value 18446744073709551615 +NOTICE: testing intset with value 0, and all between 1000 and 2000 +NOTICE: testing intset with value 1, and all between 1000 and 2000 +NOTICE: testing intset with value 1, and all between 1000 and 2000000 +NOTICE: testing intset with value 18446744073709551614, and all between 1000 and 2000 +NOTICE: testing intset with value 18446744073709551615, and all between 1000 and 2000 +NOTICE: testing intset with pattern "all ones" +NOTICE: testing intset with pattern "alternating bits" +NOTICE: testing intset with pattern "clusters of ten" +NOTICE: testing intset with pattern "clusters of hundred" +NOTICE: testing intset with pattern "one-every-64k" +NOTICE: testing intset with pattern "sparse" +NOTICE: testing intset with pattern "single values, distance > 2^32" +NOTICE: testing intset with pattern "clusters, distance > 2^32" +NOTICE: testing intset with pattern "clusters, distance > 2^60" + test_integerset +----------------- + +(1 row) + diff --git a/src/test/modules/test_integerset/sql/test_integerset.sql b/src/test/modules/test_integerset/sql/test_integerset.sql new file mode 100644 index 0000000..9d970dd --- /dev/null +++ b/src/test/modules/test_integerset/sql/test_integerset.sql @@ -0,0 +1,7 @@ +CREATE EXTENSION test_integerset; + +-- +-- All the logic is in the test_integerset() function. It will throw +-- an error if something fails. +-- +SELECT test_integerset(); diff --git a/src/test/modules/test_integerset/test_integerset--1.0.sql b/src/test/modules/test_integerset/test_integerset--1.0.sql new file mode 100644 index 0000000..d6d5a3f --- /dev/null +++ b/src/test/modules/test_integerset/test_integerset--1.0.sql @@ -0,0 +1,8 @@ +/* src/test/modules/test_integerset/test_integerset--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION test_integerset" to load this file. \quit + +CREATE FUNCTION test_integerset() +RETURNS pg_catalog.void STRICT +AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/src/test/modules/test_integerset/test_integerset.c b/src/test/modules/test_integerset/test_integerset.c new file mode 100644 index 0000000..21c6f49 --- /dev/null +++ b/src/test/modules/test_integerset/test_integerset.c @@ -0,0 +1,623 @@ +/*-------------------------------------------------------------------------- + * + * test_integerset.c + * Test integer set data structure. + * + * Copyright (c) 2019-2021, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/test/modules/test_integerset/test_integerset.c + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "fmgr.h" +#include "lib/integerset.h" +#include "miscadmin.h" +#include "nodes/bitmapset.h" +#include "storage/block.h" +#include "storage/itemptr.h" +#include "utils/memutils.h" +#include "utils/timestamp.h" + +/* + * If you enable this, the "pattern" tests will print information about + * how long populating, probing, and iterating the test set takes, and + * how much memory the test set consumed. That can be used as + * micro-benchmark of various operations and input patterns (you might + * want to increase the number of values used in each of the test, if + * you do that, to reduce noise). + * + * The information is printed to the server's stderr, mostly because + * that's where MemoryContextStats() output goes. + */ +static const bool intset_test_stats = false; + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(test_integerset); + +/* + * A struct to define a pattern of integers, for use with the test_pattern() + * function. + */ +typedef struct +{ + char *test_name; /* short name of the test, for humans */ + char *pattern_str; /* a bit pattern */ + uint64 spacing; /* pattern repeats at this interval */ + uint64 num_values; /* number of integers to set in total */ +} test_spec; + +static const test_spec test_specs[] = { + { + "all ones", "1111111111", + 10, 10000000 + }, + { + "alternating bits", "0101010101", + 10, 10000000 + }, + { + "clusters of ten", "1111111111", + 10000, 10000000 + }, + { + "clusters of hundred", + "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111", + 10000, 100000000 + }, + { + "one-every-64k", "1", + 65536, 10000000 + }, + { + "sparse", "100000000000000000000000000000001", + 10000000, 10000000 + }, + { + "single values, distance > 2^32", "1", + UINT64CONST(10000000000), 1000000 + }, + { + "clusters, distance > 2^32", "10101010", + UINT64CONST(10000000000), 10000000 + }, + { + "clusters, distance > 2^60", "10101010", + UINT64CONST(2000000000000000000), + 23 /* can't be much higher than this, or we + * overflow uint64 */ + } +}; + +static void test_pattern(const test_spec *spec); +static void test_empty(void); +static void test_single_value(uint64 value); +static void check_with_filler(IntegerSet *intset, uint64 x, uint64 value, uint64 filler_min, uint64 filler_max); +static void test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max); +static void test_huge_distances(void); + +/* + * SQL-callable entry point to perform all tests. + */ +Datum +test_integerset(PG_FUNCTION_ARGS) +{ + /* Tests for various corner cases */ + test_empty(); + test_huge_distances(); + test_single_value(0); + test_single_value(1); + test_single_value(PG_UINT64_MAX - 1); + test_single_value(PG_UINT64_MAX); + test_single_value_and_filler(0, 1000, 2000); + test_single_value_and_filler(1, 1000, 2000); + test_single_value_and_filler(1, 1000, 2000000); + test_single_value_and_filler(PG_UINT64_MAX - 1, 1000, 2000); + test_single_value_and_filler(PG_UINT64_MAX, 1000, 2000); + + /* Test different test patterns, with lots of entries */ + for (int i = 0; i < lengthof(test_specs); i++) + { + test_pattern(&test_specs[i]); + } + + PG_RETURN_VOID(); +} + +/* + * Test with a repeating pattern, defined by the 'spec'. + */ +static void +test_pattern(const test_spec *spec) +{ + IntegerSet *intset; + MemoryContext intset_ctx; + MemoryContext old_ctx; + TimestampTz starttime; + TimestampTz endtime; + uint64 n; + uint64 last_int; + int patternlen; + uint64 *pattern_values; + uint64 pattern_num_values; + + elog(NOTICE, "testing intset with pattern \"%s\"", spec->test_name); + if (intset_test_stats) + fprintf(stderr, "-----\ntesting intset with pattern \"%s\"\n", spec->test_name); + + /* Pre-process the pattern, creating an array of integers from it. */ + patternlen = strlen(spec->pattern_str); + pattern_values = palloc(patternlen * sizeof(uint64)); + pattern_num_values = 0; + for (int i = 0; i < patternlen; i++) + { + if (spec->pattern_str[i] == '1') + pattern_values[pattern_num_values++] = i; + } + + /* + * Allocate the integer set. + * + * Allocate it in a separate memory context, so that we can print its + * memory usage easily. (intset_create() creates a memory context of its + * own, too, but we don't have direct access to it, so we cannot call + * MemoryContextStats() on it directly). + */ + intset_ctx = AllocSetContextCreate(CurrentMemoryContext, + "intset test", + ALLOCSET_SMALL_SIZES); + MemoryContextSetIdentifier(intset_ctx, spec->test_name); + old_ctx = MemoryContextSwitchTo(intset_ctx); + intset = intset_create(); + MemoryContextSwitchTo(old_ctx); + + /* + * Add values to the set. + */ + starttime = GetCurrentTimestamp(); + + n = 0; + last_int = 0; + while (n < spec->num_values) + { + uint64 x = 0; + + for (int i = 0; i < pattern_num_values && n < spec->num_values; i++) + { + x = last_int + pattern_values[i]; + + intset_add_member(intset, x); + n++; + } + last_int += spec->spacing; + } + + endtime = GetCurrentTimestamp(); + + if (intset_test_stats) + fprintf(stderr, "added " UINT64_FORMAT " values in %d ms\n", + spec->num_values, (int) (endtime - starttime) / 1000); + + /* + * Print stats on the amount of memory used. + * + * We print the usage reported by intset_memory_usage(), as well as the + * stats from the memory context. They should be in the same ballpark, + * but it's hard to automate testing that, so if you're making changes to + * the implementation, just observe that manually. + */ + if (intset_test_stats) + { + uint64 mem_usage; + + /* + * Also print memory usage as reported by intset_memory_usage(). It + * should be in the same ballpark as the usage reported by + * MemoryContextStats(). + */ + mem_usage = intset_memory_usage(intset); + fprintf(stderr, "intset_memory_usage() reported " UINT64_FORMAT " (%0.2f bytes / integer)\n", + mem_usage, (double) mem_usage / spec->num_values); + + MemoryContextStats(intset_ctx); + } + + /* Check that intset_get_num_entries works */ + n = intset_num_entries(intset); + if (n != spec->num_values) + elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, n, spec->num_values); + + /* + * Test random-access probes with intset_is_member() + */ + starttime = GetCurrentTimestamp(); + + for (n = 0; n < 100000; n++) + { + bool b; + bool expected; + uint64 x; + + /* + * Pick next value to probe at random. We limit the probes to the + * last integer that we added to the set, plus an arbitrary constant + * (1000). There's no point in probing the whole 0 - 2^64 range, if + * only a small part of the integer space is used. We would very + * rarely hit values that are actually in the set. + */ + x = (pg_lrand48() << 31) | pg_lrand48(); + x = x % (last_int + 1000); + + /* Do we expect this value to be present in the set? */ + if (x >= last_int) + expected = false; + else + { + uint64 idx = x % spec->spacing; + + if (idx >= patternlen) + expected = false; + else if (spec->pattern_str[idx] == '1') + expected = true; + else + expected = false; + } + + /* Is it present according to intset_is_member() ? */ + b = intset_is_member(intset, x); + + if (b != expected) + elog(ERROR, "mismatch at " UINT64_FORMAT ": %d vs %d", x, b, expected); + } + endtime = GetCurrentTimestamp(); + if (intset_test_stats) + fprintf(stderr, "probed " UINT64_FORMAT " values in %d ms\n", + n, (int) (endtime - starttime) / 1000); + + /* + * Test iterator + */ + starttime = GetCurrentTimestamp(); + + intset_begin_iterate(intset); + n = 0; + last_int = 0; + while (n < spec->num_values) + { + for (int i = 0; i < pattern_num_values && n < spec->num_values; i++) + { + uint64 expected = last_int + pattern_values[i]; + uint64 x; + + if (!intset_iterate_next(intset, &x)) + break; + + if (x != expected) + elog(ERROR, "iterate returned wrong value; got " UINT64_FORMAT ", expected " UINT64_FORMAT, x, expected); + n++; + } + last_int += spec->spacing; + } + endtime = GetCurrentTimestamp(); + if (intset_test_stats) + fprintf(stderr, "iterated " UINT64_FORMAT " values in %d ms\n", + n, (int) (endtime - starttime) / 1000); + + if (n < spec->num_values) + elog(ERROR, "iterator stopped short after " UINT64_FORMAT " entries, expected " UINT64_FORMAT, n, spec->num_values); + if (n > spec->num_values) + elog(ERROR, "iterator returned " UINT64_FORMAT " entries, " UINT64_FORMAT " was expected", n, spec->num_values); + + MemoryContextDelete(intset_ctx); +} + +/* + * Test with a set containing a single integer. + */ +static void +test_single_value(uint64 value) +{ + IntegerSet *intset; + uint64 x; + uint64 num_entries; + bool found; + + elog(NOTICE, "testing intset with single value " UINT64_FORMAT, value); + + /* Create the set. */ + intset = intset_create(); + intset_add_member(intset, value); + + /* Test intset_get_num_entries() */ + num_entries = intset_num_entries(intset); + if (num_entries != 1) + elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected 1", num_entries); + + /* + * Test intset_is_member() at various special values, like 0 and maximum + * possible 64-bit integer, as well as the value itself. + */ + if (intset_is_member(intset, 0) != (value == 0)) + elog(ERROR, "intset_is_member failed for 0"); + if (intset_is_member(intset, 1) != (value == 1)) + elog(ERROR, "intset_is_member failed for 1"); + if (intset_is_member(intset, PG_UINT64_MAX) != (value == PG_UINT64_MAX)) + elog(ERROR, "intset_is_member failed for PG_UINT64_MAX"); + if (intset_is_member(intset, value) != true) + elog(ERROR, "intset_is_member failed for the tested value"); + + /* + * Test iterator + */ + intset_begin_iterate(intset); + found = intset_iterate_next(intset, &x); + if (!found || x != value) + elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x); + + found = intset_iterate_next(intset, &x); + if (found) + elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x); +} + +/* + * Test with an integer set that contains: + * + * - a given single 'value', and + * - all integers between 'filler_min' and 'filler_max'. + * + * This exercises different codepaths than testing just with a single value, + * because the implementation buffers newly-added values. If we add just a + * single value to the set, we won't test the internal B-tree code at all, + * just the code that deals with the buffer. + */ +static void +test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max) +{ + IntegerSet *intset; + uint64 x; + bool found; + uint64 *iter_expected; + uint64 n = 0; + uint64 num_entries = 0; + uint64 mem_usage; + + elog(NOTICE, "testing intset with value " UINT64_FORMAT ", and all between " UINT64_FORMAT " and " UINT64_FORMAT, + value, filler_min, filler_max); + + intset = intset_create(); + + iter_expected = palloc(sizeof(uint64) * (filler_max - filler_min + 1)); + if (value < filler_min) + { + intset_add_member(intset, value); + iter_expected[n++] = value; + } + + for (x = filler_min; x < filler_max; x++) + { + intset_add_member(intset, x); + iter_expected[n++] = x; + } + + if (value >= filler_max) + { + intset_add_member(intset, value); + iter_expected[n++] = value; + } + + /* Test intset_get_num_entries() */ + num_entries = intset_num_entries(intset); + if (num_entries != n) + elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, num_entries, n); + + /* + * Test intset_is_member() at various spots, at and around the values that + * we expect to be set, as well as 0 and the maximum possible value. + */ + check_with_filler(intset, 0, + value, filler_min, filler_max); + check_with_filler(intset, 1, + value, filler_min, filler_max); + check_with_filler(intset, filler_min - 1, + value, filler_min, filler_max); + check_with_filler(intset, filler_min, + value, filler_min, filler_max); + check_with_filler(intset, filler_min + 1, + value, filler_min, filler_max); + check_with_filler(intset, value - 1, + value, filler_min, filler_max); + check_with_filler(intset, value, + value, filler_min, filler_max); + check_with_filler(intset, value + 1, + value, filler_min, filler_max); + check_with_filler(intset, filler_max - 1, + value, filler_min, filler_max); + check_with_filler(intset, filler_max, + value, filler_min, filler_max); + check_with_filler(intset, filler_max + 1, + value, filler_min, filler_max); + check_with_filler(intset, PG_UINT64_MAX - 1, + value, filler_min, filler_max); + check_with_filler(intset, PG_UINT64_MAX, + value, filler_min, filler_max); + + intset_begin_iterate(intset); + for (uint64 i = 0; i < n; i++) + { + found = intset_iterate_next(intset, &x); + if (!found || x != iter_expected[i]) + elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x); + } + found = intset_iterate_next(intset, &x); + if (found) + elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x); + + mem_usage = intset_memory_usage(intset); + if (mem_usage < 5000 || mem_usage > 500000000) + elog(ERROR, "intset_memory_usage() reported suspicious value: " UINT64_FORMAT, mem_usage); +} + +/* + * Helper function for test_single_value_and_filler. + * + * Calls intset_is_member() for value 'x', and checks that the result is what + * we expect. + */ +static void +check_with_filler(IntegerSet *intset, uint64 x, + uint64 value, uint64 filler_min, uint64 filler_max) +{ + bool expected; + bool actual; + + expected = (x == value || (filler_min <= x && x < filler_max)); + + actual = intset_is_member(intset, x); + + if (actual != expected) + elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x); +} + +/* + * Test empty set + */ +static void +test_empty(void) +{ + IntegerSet *intset; + uint64 x; + + elog(NOTICE, "testing intset with empty set"); + + intset = intset_create(); + + /* Test intset_is_member() */ + if (intset_is_member(intset, 0) != false) + elog(ERROR, "intset_is_member on empty set returned true"); + if (intset_is_member(intset, 1) != false) + elog(ERROR, "intset_is_member on empty set returned true"); + if (intset_is_member(intset, PG_UINT64_MAX) != false) + elog(ERROR, "intset_is_member on empty set returned true"); + + /* Test iterator */ + intset_begin_iterate(intset); + if (intset_iterate_next(intset, &x)) + elog(ERROR, "intset_iterate_next on empty set returned a value (" UINT64_FORMAT ")", x); +} + +/* + * Test with integers that are more than 2^60 apart. + * + * The Simple-8b encoding used by the set implementation can only encode + * values up to 2^60. That makes large differences like this interesting + * to test. + */ +static void +test_huge_distances(void) +{ + IntegerSet *intset; + uint64 values[1000]; + int num_values = 0; + uint64 val = 0; + bool found; + uint64 x; + + elog(NOTICE, "testing intset with distances > 2^60 between values"); + + val = 0; + values[num_values++] = val; + + /* Test differences on both sides of the 2^60 boundary. */ + val += UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */ + values[num_values++] = val; + + val += UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */ + values[num_values++] = val; + + val += UINT64CONST(1152921504606846976); /* 2^60 */ + values[num_values++] = val; + + val += UINT64CONST(1152921504606846976); /* 2^60 */ + values[num_values++] = val; + + val += UINT64CONST(1152921504606846976); /* 2^60 */ + values[num_values++] = val; + + val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */ + values[num_values++] = val; + + val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */ + values[num_values++] = val; + + val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */ + values[num_values++] = val; + + val += UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */ + values[num_values++] = val; + + val += UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */ + values[num_values++] = val; + + val += UINT64CONST(1152921504606846976); /* 2^60 */ + values[num_values++] = val; + + /* + * We're now very close to 2^64, so can't add large values anymore. But + * add more smaller values to the end, to make sure that all the above + * values get flushed and packed into the tree structure. + */ + while (num_values < 1000) + { + val += pg_lrand48(); + values[num_values++] = val; + } + + /* Create an IntegerSet using these values */ + intset = intset_create(); + for (int i = 0; i < num_values; i++) + intset_add_member(intset, values[i]); + + /* + * Test intset_is_member() around each of these values + */ + for (int i = 0; i < num_values; i++) + { + uint64 x = values[i]; + bool expected; + bool result; + + if (x > 0) + { + expected = (values[i - 1] == x - 1); + result = intset_is_member(intset, x - 1); + if (result != expected) + elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x - 1); + } + + result = intset_is_member(intset, x); + if (result != true) + elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x); + + expected = (i != num_values - 1) ? (values[i + 1] == x + 1) : false; + result = intset_is_member(intset, x + 1); + if (result != expected) + elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x + 1); + } + + /* + * Test iterator + */ + intset_begin_iterate(intset); + for (int i = 0; i < num_values; i++) + { + found = intset_iterate_next(intset, &x); + if (!found || x != values[i]) + elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x); + } + found = intset_iterate_next(intset, &x); + if (found) + elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x); +} diff --git a/src/test/modules/test_integerset/test_integerset.control b/src/test/modules/test_integerset/test_integerset.control new file mode 100644 index 0000000..7d20c2d --- /dev/null +++ b/src/test/modules/test_integerset/test_integerset.control @@ -0,0 +1,4 @@ +comment = 'Test code for integerset' +default_version = '1.0' +module_pathname = '$libdir/test_integerset' +relocatable = true |