diff options
Diffstat (limited to 'src/backend/utils/adt/mac.c')
-rw-r--r-- | src/backend/utils/adt/mac.c | 548 |
1 files changed, 548 insertions, 0 deletions
diff --git a/src/backend/utils/adt/mac.c b/src/backend/utils/adt/mac.c new file mode 100644 index 0000000..844d881 --- /dev/null +++ b/src/backend/utils/adt/mac.c @@ -0,0 +1,548 @@ +/*------------------------------------------------------------------------- + * + * mac.c + * PostgreSQL type definitions for 6 byte, EUI-48, MAC addresses. + * + * Portions Copyright (c) 1998-2021, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/mac.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "common/hashfn.h" +#include "lib/hyperloglog.h" +#include "libpq/pqformat.h" +#include "port/pg_bswap.h" +#include "utils/builtins.h" +#include "utils/guc.h" +#include "utils/inet.h" +#include "utils/sortsupport.h" + + +/* + * Utility macros used for sorting and comparing: + */ + +#define hibits(addr) \ + ((unsigned long)(((addr)->a<<16)|((addr)->b<<8)|((addr)->c))) + +#define lobits(addr) \ + ((unsigned long)(((addr)->d<<16)|((addr)->e<<8)|((addr)->f))) + +/* sortsupport for macaddr */ +typedef struct +{ + int64 input_count; /* number of non-null values seen */ + bool estimating; /* true if estimating cardinality */ + + hyperLogLogState abbr_card; /* cardinality estimator */ +} macaddr_sortsupport_state; + +static int macaddr_cmp_internal(macaddr *a1, macaddr *a2); +static int macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup); +static int macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup); +static bool macaddr_abbrev_abort(int memtupcount, SortSupport ssup); +static Datum macaddr_abbrev_convert(Datum original, SortSupport ssup); + +/* + * MAC address reader. Accepts several common notations. + */ + +Datum +macaddr_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + macaddr *result; + int a, + b, + c, + d, + e, + f; + char junk[2]; + int count; + + /* %1s matches iff there is trailing non-whitespace garbage */ + + count = sscanf(str, "%x:%x:%x:%x:%x:%x%1s", + &a, &b, &c, &d, &e, &f, junk); + if (count != 6) + count = sscanf(str, "%x-%x-%x-%x-%x-%x%1s", + &a, &b, &c, &d, &e, &f, junk); + if (count != 6) + count = sscanf(str, "%2x%2x%2x:%2x%2x%2x%1s", + &a, &b, &c, &d, &e, &f, junk); + if (count != 6) + count = sscanf(str, "%2x%2x%2x-%2x%2x%2x%1s", + &a, &b, &c, &d, &e, &f, junk); + if (count != 6) + count = sscanf(str, "%2x%2x.%2x%2x.%2x%2x%1s", + &a, &b, &c, &d, &e, &f, junk); + if (count != 6) + count = sscanf(str, "%2x%2x-%2x%2x-%2x%2x%1s", + &a, &b, &c, &d, &e, &f, junk); + if (count != 6) + count = sscanf(str, "%2x%2x%2x%2x%2x%2x%1s", + &a, &b, &c, &d, &e, &f, junk); + if (count != 6) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s: \"%s\"", "macaddr", + str))); + + if ((a < 0) || (a > 255) || (b < 0) || (b > 255) || + (c < 0) || (c > 255) || (d < 0) || (d > 255) || + (e < 0) || (e > 255) || (f < 0) || (f > 255)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("invalid octet value in \"macaddr\" value: \"%s\"", str))); + + result = (macaddr *) palloc(sizeof(macaddr)); + + result->a = a; + result->b = b; + result->c = c; + result->d = d; + result->e = e; + result->f = f; + + PG_RETURN_MACADDR_P(result); +} + +/* + * MAC address output function. Fixed format. + */ + +Datum +macaddr_out(PG_FUNCTION_ARGS) +{ + macaddr *addr = PG_GETARG_MACADDR_P(0); + char *result; + + result = (char *) palloc(32); + + snprintf(result, 32, "%02x:%02x:%02x:%02x:%02x:%02x", + addr->a, addr->b, addr->c, addr->d, addr->e, addr->f); + + PG_RETURN_CSTRING(result); +} + +/* + * macaddr_recv - converts external binary format to macaddr + * + * The external representation is just the six bytes, MSB first. + */ +Datum +macaddr_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + macaddr *addr; + + addr = (macaddr *) palloc(sizeof(macaddr)); + + addr->a = pq_getmsgbyte(buf); + addr->b = pq_getmsgbyte(buf); + addr->c = pq_getmsgbyte(buf); + addr->d = pq_getmsgbyte(buf); + addr->e = pq_getmsgbyte(buf); + addr->f = pq_getmsgbyte(buf); + + PG_RETURN_MACADDR_P(addr); +} + +/* + * macaddr_send - converts macaddr to binary format + */ +Datum +macaddr_send(PG_FUNCTION_ARGS) +{ + macaddr *addr = PG_GETARG_MACADDR_P(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendbyte(&buf, addr->a); + pq_sendbyte(&buf, addr->b); + pq_sendbyte(&buf, addr->c); + pq_sendbyte(&buf, addr->d); + pq_sendbyte(&buf, addr->e); + pq_sendbyte(&buf, addr->f); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + +/* + * Comparison function for sorting: + */ + +static int +macaddr_cmp_internal(macaddr *a1, macaddr *a2) +{ + if (hibits(a1) < hibits(a2)) + return -1; + else if (hibits(a1) > hibits(a2)) + return 1; + else if (lobits(a1) < lobits(a2)) + return -1; + else if (lobits(a1) > lobits(a2)) + return 1; + else + return 0; +} + +Datum +macaddr_cmp(PG_FUNCTION_ARGS) +{ + macaddr *a1 = PG_GETARG_MACADDR_P(0); + macaddr *a2 = PG_GETARG_MACADDR_P(1); + + PG_RETURN_INT32(macaddr_cmp_internal(a1, a2)); +} + +/* + * Boolean comparisons. + */ + +Datum +macaddr_lt(PG_FUNCTION_ARGS) +{ + macaddr *a1 = PG_GETARG_MACADDR_P(0); + macaddr *a2 = PG_GETARG_MACADDR_P(1); + + PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) < 0); +} + +Datum +macaddr_le(PG_FUNCTION_ARGS) +{ + macaddr *a1 = PG_GETARG_MACADDR_P(0); + macaddr *a2 = PG_GETARG_MACADDR_P(1); + + PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) <= 0); +} + +Datum +macaddr_eq(PG_FUNCTION_ARGS) +{ + macaddr *a1 = PG_GETARG_MACADDR_P(0); + macaddr *a2 = PG_GETARG_MACADDR_P(1); + + PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) == 0); +} + +Datum +macaddr_ge(PG_FUNCTION_ARGS) +{ + macaddr *a1 = PG_GETARG_MACADDR_P(0); + macaddr *a2 = PG_GETARG_MACADDR_P(1); + + PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) >= 0); +} + +Datum +macaddr_gt(PG_FUNCTION_ARGS) +{ + macaddr *a1 = PG_GETARG_MACADDR_P(0); + macaddr *a2 = PG_GETARG_MACADDR_P(1); + + PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) > 0); +} + +Datum +macaddr_ne(PG_FUNCTION_ARGS) +{ + macaddr *a1 = PG_GETARG_MACADDR_P(0); + macaddr *a2 = PG_GETARG_MACADDR_P(1); + + PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) != 0); +} + +/* + * Support function for hash indexes on macaddr. + */ +Datum +hashmacaddr(PG_FUNCTION_ARGS) +{ + macaddr *key = PG_GETARG_MACADDR_P(0); + + return hash_any((unsigned char *) key, sizeof(macaddr)); +} + +Datum +hashmacaddrextended(PG_FUNCTION_ARGS) +{ + macaddr *key = PG_GETARG_MACADDR_P(0); + + return hash_any_extended((unsigned char *) key, sizeof(macaddr), + PG_GETARG_INT64(1)); +} + +/* + * Arithmetic functions: bitwise NOT, AND, OR. + */ +Datum +macaddr_not(PG_FUNCTION_ARGS) +{ + macaddr *addr = PG_GETARG_MACADDR_P(0); + macaddr *result; + + result = (macaddr *) palloc(sizeof(macaddr)); + result->a = ~addr->a; + result->b = ~addr->b; + result->c = ~addr->c; + result->d = ~addr->d; + result->e = ~addr->e; + result->f = ~addr->f; + PG_RETURN_MACADDR_P(result); +} + +Datum +macaddr_and(PG_FUNCTION_ARGS) +{ + macaddr *addr1 = PG_GETARG_MACADDR_P(0); + macaddr *addr2 = PG_GETARG_MACADDR_P(1); + macaddr *result; + + result = (macaddr *) palloc(sizeof(macaddr)); + result->a = addr1->a & addr2->a; + result->b = addr1->b & addr2->b; + result->c = addr1->c & addr2->c; + result->d = addr1->d & addr2->d; + result->e = addr1->e & addr2->e; + result->f = addr1->f & addr2->f; + PG_RETURN_MACADDR_P(result); +} + +Datum +macaddr_or(PG_FUNCTION_ARGS) +{ + macaddr *addr1 = PG_GETARG_MACADDR_P(0); + macaddr *addr2 = PG_GETARG_MACADDR_P(1); + macaddr *result; + + result = (macaddr *) palloc(sizeof(macaddr)); + result->a = addr1->a | addr2->a; + result->b = addr1->b | addr2->b; + result->c = addr1->c | addr2->c; + result->d = addr1->d | addr2->d; + result->e = addr1->e | addr2->e; + result->f = addr1->f | addr2->f; + PG_RETURN_MACADDR_P(result); +} + +/* + * Truncation function to allow comparing mac manufacturers. + * From suggestion by Alex Pilosov <alex@pilosoft.com> + */ +Datum +macaddr_trunc(PG_FUNCTION_ARGS) +{ + macaddr *addr = PG_GETARG_MACADDR_P(0); + macaddr *result; + + result = (macaddr *) palloc(sizeof(macaddr)); + + result->a = addr->a; + result->b = addr->b; + result->c = addr->c; + result->d = 0; + result->e = 0; + result->f = 0; + + PG_RETURN_MACADDR_P(result); +} + +/* + * SortSupport strategy function. Populates a SortSupport struct with the + * information necessary to use comparison by abbreviated keys. + */ +Datum +macaddr_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + + ssup->comparator = macaddr_fast_cmp; + ssup->ssup_extra = NULL; + + if (ssup->abbreviate) + { + macaddr_sortsupport_state *uss; + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + uss = palloc(sizeof(macaddr_sortsupport_state)); + uss->input_count = 0; + uss->estimating = true; + initHyperLogLog(&uss->abbr_card, 10); + + ssup->ssup_extra = uss; + + ssup->comparator = macaddr_cmp_abbrev; + ssup->abbrev_converter = macaddr_abbrev_convert; + ssup->abbrev_abort = macaddr_abbrev_abort; + ssup->abbrev_full_comparator = macaddr_fast_cmp; + + MemoryContextSwitchTo(oldcontext); + } + + PG_RETURN_VOID(); +} + +/* + * SortSupport "traditional" comparison function. Pulls two MAC addresses from + * the heap and runs a standard comparison on them. + */ +static int +macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup) +{ + macaddr *arg1 = DatumGetMacaddrP(x); + macaddr *arg2 = DatumGetMacaddrP(y); + + return macaddr_cmp_internal(arg1, arg2); +} + +/* + * SortSupport abbreviated key comparison function. Compares two MAC addresses + * quickly by treating them like integers, and without having to go to the + * heap. + */ +static int +macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup) +{ + if (x > y) + return 1; + else if (x == y) + return 0; + else + return -1; +} + +/* + * Callback for estimating effectiveness of abbreviated key optimization. + * + * We pay no attention to the cardinality of the non-abbreviated data, because + * there is no equality fast-path within authoritative macaddr comparator. + */ +static bool +macaddr_abbrev_abort(int memtupcount, SortSupport ssup) +{ + macaddr_sortsupport_state *uss = ssup->ssup_extra; + double abbr_card; + + if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating) + return false; + + abbr_card = estimateHyperLogLog(&uss->abbr_card); + + /* + * If we have >100k distinct values, then even if we were sorting many + * billion rows we'd likely still break even, and the penalty of undoing + * that many rows of abbrevs would probably not be worth it. At this point + * we stop counting because we know that we're now fully committed. + */ + if (abbr_card > 100000.0) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "macaddr_abbrev: estimation ends at cardinality %f" + " after " INT64_FORMAT " values (%d rows)", + abbr_card, uss->input_count, memtupcount); +#endif + uss->estimating = false; + return false; + } + + /* + * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row + * fudge factor allows us to abort earlier on genuinely pathological data + * where we've had exactly one abbreviated value in the first 2k + * (non-null) rows. + */ + if (abbr_card < uss->input_count / 2000.0 + 0.5) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "macaddr_abbrev: aborting abbreviation at cardinality %f" + " below threshold %f after " INT64_FORMAT " values (%d rows)", + abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count, + memtupcount); +#endif + return true; + } + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "macaddr_abbrev: cardinality %f after " INT64_FORMAT + " values (%d rows)", abbr_card, uss->input_count, memtupcount); +#endif + + return false; +} + +/* + * SortSupport conversion routine. Converts original macaddr representation + * to abbreviated key representation. + * + * Packs the bytes of a 6-byte MAC address into a Datum and treats it as an + * unsigned integer for purposes of comparison. On a 64-bit machine, there + * will be two zeroed bytes of padding. The integer is converted to native + * endianness to facilitate easy comparison. + */ +static Datum +macaddr_abbrev_convert(Datum original, SortSupport ssup) +{ + macaddr_sortsupport_state *uss = ssup->ssup_extra; + macaddr *authoritative = DatumGetMacaddrP(original); + Datum res; + + /* + * On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of + * the MAC address in. There will be two bytes of zero padding on the end + * of the least significant bits. + */ +#if SIZEOF_DATUM == 8 + memset(&res, 0, SIZEOF_DATUM); + memcpy(&res, authoritative, sizeof(macaddr)); +#else /* SIZEOF_DATUM != 8 */ + memcpy(&res, authoritative, SIZEOF_DATUM); +#endif + uss->input_count += 1; + + /* + * Cardinality estimation. The estimate uses uint32, so on a 64-bit + * architecture, XOR the two 32-bit halves together to produce slightly + * more entropy. The two zeroed bytes won't have any practical impact on + * this operation. + */ + if (uss->estimating) + { + uint32 tmp; + +#if SIZEOF_DATUM == 8 + tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); +#else /* SIZEOF_DATUM != 8 */ + tmp = (uint32) res; +#endif + + addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); + } + + /* + * Byteswap on little-endian machines. + * + * This is needed so that macaddr_cmp_abbrev() (an unsigned integer 3-way + * comparator) works correctly on all platforms. Without this, the + * comparator would have to call memcmp() with a pair of pointers to the + * first byte of each abbreviated key, which is slower. + */ + res = DatumBigEndianToNative(res); + + return res; +} |