/*------------------------------------------------------------------------- * * uuid.c * Functions for the built-in type "uuid". * * Copyright (c) 2007-2023, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/uuid.c * *------------------------------------------------------------------------- */ #include "postgres.h" #include "common/hashfn.h" #include "lib/hyperloglog.h" #include "libpq/pqformat.h" #include "port/pg_bswap.h" #include "utils/builtins.h" #include "utils/guc.h" #include "utils/sortsupport.h" #include "utils/uuid.h" /* sortsupport for uuid */ typedef struct { int64 input_count; /* number of non-null values seen */ bool estimating; /* true if estimating cardinality */ hyperLogLogState abbr_card; /* cardinality estimator */ } uuid_sortsupport_state; static void string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext); static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2); static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup); static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup); static Datum uuid_abbrev_convert(Datum original, SortSupport ssup); Datum uuid_in(PG_FUNCTION_ARGS) { char *uuid_str = PG_GETARG_CSTRING(0); pg_uuid_t *uuid; uuid = (pg_uuid_t *) palloc(sizeof(*uuid)); string_to_uuid(uuid_str, uuid, fcinfo->context); PG_RETURN_UUID_P(uuid); } Datum uuid_out(PG_FUNCTION_ARGS) { pg_uuid_t *uuid = PG_GETARG_UUID_P(0); static const char hex_chars[] = "0123456789abcdef"; StringInfoData buf; int i; initStringInfo(&buf); for (i = 0; i < UUID_LEN; i++) { int hi; int lo; /* * We print uuid values as a string of 8, 4, 4, 4, and then 12 * hexadecimal characters, with each group is separated by a hyphen * ("-"). Therefore, add the hyphens at the appropriate places here. */ if (i == 4 || i == 6 || i == 8 || i == 10) appendStringInfoChar(&buf, '-'); hi = uuid->data[i] >> 4; lo = uuid->data[i] & 0x0F; appendStringInfoChar(&buf, hex_chars[hi]); appendStringInfoChar(&buf, hex_chars[lo]); } PG_RETURN_CSTRING(buf.data); } /* * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash * after each group of 4 hexadecimal digits, and optionally surrounded by {}. * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal * digits, is the only one used for output.) */ static void string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext) { const char *src = source; bool braces = false; int i; if (src[0] == '{') { src++; braces = true; } for (i = 0; i < UUID_LEN; i++) { char str_buf[3]; if (src[0] == '\0' || src[1] == '\0') goto syntax_error; memcpy(str_buf, src, 2); if (!isxdigit((unsigned char) str_buf[0]) || !isxdigit((unsigned char) str_buf[1])) goto syntax_error; str_buf[2] = '\0'; uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16); src += 2; if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1) src++; } if (braces) { if (*src != '}') goto syntax_error; src++; } if (*src != '\0') goto syntax_error; return; syntax_error: ereturn(escontext,, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type %s: \"%s\"", "uuid", source))); } Datum uuid_recv(PG_FUNCTION_ARGS) { StringInfo buffer = (StringInfo) PG_GETARG_POINTER(0); pg_uuid_t *uuid; uuid = (pg_uuid_t *) palloc(UUID_LEN); memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN); PG_RETURN_POINTER(uuid); } Datum uuid_send(PG_FUNCTION_ARGS) { pg_uuid_t *uuid = PG_GETARG_UUID_P(0); StringInfoData buffer; pq_begintypsend(&buffer); pq_sendbytes(&buffer, uuid->data, UUID_LEN); PG_RETURN_BYTEA_P(pq_endtypsend(&buffer)); } /* internal uuid compare function */ static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2) { return memcmp(arg1->data, arg2->data, UUID_LEN); } Datum uuid_lt(PG_FUNCTION_ARGS) { pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0); } Datum uuid_le(PG_FUNCTION_ARGS) { pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0); } Datum uuid_eq(PG_FUNCTION_ARGS) { pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0); } Datum uuid_ge(PG_FUNCTION_ARGS) { pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0); } Datum uuid_gt(PG_FUNCTION_ARGS) { pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0); } Datum uuid_ne(PG_FUNCTION_ARGS) { pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0); } /* handler for btree index operator */ Datum uuid_cmp(PG_FUNCTION_ARGS) { pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2)); } /* * Sort support strategy routine */ Datum uuid_sortsupport(PG_FUNCTION_ARGS) { SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); ssup->comparator = uuid_fast_cmp; ssup->ssup_extra = NULL; if (ssup->abbreviate) { uuid_sortsupport_state *uss; MemoryContext oldcontext; oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); uss = palloc(sizeof(uuid_sortsupport_state)); uss->input_count = 0; uss->estimating = true; initHyperLogLog(&uss->abbr_card, 10); ssup->ssup_extra = uss; ssup->comparator = ssup_datum_unsigned_cmp; ssup->abbrev_converter = uuid_abbrev_convert; ssup->abbrev_abort = uuid_abbrev_abort; ssup->abbrev_full_comparator = uuid_fast_cmp; MemoryContextSwitchTo(oldcontext); } PG_RETURN_VOID(); } /* * SortSupport comparison func */ static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup) { pg_uuid_t *arg1 = DatumGetUUIDP(x); pg_uuid_t *arg2 = DatumGetUUIDP(y); return uuid_internal_cmp(arg1, arg2); } /* * Callback for estimating effectiveness of abbreviated key optimization. * * We pay no attention to the cardinality of the non-abbreviated data, because * there is no equality fast-path within authoritative uuid comparator. */ static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup) { uuid_sortsupport_state *uss = ssup->ssup_extra; double abbr_card; if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating) return false; abbr_card = estimateHyperLogLog(&uss->abbr_card); /* * If we have >100k distinct values, then even if we were sorting many * billion rows we'd likely still break even, and the penalty of undoing * that many rows of abbrevs would probably not be worth it. Stop even * counting at that point. */ if (abbr_card > 100000.0) { #ifdef TRACE_SORT if (trace_sort) elog(LOG, "uuid_abbrev: estimation ends at cardinality %f" " after " INT64_FORMAT " values (%d rows)", abbr_card, uss->input_count, memtupcount); #endif uss->estimating = false; return false; } /* * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row * fudge factor allows us to abort earlier on genuinely pathological data * where we've had exactly one abbreviated value in the first 2k * (non-null) rows. */ if (abbr_card < uss->input_count / 2000.0 + 0.5) { #ifdef TRACE_SORT if (trace_sort) elog(LOG, "uuid_abbrev: aborting abbreviation at cardinality %f" " below threshold %f after " INT64_FORMAT " values (%d rows)", abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count, memtupcount); #endif return true; } #ifdef TRACE_SORT if (trace_sort) elog(LOG, "uuid_abbrev: cardinality %f after " INT64_FORMAT " values (%d rows)", abbr_card, uss->input_count, memtupcount); #endif return false; } /* * Conversion routine for sortsupport. Converts original uuid representation * to abbreviated key representation. Our encoding strategy is simple -- pack * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian * machines, the bytes are stored in reverse order), and treat it as an * unsigned integer. */ static Datum uuid_abbrev_convert(Datum original, SortSupport ssup) { uuid_sortsupport_state *uss = ssup->ssup_extra; pg_uuid_t *authoritative = DatumGetUUIDP(original); Datum res; memcpy(&res, authoritative->data, sizeof(Datum)); uss->input_count += 1; if (uss->estimating) { uint32 tmp; #if SIZEOF_DATUM == 8 tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); #else /* SIZEOF_DATUM != 8 */ tmp = (uint32) res; #endif addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); } /* * Byteswap on little-endian machines. * * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer * 3-way comparator) works correctly on all platforms. If we didn't do * this, the comparator would have to call memcmp() with a pair of * pointers to the first byte of each abbreviated key, which is slower. */ res = DatumBigEndianToNative(res); return res; } /* hash index support */ Datum uuid_hash(PG_FUNCTION_ARGS) { pg_uuid_t *key = PG_GETARG_UUID_P(0); return hash_any(key->data, UUID_LEN); } Datum uuid_hash_extended(PG_FUNCTION_ARGS) { pg_uuid_t *key = PG_GETARG_UUID_P(0); return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1)); } Datum gen_random_uuid(PG_FUNCTION_ARGS) { pg_uuid_t *uuid = palloc(UUID_LEN); if (!pg_strong_random(uuid, UUID_LEN)) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("could not generate random values"))); /* * Set magic numbers for a "version 4" (pseudorandom) UUID, see * http://tools.ietf.org/html/rfc4122#section-4.4 */ uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40; /* time_hi_and_version */ uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80; /* clock_seq_hi_and_reserved */ PG_RETURN_UUID_P(uuid); }