1 files changed, 438 insertions, 0 deletions
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
new file mode 100644
index 0000000..b02c9fc
--- /dev/null
+++ b/src/backend/utils/adt/uuid.c
@@ -0,0 +1,438 @@
+/*-------------------------------------------------------------------------
+ *
+ * uuid.c
+ *	  Functions for the built-in type "uuid".
+ *
+ * Copyright (c) 2007-2021, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/adt/uuid.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "common/hashfn.h"
+#include "lib/hyperloglog.h"
+#include "libpq/pqformat.h"
+#include "port/pg_bswap.h"
+#include "utils/builtins.h"
+#include "utils/guc.h"
+#include "utils/sortsupport.h"
+#include "utils/uuid.h"
+
+/* sortsupport for uuid */
+typedef struct
+{
+	int64		input_count;	/* number of non-null values seen */
+	bool		estimating;		/* true if estimating cardinality */
+
+	hyperLogLogState abbr_card; /* cardinality estimator */
+} uuid_sortsupport_state;
+
+static void string_to_uuid(const char *source, pg_uuid_t *uuid);
+static int	uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
+static int	uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
+static int	uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
+static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
+static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
+
+Datum
+uuid_in(PG_FUNCTION_ARGS)
+{
+	char	   *uuid_str = PG_GETARG_CSTRING(0);
+	pg_uuid_t  *uuid;
+
+	uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
+	string_to_uuid(uuid_str, uuid);
+	PG_RETURN_UUID_P(uuid);
+}
+
+Datum
+uuid_out(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
+	static const char hex_chars[] = "0123456789abcdef";
+	StringInfoData buf;
+	int			i;
+
+	initStringInfo(&buf);
+	for (i = 0; i < UUID_LEN; i++)
+	{
+		int			hi;
+		int			lo;
+
+		/*
+		 * We print uuid values as a string of 8, 4, 4, 4, and then 12
+		 * hexadecimal characters, with each group is separated by a hyphen
+		 * ("-"). Therefore, add the hyphens at the appropriate places here.
+		 */
+		if (i == 4 || i == 6 || i == 8 || i == 10)
+			appendStringInfoChar(&buf, '-');
+
+		hi = uuid->data[i] >> 4;
+		lo = uuid->data[i] & 0x0F;
+
+		appendStringInfoChar(&buf, hex_chars[hi]);
+		appendStringInfoChar(&buf, hex_chars[lo]);
+	}
+
+	PG_RETURN_CSTRING(buf.data);
+}
+
+/*
+ * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
+ * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
+ * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
+ * digits, is the only one used for output.)
+ */
+static void
+string_to_uuid(const char *source, pg_uuid_t *uuid)
+{
+	const char *src = source;
+	bool		braces = false;
+	int			i;
+
+	if (src[0] == '{')
+	{
+		src++;
+		braces = true;
+	}
+
+	for (i = 0; i < UUID_LEN; i++)
+	{
+		char		str_buf[3];
+
+		if (src[0] == '\0' || src[1] == '\0')
+			goto syntax_error;
+		memcpy(str_buf, src, 2);
+		if (!isxdigit((unsigned char) str_buf[0]) ||
+			!isxdigit((unsigned char) str_buf[1]))
+			goto syntax_error;
+
+		str_buf[2] = '\0';
+		uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
+		src += 2;
+		if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
+			src++;
+	}
+
+	if (braces)
+	{
+		if (*src != '}')
+			goto syntax_error;
+		src++;
+	}
+
+	if (*src != '\0')
+		goto syntax_error;
+
+	return;
+
+syntax_error:
+	ereport(ERROR,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("invalid input syntax for type %s: \"%s\"",
+					"uuid", source)));
+}
+
+Datum
+uuid_recv(PG_FUNCTION_ARGS)
+{
+	StringInfo	buffer = (StringInfo) PG_GETARG_POINTER(0);
+	pg_uuid_t  *uuid;
+
+	uuid = (pg_uuid_t *) palloc(UUID_LEN);
+	memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
+	PG_RETURN_POINTER(uuid);
+}
+
+Datum
+uuid_send(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
+	StringInfoData buffer;
+
+	pq_begintypsend(&buffer);
+	pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN);
+	PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
+}
+
+/* internal uuid compare function */
+static int
+uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
+{
+	return memcmp(arg1->data, arg2->data, UUID_LEN);
+}
+
+Datum
+uuid_lt(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
+	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
+
+	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
+}
+
+Datum
+uuid_le(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
+	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
+
+	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
+}
+
+Datum
+uuid_eq(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
+	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
+
+	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
+}
+
+Datum
+uuid_ge(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
+	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
+
+	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
+}
+
+Datum
+uuid_gt(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
+	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
+
+	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
+}
+
+Datum
+uuid_ne(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
+	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
+
+	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
+}
+
+/* handler for btree index operator */
+Datum
+uuid_cmp(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
+	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
+
+	PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
+}
+
+/*
+ * Sort support strategy routine
+ */
+Datum
+uuid_sortsupport(PG_FUNCTION_ARGS)
+{
+	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+	ssup->comparator = uuid_fast_cmp;
+	ssup->ssup_extra = NULL;
+
+	if (ssup->abbreviate)
+	{
+		uuid_sortsupport_state *uss;
+		MemoryContext oldcontext;
+
+		oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+		uss = palloc(sizeof(uuid_sortsupport_state));
+		uss->input_count = 0;
+		uss->estimating = true;
+		initHyperLogLog(&uss->abbr_card, 10);
+
+		ssup->ssup_extra = uss;
+
+		ssup->comparator = uuid_cmp_abbrev;
+		ssup->abbrev_converter = uuid_abbrev_convert;
+		ssup->abbrev_abort = uuid_abbrev_abort;
+		ssup->abbrev_full_comparator = uuid_fast_cmp;
+
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * SortSupport comparison func
+ */
+static int
+uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
+{
+	pg_uuid_t  *arg1 = DatumGetUUIDP(x);
+	pg_uuid_t  *arg2 = DatumGetUUIDP(y);
+
+	return uuid_internal_cmp(arg1, arg2);
+}
+
+/*
+ * Abbreviated key comparison func
+ */
+static int
+uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
+{
+	if (x > y)
+		return 1;
+	else if (x == y)
+		return 0;
+	else
+		return -1;
+}
+
+/*
+ * Callback for estimating effectiveness of abbreviated key optimization.
+ *
+ * We pay no attention to the cardinality of the non-abbreviated data, because
+ * there is no equality fast-path within authoritative uuid comparator.
+ */
+static bool
+uuid_abbrev_abort(int memtupcount, SortSupport ssup)
+{
+	uuid_sortsupport_state *uss = ssup->ssup_extra;
+	double		abbr_card;
+
+	if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
+		return false;
+
+	abbr_card = estimateHyperLogLog(&uss->abbr_card);
+
+	/*
+	 * If we have >100k distinct values, then even if we were sorting many
+	 * billion rows we'd likely still break even, and the penalty of undoing
+	 * that many rows of abbrevs would probably not be worth it.  Stop even
+	 * counting at that point.
+	 */
+	if (abbr_card > 100000.0)
+	{
+#ifdef TRACE_SORT
+		if (trace_sort)
+			elog(LOG,
+				 "uuid_abbrev: estimation ends at cardinality %f"
+				 " after " INT64_FORMAT " values (%d rows)",
+				 abbr_card, uss->input_count, memtupcount);
+#endif
+		uss->estimating = false;
+		return false;
+	}
+
+	/*
+	 * Target minimum cardinality is 1 per ~2k of non-null inputs.  0.5 row
+	 * fudge factor allows us to abort earlier on genuinely pathological data
+	 * where we've had exactly one abbreviated value in the first 2k
+	 * (non-null) rows.
+	 */
+	if (abbr_card < uss->input_count / 2000.0 + 0.5)
+	{
+#ifdef TRACE_SORT
+		if (trace_sort)
+			elog(LOG,
+				 "uuid_abbrev: aborting abbreviation at cardinality %f"
+				 " below threshold %f after " INT64_FORMAT " values (%d rows)",
+				 abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
+				 memtupcount);
+#endif
+		return true;
+	}
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG,
+			 "uuid_abbrev: cardinality %f after " INT64_FORMAT
+			 " values (%d rows)", abbr_card, uss->input_count, memtupcount);
+#endif
+
+	return false;
+}
+
+/*
+ * Conversion routine for sortsupport.  Converts original uuid representation
+ * to abbreviated key representation.  Our encoding strategy is simple -- pack
+ * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
+ * machines, the bytes are stored in reverse order), and treat it as an
+ * unsigned integer.
+ */
+static Datum
+uuid_abbrev_convert(Datum original, SortSupport ssup)
+{
+	uuid_sortsupport_state *uss = ssup->ssup_extra;
+	pg_uuid_t  *authoritative = DatumGetUUIDP(original);
+	Datum		res;
+
+	memcpy(&res, authoritative->data, sizeof(Datum));
+	uss->input_count += 1;
+
+	if (uss->estimating)
+	{
+		uint32		tmp;
+
+#if SIZEOF_DATUM == 8
+		tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
+#else							/* SIZEOF_DATUM != 8 */
+		tmp = (uint32) res;
+#endif
+
+		addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
+	}
+
+	/*
+	 * Byteswap on little-endian machines.
+	 *
+	 * This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way
+	 * comparator) works correctly on all platforms.  If we didn't do this,
+	 * the comparator would have to call memcmp() with a pair of pointers to
+	 * the first byte of each abbreviated key, which is slower.
+	 */
+	res = DatumBigEndianToNative(res);
+
+	return res;
+}
+
+/* hash index support */
+Datum
+uuid_hash(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *key = PG_GETARG_UUID_P(0);
+
+	return hash_any(key->data, UUID_LEN);
+}
+
+Datum
+uuid_hash_extended(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *key = PG_GETARG_UUID_P(0);
+
+	return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
+}
+
+Datum
+gen_random_uuid(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = palloc(UUID_LEN);
+
+	if (!pg_strong_random(uuid, UUID_LEN))
+		ereport(ERROR,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 errmsg("could not generate random values")));
+
+	/*
+	 * Set magic numbers for a "version 4" (pseudorandom) UUID, see
+	 * http://tools.ietf.org/html/rfc4122#section-4.4
+	 */
+	uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40;	/* time_hi_and_version */
+	uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80;	/* clock_seq_hi_and_reserved */
+
+	PG_RETURN_UUID_P(uuid);
+}