summaryrefslogtreecommitdiffstats
path: root/src/common
diff options
context:
space:
mode:
Diffstat (limited to 'src/common')
-rw-r--r--src/common/.gitignore1
-rw-r--r--src/common/Makefile181
-rw-r--r--src/common/archive.c121
-rw-r--r--src/common/base64.c242
-rw-r--r--src/common/checksum_helper.c190
-rw-r--r--src/common/config_info.c201
-rw-r--r--src/common/controldata_utils.c263
-rw-r--r--src/common/d2s.c1076
-rw-r--r--src/common/d2s_full_table.h358
-rw-r--r--src/common/d2s_intrinsics.h202
-rw-r--r--src/common/digit_table.h21
-rw-r--r--src/common/encnames.c598
-rw-r--r--src/common/exec.c708
-rw-r--r--src/common/f2s.c803
-rw-r--r--src/common/fe_memutils.c176
-rw-r--r--src/common/file_perm.c91
-rw-r--r--src/common/file_utils.c391
-rw-r--r--src/common/hashfn.c692
-rw-r--r--src/common/ip.c259
-rw-r--r--src/common/jsonapi.c1132
-rw-r--r--src/common/keywords.c33
-rw-r--r--src/common/kwlist_d.h1072
-rw-r--r--src/common/kwlookup.c85
-rw-r--r--src/common/link-canary.c36
-rw-r--r--src/common/logging.c285
-rw-r--r--src/common/md5.c348
-rw-r--r--src/common/pg_lzcompress.c872
-rw-r--r--src/common/pgfnames.c94
-rw-r--r--src/common/protocol_openssl.c117
-rw-r--r--src/common/psprintf.c151
-rw-r--r--src/common/relpath.c210
-rw-r--r--src/common/restricted_token.c202
-rw-r--r--src/common/rmtree.c122
-rw-r--r--src/common/ryu_common.h133
-rw-r--r--src/common/saslprep.c1278
-rw-r--r--src/common/scram-common.c291
-rw-r--r--src/common/sha2.c1006
-rw-r--r--src/common/sha2_openssl.c102
-rw-r--r--src/common/string.c114
-rw-r--r--src/common/stringinfo.c343
-rw-r--r--src/common/unicode/.gitignore8
-rw-r--r--src/common/unicode/Makefile67
-rw-r--r--src/common/unicode/README28
-rw-r--r--src/common/unicode/generate-norm_test_table.pl106
-rw-r--r--src/common/unicode/generate-unicode_combining_table.pl53
-rw-r--r--src/common/unicode/generate-unicode_norm_table.pl234
-rw-r--r--src/common/unicode/generate-unicode_normprops_table.pl88
-rw-r--r--src/common/unicode/norm_test.c86
-rw-r--r--src/common/unicode_norm.c554
-rw-r--r--src/common/username.c87
-rw-r--r--src/common/wait_error.c119
-rw-r--r--src/common/wchar.c1609
52 files changed, 17639 insertions, 0 deletions
diff --git a/src/common/.gitignore b/src/common/.gitignore
new file mode 100644
index 0000000..ffa3284
--- /dev/null
+++ b/src/common/.gitignore
@@ -0,0 +1 @@
+/kwlist_d.h
diff --git a/src/common/Makefile b/src/common/Makefile
new file mode 100644
index 0000000..16619e4
--- /dev/null
+++ b/src/common/Makefile
@@ -0,0 +1,181 @@
+#-------------------------------------------------------------------------
+#
+# Makefile
+# Makefile for src/common
+#
+# These files are used by the Postgres backend, and also by frontend
+# programs. These files provide common functionality that isn't directly
+# concerned with portability and thus doesn't belong in src/port.
+#
+# This makefile generates three outputs:
+#
+# libpgcommon.a - contains object files with FRONTEND defined,
+# for use by client applications
+#
+# libpgcommon_shlib.a - contains object files with FRONTEND defined,
+# built suitably for use in shared libraries; for use
+# by frontend libraries
+#
+# libpgcommon_srv.a - contains object files without FRONTEND defined,
+# for use only by the backend
+#
+# IDENTIFICATION
+# src/common/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/common
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+# don't include subdirectory-path-dependent -I and -L switches
+STD_CPPFLAGS := $(filter-out -I$(top_srcdir)/src/include -I$(top_builddir)/src/include,$(CPPFLAGS))
+STD_LDFLAGS := $(filter-out -L$(top_builddir)/src/common -L$(top_builddir)/src/port,$(LDFLAGS))
+override CPPFLAGS += -DVAL_CC="\"$(CC)\""
+override CPPFLAGS += -DVAL_CPPFLAGS="\"$(STD_CPPFLAGS)\""
+override CPPFLAGS += -DVAL_CFLAGS="\"$(CFLAGS)\""
+override CPPFLAGS += -DVAL_CFLAGS_SL="\"$(CFLAGS_SL)\""
+override CPPFLAGS += -DVAL_LDFLAGS="\"$(STD_LDFLAGS)\""
+override CPPFLAGS += -DVAL_LDFLAGS_EX="\"$(LDFLAGS_EX)\""
+override CPPFLAGS += -DVAL_LDFLAGS_SL="\"$(LDFLAGS_SL)\""
+override CPPFLAGS += -DVAL_LIBS="\"$(LIBS)\""
+
+override CPPFLAGS := -DFRONTEND -I. -I$(top_srcdir)/src/common $(CPPFLAGS)
+LIBS += $(PTHREAD_LIBS)
+
+# If you add objects here, see also src/tools/msvc/Mkvcbuild.pm
+
+OBJS_COMMON = \
+ archive.o \
+ base64.o \
+ checksum_helper.o \
+ config_info.o \
+ controldata_utils.o \
+ d2s.o \
+ encnames.o \
+ exec.o \
+ f2s.o \
+ file_perm.o \
+ hashfn.o \
+ ip.o \
+ jsonapi.o \
+ keywords.o \
+ kwlookup.o \
+ link-canary.o \
+ md5.o \
+ pg_lzcompress.o \
+ pgfnames.o \
+ psprintf.o \
+ relpath.o \
+ rmtree.o \
+ saslprep.o \
+ scram-common.o \
+ string.o \
+ stringinfo.o \
+ unicode_norm.o \
+ username.o \
+ wait_error.o \
+ wchar.o
+
+ifeq ($(with_openssl),yes)
+OBJS_COMMON += \
+ protocol_openssl.o \
+ sha2_openssl.o
+else
+OBJS_COMMON += sha2.o
+endif
+
+# A few files are currently only built for frontend, not server
+# (Mkvcbuild.pm has a copy of this list, too)
+OBJS_FRONTEND = \
+ $(OBJS_COMMON) \
+ fe_memutils.o \
+ file_utils.o \
+ logging.o \
+ restricted_token.o
+
+# foo.o, foo_shlib.o, and foo_srv.o are all built from foo.c
+OBJS_SHLIB = $(OBJS_FRONTEND:%.o=%_shlib.o)
+OBJS_SRV = $(OBJS_COMMON:%.o=%_srv.o)
+
+# where to find gen_keywordlist.pl and subsidiary files
+TOOLSDIR = $(top_srcdir)/src/tools
+GEN_KEYWORDLIST = $(PERL) -I $(TOOLSDIR) $(TOOLSDIR)/gen_keywordlist.pl
+GEN_KEYWORDLIST_DEPS = $(TOOLSDIR)/gen_keywordlist.pl $(TOOLSDIR)/PerfectHash.pm
+
+all: libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a
+
+distprep: kwlist_d.h
+
+# libpgcommon is needed by some contrib
+install: all installdirs
+ $(INSTALL_STLIB) libpgcommon.a '$(DESTDIR)$(libdir)/libpgcommon.a'
+ $(INSTALL_STLIB) libpgcommon_shlib.a '$(DESTDIR)$(libdir)/libpgcommon_shlib.a'
+
+installdirs:
+ $(MKDIR_P) '$(DESTDIR)$(libdir)'
+
+uninstall:
+ rm -f '$(DESTDIR)$(libdir)/libpgcommon.a'
+ rm -f '$(DESTDIR)$(libdir)/libpgcommon_shlib.a'
+
+libpgcommon.a: $(OBJS_FRONTEND)
+ rm -f $@
+ $(AR) $(AROPT) $@ $^
+
+#
+# Shared library versions of object files
+#
+
+libpgcommon_shlib.a: $(OBJS_SHLIB)
+ rm -f $@
+ $(AR) $(AROPT) $@ $^
+
+# Because this uses its own compilation rule, it doesn't use the
+# dependency tracking logic from Makefile.global. To make sure that
+# dependency tracking works anyway for the *_shlib.o files, depend on
+# their *.o siblings as well, which do have proper dependencies. It's
+# a hack that might fail someday if there is a *_shlib.o without a
+# corresponding *.o, but there seems little reason for that.
+%_shlib.o: %.c %.o
+ $(CC) $(CFLAGS) $(CFLAGS_SL) $(CPPFLAGS) -c $< -o $@
+
+#
+# Server versions of object files
+#
+
+libpgcommon_srv.a: $(OBJS_SRV)
+ rm -f $@
+ $(AR) $(AROPT) $@ $^
+
+# Because this uses its own compilation rule, it doesn't use the
+# dependency tracking logic from Makefile.global. To make sure that
+# dependency tracking works anyway for the *_srv.o files, depend on
+# their *.o siblings as well, which do have proper dependencies. It's
+# a hack that might fail someday if there is a *_srv.o without a
+# corresponding *.o, but it works for now.
+%_srv.o: %.c %.o
+ $(CC) $(CFLAGS) $(subst -DFRONTEND,, $(CPPFLAGS)) -c $< -o $@
+
+# generate SQL keyword lookup table to be included into keywords*.o.
+kwlist_d.h: $(top_srcdir)/src/include/parser/kwlist.h $(GEN_KEYWORDLIST_DEPS)
+ $(GEN_KEYWORDLIST) --extern $<
+
+# Dependencies of keywords*.o need to be managed explicitly to make sure
+# that you don't get broken parsing code, even in a non-enable-depend build.
+keywords.o keywords_shlib.o keywords_srv.o: kwlist_d.h
+
+# The code imported from Ryu gets a pass on declaration-after-statement,
+# in order to keep it more closely aligned with its upstream.
+RYU_FILES = d2s.o f2s.o
+RYU_OBJS = $(RYU_FILES) $(RYU_FILES:%.o=%_shlib.o) $(RYU_FILES:%.o=%_srv.o)
+
+$(RYU_OBJS): CFLAGS += $(PERMIT_DECLARATION_AFTER_STATEMENT)
+
+# kwlist_d.h is in the distribution tarball, so it is not cleaned here.
+clean distclean:
+ rm -f libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a
+ rm -f $(OBJS_FRONTEND) $(OBJS_SHLIB) $(OBJS_SRV)
+
+maintainer-clean: distclean
+ rm -f kwlist_d.h
diff --git a/src/common/archive.c b/src/common/archive.c
new file mode 100644
index 0000000..a94e4d0
--- /dev/null
+++ b/src/common/archive.c
@@ -0,0 +1,121 @@
+/*-------------------------------------------------------------------------
+ *
+ * archive.c
+ * Common WAL archive routines
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/archive.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/archive.h"
+#include "lib/stringinfo.h"
+
+/*
+ * BuildRestoreCommand
+ *
+ * Builds a restore command to retrieve a file from WAL archives, replacing
+ * the supported aliases with values supplied by the caller as defined by
+ * the GUC parameter restore_command: xlogpath for %p, xlogfname for %f and
+ * lastRestartPointFname for %r.
+ *
+ * The result is a palloc'd string for the restore command built. The
+ * caller is responsible for freeing it. If any of the required arguments
+ * is NULL and that the corresponding alias is found in the command given
+ * by the caller, then NULL is returned.
+ */
+char *
+BuildRestoreCommand(const char *restoreCommand,
+ const char *xlogpath,
+ const char *xlogfname,
+ const char *lastRestartPointFname)
+{
+ StringInfoData result;
+ const char *sp;
+
+ /*
+ * Build the command to be executed.
+ */
+ initStringInfo(&result);
+
+ for (sp = restoreCommand; *sp; sp++)
+ {
+ if (*sp == '%')
+ {
+ switch (sp[1])
+ {
+ case 'p':
+ {
+ char *nativePath;
+
+ /* %p: relative path of target file */
+ if (xlogpath == NULL)
+ {
+ pfree(result.data);
+ return NULL;
+ }
+ sp++;
+
+ /*
+ * This needs to use a placeholder to not modify the
+ * input with the conversion done via
+ * make_native_path().
+ */
+ nativePath = pstrdup(xlogpath);
+ make_native_path(nativePath);
+ appendStringInfoString(&result,
+ nativePath);
+ pfree(nativePath);
+ break;
+ }
+ case 'f':
+ /* %f: filename of desired file */
+ if (xlogfname == NULL)
+ {
+ pfree(result.data);
+ return NULL;
+ }
+ sp++;
+ appendStringInfoString(&result, xlogfname);
+ break;
+ case 'r':
+ /* %r: filename of last restartpoint */
+ if (lastRestartPointFname == NULL)
+ {
+ pfree(result.data);
+ return NULL;
+ }
+ sp++;
+ appendStringInfoString(&result,
+ lastRestartPointFname);
+ break;
+ case '%':
+ /* convert %% to a single % */
+ sp++;
+ appendStringInfoChar(&result, *sp);
+ break;
+ default:
+ /* otherwise treat the % as not special */
+ appendStringInfoChar(&result, *sp);
+ break;
+ }
+ }
+ else
+ {
+ appendStringInfoChar(&result, *sp);
+ }
+ }
+
+ return result.data;
+}
diff --git a/src/common/base64.c b/src/common/base64.c
new file mode 100644
index 0000000..5ba5937
--- /dev/null
+++ b/src/common/base64.c
@@ -0,0 +1,242 @@
+/*-------------------------------------------------------------------------
+ *
+ * base64.c
+ * Encoding and decoding routines for base64 without whitespace.
+ *
+ * Copyright (c) 2001-2020, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/common/base64.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/base64.h"
+
+/*
+ * BASE64
+ */
+
+static const char _base64[] =
+"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+static const int8 b64lookup[128] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
+ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
+ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
+};
+
+/*
+ * pg_b64_encode
+ *
+ * Encode into base64 the given string. Returns the length of the encoded
+ * string, and -1 in the event of an error with the result buffer zeroed
+ * for safety.
+ */
+int
+pg_b64_encode(const char *src, int len, char *dst, int dstlen)
+{
+ char *p;
+ const char *s,
+ *end = src + len;
+ int pos = 2;
+ uint32 buf = 0;
+
+ s = src;
+ p = dst;
+
+ while (s < end)
+ {
+ buf |= (unsigned char) *s << (pos << 3);
+ pos--;
+ s++;
+
+ /* write it out */
+ if (pos < 0)
+ {
+ /*
+ * Leave if there is an overflow in the area allocated for the
+ * encoded string.
+ */
+ if ((p - dst + 4) > dstlen)
+ goto error;
+
+ *p++ = _base64[(buf >> 18) & 0x3f];
+ *p++ = _base64[(buf >> 12) & 0x3f];
+ *p++ = _base64[(buf >> 6) & 0x3f];
+ *p++ = _base64[buf & 0x3f];
+
+ pos = 2;
+ buf = 0;
+ }
+ }
+ if (pos != 2)
+ {
+ /*
+ * Leave if there is an overflow in the area allocated for the encoded
+ * string.
+ */
+ if ((p - dst + 4) > dstlen)
+ goto error;
+
+ *p++ = _base64[(buf >> 18) & 0x3f];
+ *p++ = _base64[(buf >> 12) & 0x3f];
+ *p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
+ *p++ = '=';
+ }
+
+ Assert((p - dst) <= dstlen);
+ return p - dst;
+
+error:
+ memset(dst, 0, dstlen);
+ return -1;
+}
+
+/*
+ * pg_b64_decode
+ *
+ * Decode the given base64 string. Returns the length of the decoded
+ * string on success, and -1 in the event of an error with the result
+ * buffer zeroed for safety.
+ */
+int
+pg_b64_decode(const char *src, int len, char *dst, int dstlen)
+{
+ const char *srcend = src + len,
+ *s = src;
+ char *p = dst;
+ char c;
+ int b = 0;
+ uint32 buf = 0;
+ int pos = 0,
+ end = 0;
+
+ while (s < srcend)
+ {
+ c = *s++;
+
+ /* Leave if a whitespace is found */
+ if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
+ goto error;
+
+ if (c == '=')
+ {
+ /* end sequence */
+ if (!end)
+ {
+ if (pos == 2)
+ end = 1;
+ else if (pos == 3)
+ end = 2;
+ else
+ {
+ /*
+ * Unexpected "=" character found while decoding base64
+ * sequence.
+ */
+ goto error;
+ }
+ }
+ b = 0;
+ }
+ else
+ {
+ b = -1;
+ if (c > 0 && c < 127)
+ b = b64lookup[(unsigned char) c];
+ if (b < 0)
+ {
+ /* invalid symbol found */
+ goto error;
+ }
+ }
+ /* add it to buffer */
+ buf = (buf << 6) + b;
+ pos++;
+ if (pos == 4)
+ {
+ /*
+ * Leave if there is an overflow in the area allocated for the
+ * decoded string.
+ */
+ if ((p - dst + 1) > dstlen)
+ goto error;
+ *p++ = (buf >> 16) & 255;
+
+ if (end == 0 || end > 1)
+ {
+ /* overflow check */
+ if ((p - dst + 1) > dstlen)
+ goto error;
+ *p++ = (buf >> 8) & 255;
+ }
+ if (end == 0 || end > 2)
+ {
+ /* overflow check */
+ if ((p - dst + 1) > dstlen)
+ goto error;
+ *p++ = buf & 255;
+ }
+ buf = 0;
+ pos = 0;
+ }
+ }
+
+ if (pos != 0)
+ {
+ /*
+ * base64 end sequence is invalid. Input data is missing padding, is
+ * truncated or is otherwise corrupted.
+ */
+ goto error;
+ }
+
+ Assert((p - dst) <= dstlen);
+ return p - dst;
+
+error:
+ memset(dst, 0, dstlen);
+ return -1;
+}
+
+/*
+ * pg_b64_enc_len
+ *
+ * Returns to caller the length of the string if it were encoded with
+ * base64 based on the length provided by caller. This is useful to
+ * estimate how large a buffer allocation needs to be done before doing
+ * the actual encoding.
+ */
+int
+pg_b64_enc_len(int srclen)
+{
+ /* 3 bytes will be converted to 4 */
+ return (srclen + 2) * 4 / 3;
+}
+
+/*
+ * pg_b64_dec_len
+ *
+ * Returns to caller the length of the string if it were to be decoded
+ * with base64, based on the length given by caller. This is useful to
+ * estimate how large a buffer allocation needs to be done before doing
+ * the actual decoding.
+ */
+int
+pg_b64_dec_len(int srclen)
+{
+ return (srclen * 3) >> 2;
+}
diff --git a/src/common/checksum_helper.c b/src/common/checksum_helper.c
new file mode 100644
index 0000000..79a9a74
--- /dev/null
+++ b/src/common/checksum_helper.c
@@ -0,0 +1,190 @@
+/*-------------------------------------------------------------------------
+ *
+ * checksum_helper.c
+ * Compute a checksum of any of various types using common routines
+ *
+ * Portions Copyright (c) 2016-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/checksum_helper.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/checksum_helper.h"
+
+/*
+ * If 'name' is a recognized checksum type, set *type to the corresponding
+ * constant and return true. Otherwise, set *type to CHECKSUM_TYPE_NONE and
+ * return false.
+ */
+bool
+pg_checksum_parse_type(char *name, pg_checksum_type *type)
+{
+ pg_checksum_type result_type = CHECKSUM_TYPE_NONE;
+ bool result = true;
+
+ if (pg_strcasecmp(name, "none") == 0)
+ result_type = CHECKSUM_TYPE_NONE;
+ else if (pg_strcasecmp(name, "crc32c") == 0)
+ result_type = CHECKSUM_TYPE_CRC32C;
+ else if (pg_strcasecmp(name, "sha224") == 0)
+ result_type = CHECKSUM_TYPE_SHA224;
+ else if (pg_strcasecmp(name, "sha256") == 0)
+ result_type = CHECKSUM_TYPE_SHA256;
+ else if (pg_strcasecmp(name, "sha384") == 0)
+ result_type = CHECKSUM_TYPE_SHA384;
+ else if (pg_strcasecmp(name, "sha512") == 0)
+ result_type = CHECKSUM_TYPE_SHA512;
+ else
+ result = false;
+
+ *type = result_type;
+ return result;
+}
+
+/*
+ * Get the canonical human-readable name corresponding to a checksum type.
+ */
+char *
+pg_checksum_type_name(pg_checksum_type type)
+{
+ switch (type)
+ {
+ case CHECKSUM_TYPE_NONE:
+ return "NONE";
+ case CHECKSUM_TYPE_CRC32C:
+ return "CRC32C";
+ case CHECKSUM_TYPE_SHA224:
+ return "SHA224";
+ case CHECKSUM_TYPE_SHA256:
+ return "SHA256";
+ case CHECKSUM_TYPE_SHA384:
+ return "SHA384";
+ case CHECKSUM_TYPE_SHA512:
+ return "SHA512";
+ }
+
+ Assert(false);
+ return "???";
+}
+
+/*
+ * Initialize a checksum context for checksums of the given type.
+ */
+void
+pg_checksum_init(pg_checksum_context *context, pg_checksum_type type)
+{
+ context->type = type;
+
+ switch (type)
+ {
+ case CHECKSUM_TYPE_NONE:
+ /* do nothing */
+ break;
+ case CHECKSUM_TYPE_CRC32C:
+ INIT_CRC32C(context->raw_context.c_crc32c);
+ break;
+ case CHECKSUM_TYPE_SHA224:
+ pg_sha224_init(&context->raw_context.c_sha224);
+ break;
+ case CHECKSUM_TYPE_SHA256:
+ pg_sha256_init(&context->raw_context.c_sha256);
+ break;
+ case CHECKSUM_TYPE_SHA384:
+ pg_sha384_init(&context->raw_context.c_sha384);
+ break;
+ case CHECKSUM_TYPE_SHA512:
+ pg_sha512_init(&context->raw_context.c_sha512);
+ break;
+ }
+}
+
+/*
+ * Update a checksum context with new data.
+ */
+void
+pg_checksum_update(pg_checksum_context *context, const uint8 *input,
+ size_t len)
+{
+ switch (context->type)
+ {
+ case CHECKSUM_TYPE_NONE:
+ /* do nothing */
+ break;
+ case CHECKSUM_TYPE_CRC32C:
+ COMP_CRC32C(context->raw_context.c_crc32c, input, len);
+ break;
+ case CHECKSUM_TYPE_SHA224:
+ pg_sha224_update(&context->raw_context.c_sha224, input, len);
+ break;
+ case CHECKSUM_TYPE_SHA256:
+ pg_sha256_update(&context->raw_context.c_sha256, input, len);
+ break;
+ case CHECKSUM_TYPE_SHA384:
+ pg_sha384_update(&context->raw_context.c_sha384, input, len);
+ break;
+ case CHECKSUM_TYPE_SHA512:
+ pg_sha512_update(&context->raw_context.c_sha512, input, len);
+ break;
+ }
+}
+
+/*
+ * Finalize a checksum computation and write the result to an output buffer.
+ *
+ * The caller must ensure that the buffer is at least PG_CHECKSUM_MAX_LENGTH
+ * bytes in length. The return value is the number of bytes actually written.
+ */
+int
+pg_checksum_final(pg_checksum_context *context, uint8 *output)
+{
+ int retval = 0;
+
+ StaticAssertStmt(sizeof(pg_crc32c) <= PG_CHECKSUM_MAX_LENGTH,
+ "CRC-32C digest too big for PG_CHECKSUM_MAX_LENGTH");
+ StaticAssertStmt(PG_SHA224_DIGEST_LENGTH <= PG_CHECKSUM_MAX_LENGTH,
+ "SHA224 digest too for PG_CHECKSUM_MAX_LENGTH");
+ StaticAssertStmt(PG_SHA256_DIGEST_LENGTH <= PG_CHECKSUM_MAX_LENGTH,
+ "SHA256 digest too for PG_CHECKSUM_MAX_LENGTH");
+ StaticAssertStmt(PG_SHA384_DIGEST_LENGTH <= PG_CHECKSUM_MAX_LENGTH,
+ "SHA384 digest too for PG_CHECKSUM_MAX_LENGTH");
+ StaticAssertStmt(PG_SHA512_DIGEST_LENGTH <= PG_CHECKSUM_MAX_LENGTH,
+ "SHA512 digest too for PG_CHECKSUM_MAX_LENGTH");
+
+ switch (context->type)
+ {
+ case CHECKSUM_TYPE_NONE:
+ break;
+ case CHECKSUM_TYPE_CRC32C:
+ FIN_CRC32C(context->raw_context.c_crc32c);
+ retval = sizeof(pg_crc32c);
+ memcpy(output, &context->raw_context.c_crc32c, retval);
+ break;
+ case CHECKSUM_TYPE_SHA224:
+ pg_sha224_final(&context->raw_context.c_sha224, output);
+ retval = PG_SHA224_DIGEST_LENGTH;
+ break;
+ case CHECKSUM_TYPE_SHA256:
+ pg_sha256_final(&context->raw_context.c_sha256, output);
+ retval = PG_SHA256_DIGEST_LENGTH;
+ break;
+ case CHECKSUM_TYPE_SHA384:
+ pg_sha384_final(&context->raw_context.c_sha384, output);
+ retval = PG_SHA384_DIGEST_LENGTH;
+ break;
+ case CHECKSUM_TYPE_SHA512:
+ pg_sha512_final(&context->raw_context.c_sha512, output);
+ retval = PG_SHA512_DIGEST_LENGTH;
+ break;
+ }
+
+ Assert(retval <= PG_CHECKSUM_MAX_LENGTH);
+ return retval;
+}
diff --git a/src/common/config_info.c b/src/common/config_info.c
new file mode 100644
index 0000000..8a5dc64
--- /dev/null
+++ b/src/common/config_info.c
@@ -0,0 +1,201 @@
+/*-------------------------------------------------------------------------
+ *
+ * config_info.c
+ * Common code for pg_config output
+ *
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/config_info.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/config_info.h"
+
+
+/*
+ * get_configdata(const char *my_exec_path, size_t *configdata_len)
+ *
+ * Get configure-time constants. The caller is responsible
+ * for pfreeing the result.
+ */
+ConfigData *
+get_configdata(const char *my_exec_path, size_t *configdata_len)
+{
+ ConfigData *configdata;
+ char path[MAXPGPATH];
+ char *lastsep;
+ int i = 0;
+
+ /* Adjust this to match the number of items filled below */
+ *configdata_len = 23;
+ configdata = (ConfigData *) palloc(*configdata_len * sizeof(ConfigData));
+
+ configdata[i].name = pstrdup("BINDIR");
+ strlcpy(path, my_exec_path, sizeof(path));
+ lastsep = strrchr(path, '/');
+ if (lastsep)
+ *lastsep = '\0';
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("DOCDIR");
+ get_doc_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("HTMLDIR");
+ get_html_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("INCLUDEDIR");
+ get_include_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("PKGINCLUDEDIR");
+ get_pkginclude_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("INCLUDEDIR-SERVER");
+ get_includeserver_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("LIBDIR");
+ get_lib_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("PKGLIBDIR");
+ get_pkglib_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("LOCALEDIR");
+ get_locale_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("MANDIR");
+ get_man_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("SHAREDIR");
+ get_share_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("SYSCONFDIR");
+ get_etc_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("PGXS");
+ get_pkglib_path(my_exec_path, path);
+ strlcat(path, "/pgxs/src/makefiles/pgxs.mk", sizeof(path));
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("CONFIGURE");
+ configdata[i].setting = pstrdup(CONFIGURE_ARGS);
+ i++;
+
+ configdata[i].name = pstrdup("CC");
+#ifdef VAL_CC
+ configdata[i].setting = pstrdup(VAL_CC);
+#else
+ configdata[i].setting = pstrdup(_("not recorded"));
+#endif
+ i++;
+
+ configdata[i].name = pstrdup("CPPFLAGS");
+#ifdef VAL_CPPFLAGS
+ configdata[i].setting = pstrdup(VAL_CPPFLAGS);
+#else
+ configdata[i].setting = pstrdup(_("not recorded"));
+#endif
+ i++;
+
+ configdata[i].name = pstrdup("CFLAGS");
+#ifdef VAL_CFLAGS
+ configdata[i].setting = pstrdup(VAL_CFLAGS);
+#else
+ configdata[i].setting = pstrdup(_("not recorded"));
+#endif
+ i++;
+
+ configdata[i].name = pstrdup("CFLAGS_SL");
+#ifdef VAL_CFLAGS_SL
+ configdata[i].setting = pstrdup(VAL_CFLAGS_SL);
+#else
+ configdata[i].setting = pstrdup(_("not recorded"));
+#endif
+ i++;
+
+ configdata[i].name = pstrdup("LDFLAGS");
+#ifdef VAL_LDFLAGS
+ configdata[i].setting = pstrdup(VAL_LDFLAGS);
+#else
+ configdata[i].setting = pstrdup(_("not recorded"));
+#endif
+ i++;
+
+ configdata[i].name = pstrdup("LDFLAGS_EX");
+#ifdef VAL_LDFLAGS_EX
+ configdata[i].setting = pstrdup(VAL_LDFLAGS_EX);
+#else
+ configdata[i].setting = pstrdup(_("not recorded"));
+#endif
+ i++;
+
+ configdata[i].name = pstrdup("LDFLAGS_SL");
+#ifdef VAL_LDFLAGS_SL
+ configdata[i].setting = pstrdup(VAL_LDFLAGS_SL);
+#else
+ configdata[i].setting = pstrdup(_("not recorded"));
+#endif
+ i++;
+
+ configdata[i].name = pstrdup("LIBS");
+#ifdef VAL_LIBS
+ configdata[i].setting = pstrdup(VAL_LIBS);
+#else
+ configdata[i].setting = pstrdup(_("not recorded"));
+#endif
+ i++;
+
+ configdata[i].name = pstrdup("VERSION");
+ configdata[i].setting = pstrdup("PostgreSQL " PG_VERSION);
+ i++;
+
+ Assert(i == *configdata_len);
+
+ return configdata;
+}
diff --git a/src/common/controldata_utils.c b/src/common/controldata_utils.c
new file mode 100644
index 0000000..3d53b6a
--- /dev/null
+++ b/src/common/controldata_utils.c
@@ -0,0 +1,263 @@
+/*-------------------------------------------------------------------------
+ *
+ * controldata_utils.c
+ * Common code for control data file output.
+ *
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/controldata_utils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "access/xlog_internal.h"
+#include "catalog/pg_control.h"
+#include "common/controldata_utils.h"
+#include "common/file_perm.h"
+#ifdef FRONTEND
+#include "common/logging.h"
+#endif
+#include "port/pg_crc32c.h"
+
+#ifndef FRONTEND
+#include "pgstat.h"
+#include "storage/fd.h"
+#endif
+
+/*
+ * get_controlfile()
+ *
+ * Get controlfile values. The result is returned as a palloc'd copy of the
+ * control file data.
+ *
+ * crc_ok_p can be used by the caller to see whether the CRC of the control
+ * file data is correct.
+ */
+ControlFileData *
+get_controlfile(const char *DataDir, bool *crc_ok_p)
+{
+ ControlFileData *ControlFile;
+ int fd;
+ char ControlFilePath[MAXPGPATH];
+ pg_crc32c crc;
+ int r;
+
+ AssertArg(crc_ok_p);
+
+ ControlFile = palloc(sizeof(ControlFileData));
+ snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir);
+
+#ifndef FRONTEND
+ if ((fd = OpenTransientFile(ControlFilePath, O_RDONLY | PG_BINARY)) == -1)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\" for reading: %m",
+ ControlFilePath)));
+#else
+ if ((fd = open(ControlFilePath, O_RDONLY | PG_BINARY, 0)) == -1)
+ {
+ pg_log_fatal("could not open file \"%s\" for reading: %m",
+ ControlFilePath);
+ exit(EXIT_FAILURE);
+ }
+#endif
+
+ r = read(fd, ControlFile, sizeof(ControlFileData));
+ if (r != sizeof(ControlFileData))
+ {
+ if (r < 0)
+#ifndef FRONTEND
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m", ControlFilePath)));
+#else
+ {
+ pg_log_fatal("could not read file \"%s\": %m", ControlFilePath);
+ exit(EXIT_FAILURE);
+ }
+#endif
+ else
+#ifndef FRONTEND
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("could not read file \"%s\": read %d of %zu",
+ ControlFilePath, r, sizeof(ControlFileData))));
+#else
+ {
+ pg_log_fatal("could not read file \"%s\": read %d of %zu",
+ ControlFilePath, r, sizeof(ControlFileData));
+ exit(EXIT_FAILURE);
+ }
+#endif
+ }
+
+#ifndef FRONTEND
+ if (CloseTransientFile(fd) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not close file \"%s\": %m",
+ ControlFilePath)));
+#else
+ if (close(fd) != 0)
+ {
+ pg_log_fatal("could not close file \"%s\": %m", ControlFilePath);
+ exit(EXIT_FAILURE);
+ }
+#endif
+
+ /* Check the CRC. */
+ INIT_CRC32C(crc);
+ COMP_CRC32C(crc,
+ (char *) ControlFile,
+ offsetof(ControlFileData, crc));
+ FIN_CRC32C(crc);
+
+ *crc_ok_p = EQ_CRC32C(crc, ControlFile->crc);
+
+ /* Make sure the control file is valid byte order. */
+ if (ControlFile->pg_control_version % 65536 == 0 &&
+ ControlFile->pg_control_version / 65536 != 0)
+#ifndef FRONTEND
+ elog(ERROR, _("byte ordering mismatch"));
+#else
+ pg_log_warning("possible byte ordering mismatch\n"
+ "The byte ordering used to store the pg_control file might not match the one\n"
+ "used by this program. In that case the results below would be incorrect, and\n"
+ "the PostgreSQL installation would be incompatible with this data directory.");
+#endif
+
+ return ControlFile;
+}
+
+/*
+ * update_controlfile()
+ *
+ * Update controlfile values with the contents given by caller. The
+ * contents to write are included in "ControlFile". "do_sync" can be
+ * optionally used to flush the updated control file. Note that it is up
+ * to the caller to properly lock ControlFileLock when calling this
+ * routine in the backend.
+ */
+void
+update_controlfile(const char *DataDir,
+ ControlFileData *ControlFile, bool do_sync)
+{
+ int fd;
+ char buffer[PG_CONTROL_FILE_SIZE];
+ char ControlFilePath[MAXPGPATH];
+
+ /*
+ * Apply the same static assertions as in backend's WriteControlFile().
+ */
+ StaticAssertStmt(sizeof(ControlFileData) <= PG_CONTROL_MAX_SAFE_SIZE,
+ "pg_control is too large for atomic disk writes");
+ StaticAssertStmt(sizeof(ControlFileData) <= PG_CONTROL_FILE_SIZE,
+ "sizeof(ControlFileData) exceeds PG_CONTROL_FILE_SIZE");
+
+ /* Recalculate CRC of control file */
+ INIT_CRC32C(ControlFile->crc);
+ COMP_CRC32C(ControlFile->crc,
+ (char *) ControlFile,
+ offsetof(ControlFileData, crc));
+ FIN_CRC32C(ControlFile->crc);
+
+ /*
+ * Write out PG_CONTROL_FILE_SIZE bytes into pg_control by zero-padding
+ * the excess over sizeof(ControlFileData), to avoid premature EOF related
+ * errors when reading it.
+ */
+ memset(buffer, 0, PG_CONTROL_FILE_SIZE);
+ memcpy(buffer, ControlFile, sizeof(ControlFileData));
+
+ snprintf(ControlFilePath, sizeof(ControlFilePath), "%s/%s", DataDir, XLOG_CONTROL_FILE);
+
+#ifndef FRONTEND
+
+ /*
+ * All errors issue a PANIC, so no need to use OpenTransientFile() and to
+ * worry about file descriptor leaks.
+ */
+ if ((fd = BasicOpenFile(ControlFilePath, O_RDWR | PG_BINARY)) < 0)
+ ereport(PANIC,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m",
+ ControlFilePath)));
+#else
+ if ((fd = open(ControlFilePath, O_WRONLY | PG_BINARY,
+ pg_file_create_mode)) == -1)
+ {
+ pg_log_fatal("could not open file \"%s\": %m", ControlFilePath);
+ exit(EXIT_FAILURE);
+ }
+#endif
+
+ errno = 0;
+#ifndef FRONTEND
+ pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE);
+#endif
+ if (write(fd, buffer, PG_CONTROL_FILE_SIZE) != PG_CONTROL_FILE_SIZE)
+ {
+ /* if write didn't set errno, assume problem is no disk space */
+ if (errno == 0)
+ errno = ENOSPC;
+
+#ifndef FRONTEND
+ ereport(PANIC,
+ (errcode_for_file_access(),
+ errmsg("could not write file \"%s\": %m",
+ ControlFilePath)));
+#else
+ pg_log_fatal("could not write file \"%s\": %m", ControlFilePath);
+ exit(EXIT_FAILURE);
+#endif
+ }
+#ifndef FRONTEND
+ pgstat_report_wait_end();
+#endif
+
+ if (do_sync)
+ {
+#ifndef FRONTEND
+ pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE);
+ if (pg_fsync(fd) != 0)
+ ereport(PANIC,
+ (errcode_for_file_access(),
+ errmsg("could not fsync file \"%s\": %m",
+ ControlFilePath)));
+ pgstat_report_wait_end();
+#else
+ if (fsync(fd) != 0)
+ {
+ pg_log_fatal("could not fsync file \"%s\": %m", ControlFilePath);
+ exit(EXIT_FAILURE);
+ }
+#endif
+ }
+
+ if (close(fd) != 0)
+ {
+#ifndef FRONTEND
+ ereport(PANIC,
+ (errcode_for_file_access(),
+ errmsg("could not close file \"%s\": %m",
+ ControlFilePath)));
+#else
+ pg_log_fatal("could not close file \"%s\": %m", ControlFilePath);
+ exit(EXIT_FAILURE);
+#endif
+ }
+}
diff --git a/src/common/d2s.c b/src/common/d2s.c
new file mode 100644
index 0000000..8b0b7ce
--- /dev/null
+++ b/src/common/d2s.c
@@ -0,0 +1,1076 @@
+/*---------------------------------------------------------------------------
+ *
+ * Ryu floating-point output for double precision.
+ *
+ * Portions Copyright (c) 2018-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/d2s.c
+ *
+ * This is a modification of code taken from github.com/ulfjack/ryu under the
+ * terms of the Boost license (not the Apache license). The original copyright
+ * notice follows:
+ *
+ * Copyright 2018 Ulf Adams
+ *
+ * The contents of this file may be used under the terms of the Apache
+ * License, Version 2.0.
+ *
+ * (See accompanying file LICENSE-Apache or copy at
+ * http://www.apache.org/licenses/LICENSE-2.0)
+ *
+ * Alternatively, the contents of this file may be used under the terms of the
+ * Boost Software License, Version 1.0.
+ *
+ * (See accompanying file LICENSE-Boost or copy at
+ * https://www.boost.org/LICENSE_1_0.txt)
+ *
+ * Unless required by applicable law or agreed to in writing, this software is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+/*
+ * Runtime compiler options:
+ *
+ * -DRYU_ONLY_64_BIT_OPS Avoid using uint128 or 64-bit intrinsics. Slower,
+ * depending on your compiler.
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/shortest_dec.h"
+
+/*
+ * For consistency, we use 128-bit types if and only if the rest of PG also
+ * does, even though we could use them here without worrying about the
+ * alignment concerns that apply elsewhere.
+ */
+#if !defined(HAVE_INT128) && defined(_MSC_VER) \
+ && !defined(RYU_ONLY_64_BIT_OPS) && defined(_M_X64)
+#define HAS_64_BIT_INTRINSICS
+#endif
+
+#include "ryu_common.h"
+#include "digit_table.h"
+#include "d2s_full_table.h"
+#include "d2s_intrinsics.h"
+
+#define DOUBLE_MANTISSA_BITS 52
+#define DOUBLE_EXPONENT_BITS 11
+#define DOUBLE_BIAS 1023
+
+#define DOUBLE_POW5_INV_BITCOUNT 122
+#define DOUBLE_POW5_BITCOUNT 121
+
+
+static inline uint32
+pow5Factor(uint64 value)
+{
+ uint32 count = 0;
+
+ for (;;)
+ {
+ Assert(value != 0);
+ const uint64 q = div5(value);
+ const uint32 r = (uint32) (value - 5 * q);
+
+ if (r != 0)
+ break;
+
+ value = q;
+ ++count;
+ }
+ return count;
+}
+
+/* Returns true if value is divisible by 5^p. */
+static inline bool
+multipleOfPowerOf5(const uint64 value, const uint32 p)
+{
+ /*
+ * I tried a case distinction on p, but there was no performance
+ * difference.
+ */
+ return pow5Factor(value) >= p;
+}
+
+/* Returns true if value is divisible by 2^p. */
+static inline bool
+multipleOfPowerOf2(const uint64 value, const uint32 p)
+{
+ /* return __builtin_ctzll(value) >= p; */
+ return (value & ((UINT64CONST(1) << p) - 1)) == 0;
+}
+
+/*
+ * We need a 64x128-bit multiplication and a subsequent 128-bit shift.
+ *
+ * Multiplication:
+ *
+ * The 64-bit factor is variable and passed in, the 128-bit factor comes
+ * from a lookup table. We know that the 64-bit factor only has 55
+ * significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
+ * factor only has 124 significant bits (i.e., the 4 topmost bits are
+ * zeros).
+ *
+ * Shift:
+ *
+ * In principle, the multiplication result requires 55 + 124 = 179 bits to
+ * represent. However, we then shift this value to the right by j, which is
+ * at least j >= 115, so the result is guaranteed to fit into 179 - 115 =
+ * 64 bits. This means that we only need the topmost 64 significant bits of
+ * the 64x128-bit multiplication.
+ *
+ * There are several ways to do this:
+ *
+ * 1. Best case: the compiler exposes a 128-bit type.
+ * We perform two 64x64-bit multiplications, add the higher 64 bits of the
+ * lower result to the higher result, and shift by j - 64 bits.
+ *
+ * We explicitly cast from 64-bit to 128-bit, so the compiler can tell
+ * that these are only 64-bit inputs, and can map these to the best
+ * possible sequence of assembly instructions. x86-64 machines happen to
+ * have matching assembly instructions for 64x64-bit multiplications and
+ * 128-bit shifts.
+ *
+ * 2. Second best case: the compiler exposes intrinsics for the x86-64
+ * assembly instructions mentioned in 1.
+ *
+ * 3. We only have 64x64 bit instructions that return the lower 64 bits of
+ * the result, i.e., we have to use plain C.
+ *
+ * Our inputs are less than the full width, so we have three options:
+ * a. Ignore this fact and just implement the intrinsics manually.
+ * b. Split both into 31-bit pieces, which guarantees no internal
+ * overflow, but requires extra work upfront (unless we change the
+ * lookup table).
+ * c. Split only the first factor into 31-bit pieces, which also
+ * guarantees no internal overflow, but requires extra work since the
+ * intermediate results are not perfectly aligned.
+ */
+#if defined(HAVE_INT128)
+
+/* Best case: use 128-bit type. */
+static inline uint64
+mulShift(const uint64 m, const uint64 *const mul, const int32 j)
+{
+ const uint128 b0 = ((uint128) m) * mul[0];
+ const uint128 b2 = ((uint128) m) * mul[1];
+
+ return (uint64) (((b0 >> 64) + b2) >> (j - 64));
+}
+
+static inline uint64
+mulShiftAll(const uint64 m, const uint64 *const mul, const int32 j,
+ uint64 *const vp, uint64 *const vm, const uint32 mmShift)
+{
+ *vp = mulShift(4 * m + 2, mul, j);
+ *vm = mulShift(4 * m - 1 - mmShift, mul, j);
+ return mulShift(4 * m, mul, j);
+}
+
+#elif defined(HAS_64_BIT_INTRINSICS)
+
+static inline uint64
+mulShift(const uint64 m, const uint64 *const mul, const int32 j)
+{
+ /* m is maximum 55 bits */
+ uint64 high1;
+
+ /* 128 */
+ const uint64 low1 = umul128(m, mul[1], &high1);
+
+ /* 64 */
+ uint64 high0;
+ uint64 sum;
+
+ /* 64 */
+ umul128(m, mul[0], &high0);
+ /* 0 */
+ sum = high0 + low1;
+
+ if (sum < high0)
+ {
+ ++high1;
+ /* overflow into high1 */
+ }
+ return shiftright128(sum, high1, j - 64);
+}
+
+static inline uint64
+mulShiftAll(const uint64 m, const uint64 *const mul, const int32 j,
+ uint64 *const vp, uint64 *const vm, const uint32 mmShift)
+{
+ *vp = mulShift(4 * m + 2, mul, j);
+ *vm = mulShift(4 * m - 1 - mmShift, mul, j);
+ return mulShift(4 * m, mul, j);
+}
+
+#else /* // !defined(HAVE_INT128) &&
+ * !defined(HAS_64_BIT_INTRINSICS) */
+
+static inline uint64
+mulShiftAll(uint64 m, const uint64 *const mul, const int32 j,
+ uint64 *const vp, uint64 *const vm, const uint32 mmShift)
+{
+ m <<= 1; /* m is maximum 55 bits */
+
+ uint64 tmp;
+ const uint64 lo = umul128(m, mul[0], &tmp);
+ uint64 hi;
+ const uint64 mid = tmp + umul128(m, mul[1], &hi);
+
+ hi += mid < tmp; /* overflow into hi */
+
+ const uint64 lo2 = lo + mul[0];
+ const uint64 mid2 = mid + mul[1] + (lo2 < lo);
+ const uint64 hi2 = hi + (mid2 < mid);
+
+ *vp = shiftright128(mid2, hi2, j - 64 - 1);
+
+ if (mmShift == 1)
+ {
+ const uint64 lo3 = lo - mul[0];
+ const uint64 mid3 = mid - mul[1] - (lo3 > lo);
+ const uint64 hi3 = hi - (mid3 > mid);
+
+ *vm = shiftright128(mid3, hi3, j - 64 - 1);
+ }
+ else
+ {
+ const uint64 lo3 = lo + lo;
+ const uint64 mid3 = mid + mid + (lo3 < lo);
+ const uint64 hi3 = hi + hi + (mid3 < mid);
+ const uint64 lo4 = lo3 - mul[0];
+ const uint64 mid4 = mid3 - mul[1] - (lo4 > lo3);
+ const uint64 hi4 = hi3 - (mid4 > mid3);
+
+ *vm = shiftright128(mid4, hi4, j - 64);
+ }
+
+ return shiftright128(mid, hi, j - 64 - 1);
+}
+
+#endif /* // HAS_64_BIT_INTRINSICS */
+
+static inline uint32
+decimalLength(const uint64 v)
+{
+ /* This is slightly faster than a loop. */
+ /* The average output length is 16.38 digits, so we check high-to-low. */
+ /* Function precondition: v is not an 18, 19, or 20-digit number. */
+ /* (17 digits are sufficient for round-tripping.) */
+ Assert(v < 100000000000000000L);
+ if (v >= 10000000000000000L)
+ {
+ return 17;
+ }
+ if (v >= 1000000000000000L)
+ {
+ return 16;
+ }
+ if (v >= 100000000000000L)
+ {
+ return 15;
+ }
+ if (v >= 10000000000000L)
+ {
+ return 14;
+ }
+ if (v >= 1000000000000L)
+ {
+ return 13;
+ }
+ if (v >= 100000000000L)
+ {
+ return 12;
+ }
+ if (v >= 10000000000L)
+ {
+ return 11;
+ }
+ if (v >= 1000000000L)
+ {
+ return 10;
+ }
+ if (v >= 100000000L)
+ {
+ return 9;
+ }
+ if (v >= 10000000L)
+ {
+ return 8;
+ }
+ if (v >= 1000000L)
+ {
+ return 7;
+ }
+ if (v >= 100000L)
+ {
+ return 6;
+ }
+ if (v >= 10000L)
+ {
+ return 5;
+ }
+ if (v >= 1000L)
+ {
+ return 4;
+ }
+ if (v >= 100L)
+ {
+ return 3;
+ }
+ if (v >= 10L)
+ {
+ return 2;
+ }
+ return 1;
+}
+
+/* A floating decimal representing m * 10^e. */
+typedef struct floating_decimal_64
+{
+ uint64 mantissa;
+ int32 exponent;
+} floating_decimal_64;
+
+static inline floating_decimal_64
+d2d(const uint64 ieeeMantissa, const uint32 ieeeExponent)
+{
+ int32 e2;
+ uint64 m2;
+
+ if (ieeeExponent == 0)
+ {
+ /* We subtract 2 so that the bounds computation has 2 additional bits. */
+ e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
+ m2 = ieeeMantissa;
+ }
+ else
+ {
+ e2 = ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
+ m2 = (UINT64CONST(1) << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
+ }
+
+#if STRICTLY_SHORTEST
+ const bool even = (m2 & 1) == 0;
+ const bool acceptBounds = even;
+#else
+ const bool acceptBounds = false;
+#endif
+
+ /* Step 2: Determine the interval of legal decimal representations. */
+ const uint64 mv = 4 * m2;
+
+ /* Implicit bool -> int conversion. True is 1, false is 0. */
+ const uint32 mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
+
+ /* We would compute mp and mm like this: */
+ /* uint64 mp = 4 * m2 + 2; */
+ /* uint64 mm = mv - 1 - mmShift; */
+
+ /* Step 3: Convert to a decimal power base using 128-bit arithmetic. */
+ uint64 vr,
+ vp,
+ vm;
+ int32 e10;
+ bool vmIsTrailingZeros = false;
+ bool vrIsTrailingZeros = false;
+
+ if (e2 >= 0)
+ {
+ /*
+ * I tried special-casing q == 0, but there was no effect on
+ * performance.
+ *
+ * This expr is slightly faster than max(0, log10Pow2(e2) - 1).
+ */
+ const uint32 q = log10Pow2(e2) - (e2 > 3);
+ const int32 k = DOUBLE_POW5_INV_BITCOUNT + pow5bits(q) - 1;
+ const int32 i = -e2 + q + k;
+
+ e10 = q;
+
+ vr = mulShiftAll(m2, DOUBLE_POW5_INV_SPLIT[q], i, &vp, &vm, mmShift);
+
+ if (q <= 21)
+ {
+ /*
+ * This should use q <= 22, but I think 21 is also safe. Smaller
+ * values may still be safe, but it's more difficult to reason
+ * about them.
+ *
+ * Only one of mp, mv, and mm can be a multiple of 5, if any.
+ */
+ const uint32 mvMod5 = (uint32) (mv - 5 * div5(mv));
+
+ if (mvMod5 == 0)
+ {
+ vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
+ }
+ else if (acceptBounds)
+ {
+ /*----
+ * Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q
+ * <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q
+ * <=> true && pow5Factor(mm) >= q, since e2 >= q.
+ *----
+ */
+ vmIsTrailingZeros = multipleOfPowerOf5(mv - 1 - mmShift, q);
+ }
+ else
+ {
+ /* Same as min(e2 + 1, pow5Factor(mp)) >= q. */
+ vp -= multipleOfPowerOf5(mv + 2, q);
+ }
+ }
+ }
+ else
+ {
+ /*
+ * This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
+ */
+ const uint32 q = log10Pow5(-e2) - (-e2 > 1);
+ const int32 i = -e2 - q;
+ const int32 k = pow5bits(i) - DOUBLE_POW5_BITCOUNT;
+ const int32 j = q - k;
+
+ e10 = q + e2;
+
+ vr = mulShiftAll(m2, DOUBLE_POW5_SPLIT[i], j, &vp, &vm, mmShift);
+
+ if (q <= 1)
+ {
+ /*
+ * {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q
+ * trailing 0 bits.
+ */
+ /* mv = 4 * m2, so it always has at least two trailing 0 bits. */
+ vrIsTrailingZeros = true;
+ if (acceptBounds)
+ {
+ /*
+ * mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff
+ * mmShift == 1.
+ */
+ vmIsTrailingZeros = mmShift == 1;
+ }
+ else
+ {
+ /*
+ * mp = mv + 2, so it always has at least one trailing 0 bit.
+ */
+ --vp;
+ }
+ }
+ else if (q < 63)
+ {
+ /* TODO(ulfjack):Use a tighter bound here. */
+ /*
+ * We need to compute min(ntz(mv), pow5Factor(mv) - e2) >= q - 1
+ */
+ /* <=> ntz(mv) >= q - 1 && pow5Factor(mv) - e2 >= q - 1 */
+ /* <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q) */
+ /* <=> (mv & ((1 << (q - 1)) - 1)) == 0 */
+
+ /*
+ * We also need to make sure that the left shift does not
+ * overflow.
+ */
+ vrIsTrailingZeros = multipleOfPowerOf2(mv, q - 1);
+ }
+ }
+
+ /*
+ * Step 4: Find the shortest decimal representation in the interval of
+ * legal representations.
+ */
+ uint32 removed = 0;
+ uint8 lastRemovedDigit = 0;
+ uint64 output;
+
+ /* On average, we remove ~2 digits. */
+ if (vmIsTrailingZeros || vrIsTrailingZeros)
+ {
+ /* General case, which happens rarely (~0.7%). */
+ for (;;)
+ {
+ const uint64 vpDiv10 = div10(vp);
+ const uint64 vmDiv10 = div10(vm);
+
+ if (vpDiv10 <= vmDiv10)
+ break;
+
+ const uint32 vmMod10 = (uint32) (vm - 10 * vmDiv10);
+ const uint64 vrDiv10 = div10(vr);
+ const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10);
+
+ vmIsTrailingZeros &= vmMod10 == 0;
+ vrIsTrailingZeros &= lastRemovedDigit == 0;
+ lastRemovedDigit = (uint8) vrMod10;
+ vr = vrDiv10;
+ vp = vpDiv10;
+ vm = vmDiv10;
+ ++removed;
+ }
+
+ if (vmIsTrailingZeros)
+ {
+ for (;;)
+ {
+ const uint64 vmDiv10 = div10(vm);
+ const uint32 vmMod10 = (uint32) (vm - 10 * vmDiv10);
+
+ if (vmMod10 != 0)
+ break;
+
+ const uint64 vpDiv10 = div10(vp);
+ const uint64 vrDiv10 = div10(vr);
+ const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10);
+
+ vrIsTrailingZeros &= lastRemovedDigit == 0;
+ lastRemovedDigit = (uint8) vrMod10;
+ vr = vrDiv10;
+ vp = vpDiv10;
+ vm = vmDiv10;
+ ++removed;
+ }
+ }
+
+ if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0)
+ {
+ /* Round even if the exact number is .....50..0. */
+ lastRemovedDigit = 4;
+ }
+
+ /*
+ * We need to take vr + 1 if vr is outside bounds or we need to round
+ * up.
+ */
+ output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
+ }
+ else
+ {
+ /*
+ * Specialized for the common case (~99.3%). Percentages below are
+ * relative to this.
+ */
+ bool roundUp = false;
+ const uint64 vpDiv100 = div100(vp);
+ const uint64 vmDiv100 = div100(vm);
+
+ if (vpDiv100 > vmDiv100)
+ {
+ /* Optimization:remove two digits at a time(~86.2 %). */
+ const uint64 vrDiv100 = div100(vr);
+ const uint32 vrMod100 = (uint32) (vr - 100 * vrDiv100);
+
+ roundUp = vrMod100 >= 50;
+ vr = vrDiv100;
+ vp = vpDiv100;
+ vm = vmDiv100;
+ removed += 2;
+ }
+
+ /*----
+ * Loop iterations below (approximately), without optimization
+ * above:
+ *
+ * 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%,
+ * 6+: 0.02%
+ *
+ * Loop iterations below (approximately), with optimization
+ * above:
+ *
+ * 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
+ *----
+ */
+ for (;;)
+ {
+ const uint64 vpDiv10 = div10(vp);
+ const uint64 vmDiv10 = div10(vm);
+
+ if (vpDiv10 <= vmDiv10)
+ break;
+
+ const uint64 vrDiv10 = div10(vr);
+ const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10);
+
+ roundUp = vrMod10 >= 5;
+ vr = vrDiv10;
+ vp = vpDiv10;
+ vm = vmDiv10;
+ ++removed;
+ }
+
+ /*
+ * We need to take vr + 1 if vr is outside bounds or we need to round
+ * up.
+ */
+ output = vr + (vr == vm || roundUp);
+ }
+
+ const int32 exp = e10 + removed;
+
+ floating_decimal_64 fd;
+
+ fd.exponent = exp;
+ fd.mantissa = output;
+ return fd;
+}
+
+static inline int
+to_chars_df(const floating_decimal_64 v, const uint32 olength, char *const result)
+{
+ /* Step 5: Print the decimal representation. */
+ int index = 0;
+
+ uint64 output = v.mantissa;
+ int32 exp = v.exponent;
+
+ /*----
+ * On entry, mantissa * 10^exp is the result to be output.
+ * Caller has already done the - sign if needed.
+ *
+ * We want to insert the point somewhere depending on the output length
+ * and exponent, which might mean adding zeros:
+ *
+ * exp | format
+ * 1+ | ddddddddd000000
+ * 0 | ddddddddd
+ * -1 .. -len+1 | dddddddd.d to d.ddddddddd
+ * -len ... | 0.ddddddddd to 0.000dddddd
+ */
+ uint32 i = 0;
+ int32 nexp = exp + olength;
+
+ if (nexp <= 0)
+ {
+ /* -nexp is number of 0s to add after '.' */
+ Assert(nexp >= -3);
+ /* 0.000ddddd */
+ index = 2 - nexp;
+ /* won't need more than this many 0s */
+ memcpy(result, "0.000000", 8);
+ }
+ else if (exp < 0)
+ {
+ /*
+ * dddd.dddd; leave space at the start and move the '.' in after
+ */
+ index = 1;
+ }
+ else
+ {
+ /*
+ * We can save some code later by pre-filling with zeros. We know that
+ * there can be no more than 16 output digits in this form, otherwise
+ * we would not choose fixed-point output.
+ */
+ Assert(exp < 16 && exp + olength <= 16);
+ memset(result, '0', 16);
+ }
+
+ /*
+ * We prefer 32-bit operations, even on 64-bit platforms. We have at most
+ * 17 digits, and uint32 can store 9 digits. If output doesn't fit into
+ * uint32, we cut off 8 digits, so the rest will fit into uint32.
+ */
+ if ((output >> 32) != 0)
+ {
+ /* Expensive 64-bit division. */
+ const uint64 q = div1e8(output);
+ uint32 output2 = (uint32) (output - 100000000 * q);
+ const uint32 c = output2 % 10000;
+
+ output = q;
+ output2 /= 10000;
+
+ const uint32 d = output2 % 10000;
+ const uint32 c0 = (c % 100) << 1;
+ const uint32 c1 = (c / 100) << 1;
+ const uint32 d0 = (d % 100) << 1;
+ const uint32 d1 = (d / 100) << 1;
+
+ memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2);
+ memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2);
+ memcpy(result + index + olength - i - 6, DIGIT_TABLE + d0, 2);
+ memcpy(result + index + olength - i - 8, DIGIT_TABLE + d1, 2);
+ i += 8;
+ }
+
+ uint32 output2 = (uint32) output;
+
+ while (output2 >= 10000)
+ {
+ const uint32 c = output2 - 10000 * (output2 / 10000);
+ const uint32 c0 = (c % 100) << 1;
+ const uint32 c1 = (c / 100) << 1;
+
+ output2 /= 10000;
+ memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2);
+ memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2);
+ i += 4;
+ }
+ if (output2 >= 100)
+ {
+ const uint32 c = (output2 % 100) << 1;
+
+ output2 /= 100;
+ memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2);
+ i += 2;
+ }
+ if (output2 >= 10)
+ {
+ const uint32 c = output2 << 1;
+
+ memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2);
+ }
+ else
+ {
+ result[index] = (char) ('0' + output2);
+ }
+
+ if (index == 1)
+ {
+ /*
+ * nexp is 1..15 here, representing the number of digits before the
+ * point. A value of 16 is not possible because we switch to
+ * scientific notation when the display exponent reaches 15.
+ */
+ Assert(nexp < 16);
+ /* gcc only seems to want to optimize memmove for small 2^n */
+ if (nexp & 8)
+ {
+ memmove(result + index - 1, result + index, 8);
+ index += 8;
+ }
+ if (nexp & 4)
+ {
+ memmove(result + index - 1, result + index, 4);
+ index += 4;
+ }
+ if (nexp & 2)
+ {
+ memmove(result + index - 1, result + index, 2);
+ index += 2;
+ }
+ if (nexp & 1)
+ {
+ result[index - 1] = result[index];
+ }
+ result[nexp] = '.';
+ index = olength + 1;
+ }
+ else if (exp >= 0)
+ {
+ /* we supplied the trailing zeros earlier, now just set the length. */
+ index = olength + exp;
+ }
+ else
+ {
+ index = olength + (2 - nexp);
+ }
+
+ return index;
+}
+
+static inline int
+to_chars(floating_decimal_64 v, const bool sign, char *const result)
+{
+ /* Step 5: Print the decimal representation. */
+ int index = 0;
+
+ uint64 output = v.mantissa;
+ uint32 olength = decimalLength(output);
+ int32 exp = v.exponent + olength - 1;
+
+ if (sign)
+ {
+ result[index++] = '-';
+ }
+
+ /*
+ * The thresholds for fixed-point output are chosen to match printf
+ * defaults. Beware that both the code of to_chars_df and the value of
+ * DOUBLE_SHORTEST_DECIMAL_LEN are sensitive to these thresholds.
+ */
+ if (exp >= -4 && exp < 15)
+ return to_chars_df(v, olength, result + index) + sign;
+
+ /*
+ * If v.exponent is exactly 0, we might have reached here via the small
+ * integer fast path, in which case v.mantissa might contain trailing
+ * (decimal) zeros. For scientific notation we need to move these zeros
+ * into the exponent. (For fixed point this doesn't matter, which is why
+ * we do this here rather than above.)
+ *
+ * Since we already calculated the display exponent (exp) above based on
+ * the old decimal length, that value does not change here. Instead, we
+ * just reduce the display length for each digit removed.
+ *
+ * If we didn't get here via the fast path, the raw exponent will not
+ * usually be 0, and there will be no trailing zeros, so we pay no more
+ * than one div10/multiply extra cost. We claw back half of that by
+ * checking for divisibility by 2 before dividing by 10.
+ */
+ if (v.exponent == 0)
+ {
+ while ((output & 1) == 0)
+ {
+ const uint64 q = div10(output);
+ const uint32 r = (uint32) (output - 10 * q);
+
+ if (r != 0)
+ break;
+ output = q;
+ --olength;
+ }
+ }
+
+ /*----
+ * Print the decimal digits.
+ *
+ * The following code is equivalent to:
+ *
+ * for (uint32 i = 0; i < olength - 1; ++i) {
+ * const uint32 c = output % 10; output /= 10;
+ * result[index + olength - i] = (char) ('0' + c);
+ * }
+ * result[index] = '0' + output % 10;
+ *----
+ */
+
+ uint32 i = 0;
+
+ /*
+ * We prefer 32-bit operations, even on 64-bit platforms. We have at most
+ * 17 digits, and uint32 can store 9 digits. If output doesn't fit into
+ * uint32, we cut off 8 digits, so the rest will fit into uint32.
+ */
+ if ((output >> 32) != 0)
+ {
+ /* Expensive 64-bit division. */
+ const uint64 q = div1e8(output);
+ uint32 output2 = (uint32) (output - 100000000 * q);
+
+ output = q;
+
+ const uint32 c = output2 % 10000;
+
+ output2 /= 10000;
+
+ const uint32 d = output2 % 10000;
+ const uint32 c0 = (c % 100) << 1;
+ const uint32 c1 = (c / 100) << 1;
+ const uint32 d0 = (d % 100) << 1;
+ const uint32 d1 = (d / 100) << 1;
+
+ memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2);
+ memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2);
+ memcpy(result + index + olength - i - 5, DIGIT_TABLE + d0, 2);
+ memcpy(result + index + olength - i - 7, DIGIT_TABLE + d1, 2);
+ i += 8;
+ }
+
+ uint32 output2 = (uint32) output;
+
+ while (output2 >= 10000)
+ {
+ const uint32 c = output2 - 10000 * (output2 / 10000);
+
+ output2 /= 10000;
+
+ const uint32 c0 = (c % 100) << 1;
+ const uint32 c1 = (c / 100) << 1;
+
+ memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2);
+ memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2);
+ i += 4;
+ }
+ if (output2 >= 100)
+ {
+ const uint32 c = (output2 % 100) << 1;
+
+ output2 /= 100;
+ memcpy(result + index + olength - i - 1, DIGIT_TABLE + c, 2);
+ i += 2;
+ }
+ if (output2 >= 10)
+ {
+ const uint32 c = output2 << 1;
+
+ /*
+ * We can't use memcpy here: the decimal dot goes between these two
+ * digits.
+ */
+ result[index + olength - i] = DIGIT_TABLE[c + 1];
+ result[index] = DIGIT_TABLE[c];
+ }
+ else
+ {
+ result[index] = (char) ('0' + output2);
+ }
+
+ /* Print decimal point if needed. */
+ if (olength > 1)
+ {
+ result[index + 1] = '.';
+ index += olength + 1;
+ }
+ else
+ {
+ ++index;
+ }
+
+ /* Print the exponent. */
+ result[index++] = 'e';
+ if (exp < 0)
+ {
+ result[index++] = '-';
+ exp = -exp;
+ }
+ else
+ result[index++] = '+';
+
+ if (exp >= 100)
+ {
+ const int32 c = exp % 10;
+
+ memcpy(result + index, DIGIT_TABLE + 2 * (exp / 10), 2);
+ result[index + 2] = (char) ('0' + c);
+ index += 3;
+ }
+ else
+ {
+ memcpy(result + index, DIGIT_TABLE + 2 * exp, 2);
+ index += 2;
+ }
+
+ return index;
+}
+
+static inline bool
+d2d_small_int(const uint64 ieeeMantissa,
+ const uint32 ieeeExponent,
+ floating_decimal_64 *v)
+{
+ const int32 e2 = (int32) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;
+
+ /*
+ * Avoid using multiple "return false;" here since it tends to provoke the
+ * compiler into inlining multiple copies of d2d, which is undesirable.
+ */
+
+ if (e2 >= -DOUBLE_MANTISSA_BITS && e2 <= 0)
+ {
+ /*----
+ * Since 2^52 <= m2 < 2^53 and 0 <= -e2 <= 52:
+ * 1 <= f = m2 / 2^-e2 < 2^53.
+ *
+ * Test if the lower -e2 bits of the significand are 0, i.e. whether
+ * the fraction is 0. We can use ieeeMantissa here, since the implied
+ * 1 bit can never be tested by this; the implied 1 can only be part
+ * of a fraction if e2 < -DOUBLE_MANTISSA_BITS which we already
+ * checked. (e.g. 0.5 gives ieeeMantissa == 0 and e2 == -53)
+ */
+ const uint64 mask = (UINT64CONST(1) << -e2) - 1;
+ const uint64 fraction = ieeeMantissa & mask;
+
+ if (fraction == 0)
+ {
+ /*----
+ * f is an integer in the range [1, 2^53).
+ * Note: mantissa might contain trailing (decimal) 0's.
+ * Note: since 2^53 < 10^16, there is no need to adjust
+ * decimalLength().
+ */
+ const uint64 m2 = (UINT64CONST(1) << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
+
+ v->mantissa = m2 >> -e2;
+ v->exponent = 0;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * Store the shortest decimal representation of the given double as an
+ * UNTERMINATED string in the caller's supplied buffer (which must be at least
+ * DOUBLE_SHORTEST_DECIMAL_LEN-1 bytes long).
+ *
+ * Returns the number of bytes stored.
+ */
+int
+double_to_shortest_decimal_bufn(double f, char *result)
+{
+ /*
+ * Step 1: Decode the floating-point number, and unify normalized and
+ * subnormal cases.
+ */
+ const uint64 bits = double_to_bits(f);
+
+ /* Decode bits into sign, mantissa, and exponent. */
+ const bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;
+ const uint64 ieeeMantissa = bits & ((UINT64CONST(1) << DOUBLE_MANTISSA_BITS) - 1);
+ const uint32 ieeeExponent = (uint32) ((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1));
+
+ /* Case distinction; exit early for the easy cases. */
+ if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0))
+ {
+ return copy_special_str(result, ieeeSign, (ieeeExponent != 0), (ieeeMantissa != 0));
+ }
+
+ floating_decimal_64 v;
+ const bool isSmallInt = d2d_small_int(ieeeMantissa, ieeeExponent, &v);
+
+ if (!isSmallInt)
+ {
+ v = d2d(ieeeMantissa, ieeeExponent);
+ }
+
+ return to_chars(v, ieeeSign, result);
+}
+
+/*
+ * Store the shortest decimal representation of the given double as a
+ * null-terminated string in the caller's supplied buffer (which must be at
+ * least DOUBLE_SHORTEST_DECIMAL_LEN bytes long).
+ *
+ * Returns the string length.
+ */
+int
+double_to_shortest_decimal_buf(double f, char *result)
+{
+ const int index = double_to_shortest_decimal_bufn(f, result);
+
+ /* Terminate the string. */
+ Assert(index < DOUBLE_SHORTEST_DECIMAL_LEN);
+ result[index] = '\0';
+ return index;
+}
+
+/*
+ * Return the shortest decimal representation as a null-terminated palloc'd
+ * string (outside the backend, uses malloc() instead).
+ *
+ * Caller is responsible for freeing the result.
+ */
+char *
+double_to_shortest_decimal(double f)
+{
+ char *const result = (char *) palloc(DOUBLE_SHORTEST_DECIMAL_LEN);
+
+ double_to_shortest_decimal_buf(f, result);
+ return result;
+}
diff --git a/src/common/d2s_full_table.h b/src/common/d2s_full_table.h
new file mode 100644
index 0000000..23fd715
--- /dev/null
+++ b/src/common/d2s_full_table.h
@@ -0,0 +1,358 @@
+/*---------------------------------------------------------------------------
+ *
+ * Ryu floating-point output for double precision.
+ *
+ * Portions Copyright (c) 2018-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/d2s_full_table.h
+ *
+ * This is a modification of code taken from github.com/ulfjack/ryu under the
+ * terms of the Boost license (not the Apache license). The original copyright
+ * notice follows:
+ *
+ * Copyright 2018 Ulf Adams
+ *
+ * The contents of this file may be used under the terms of the Apache
+ * License, Version 2.0.
+ *
+ * (See accompanying file LICENSE-Apache or copy at
+ * http://www.apache.org/licenses/LICENSE-2.0)
+ *
+ * Alternatively, the contents of this file may be used under the terms of the
+ * Boost Software License, Version 1.0.
+ *
+ * (See accompanying file LICENSE-Boost or copy at
+ * https://www.boost.org/LICENSE_1_0.txt)
+ *
+ * Unless required by applicable law or agreed to in writing, this software is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+#ifndef RYU_D2S_FULL_TABLE_H
+#define RYU_D2S_FULL_TABLE_H
+
+/*
+ * These tables are generated (by the upstream) using PrintDoubleLookupTable
+ * from the upstream sources at github.com/ulfjack/ryu, and then modified (by
+ * us) by adding UINT64CONST.
+ */
+static const uint64 DOUBLE_POW5_INV_SPLIT[292][2] = {
+ {UINT64CONST(1), UINT64CONST(288230376151711744)}, {UINT64CONST(3689348814741910324), UINT64CONST(230584300921369395)},
+ {UINT64CONST(2951479051793528259), UINT64CONST(184467440737095516)}, {UINT64CONST(17118578500402463900), UINT64CONST(147573952589676412)},
+ {UINT64CONST(12632330341676300947), UINT64CONST(236118324143482260)}, {UINT64CONST(10105864273341040758), UINT64CONST(188894659314785808)},
+ {UINT64CONST(15463389048156653253), UINT64CONST(151115727451828646)}, {UINT64CONST(17362724847566824558), UINT64CONST(241785163922925834)},
+ {UINT64CONST(17579528692795369969), UINT64CONST(193428131138340667)}, {UINT64CONST(6684925324752475329), UINT64CONST(154742504910672534)},
+ {UINT64CONST(18074578149087781173), UINT64CONST(247588007857076054)}, {UINT64CONST(18149011334012135262), UINT64CONST(198070406285660843)},
+ {UINT64CONST(3451162622983977240), UINT64CONST(158456325028528675)}, {UINT64CONST(5521860196774363583), UINT64CONST(253530120045645880)},
+ {UINT64CONST(4417488157419490867), UINT64CONST(202824096036516704)}, {UINT64CONST(7223339340677503017), UINT64CONST(162259276829213363)},
+ {UINT64CONST(7867994130342094503), UINT64CONST(259614842926741381)}, {UINT64CONST(2605046489531765280), UINT64CONST(207691874341393105)},
+ {UINT64CONST(2084037191625412224), UINT64CONST(166153499473114484)}, {UINT64CONST(10713157136084480204), UINT64CONST(265845599156983174)},
+ {UINT64CONST(12259874523609494487), UINT64CONST(212676479325586539)}, {UINT64CONST(13497248433629505913), UINT64CONST(170141183460469231)},
+ {UINT64CONST(14216899864323388813), UINT64CONST(272225893536750770)}, {UINT64CONST(11373519891458711051), UINT64CONST(217780714829400616)},
+ {UINT64CONST(5409467098425058518), UINT64CONST(174224571863520493)}, {UINT64CONST(4965798542738183305), UINT64CONST(278759314981632789)},
+ {UINT64CONST(7661987648932456967), UINT64CONST(223007451985306231)}, {UINT64CONST(2440241304404055250), UINT64CONST(178405961588244985)},
+ {UINT64CONST(3904386087046488400), UINT64CONST(285449538541191976)}, {UINT64CONST(17880904128604832013), UINT64CONST(228359630832953580)},
+ {UINT64CONST(14304723302883865611), UINT64CONST(182687704666362864)}, {UINT64CONST(15133127457049002812), UINT64CONST(146150163733090291)},
+ {UINT64CONST(16834306301794583852), UINT64CONST(233840261972944466)}, {UINT64CONST(9778096226693756759), UINT64CONST(187072209578355573)},
+ {UINT64CONST(15201174610838826053), UINT64CONST(149657767662684458)}, {UINT64CONST(2185786488890659746), UINT64CONST(239452428260295134)},
+ {UINT64CONST(5437978005854438120), UINT64CONST(191561942608236107)}, {UINT64CONST(15418428848909281466), UINT64CONST(153249554086588885)},
+ {UINT64CONST(6222742084545298729), UINT64CONST(245199286538542217)}, {UINT64CONST(16046240111861969953), UINT64CONST(196159429230833773)},
+ {UINT64CONST(1768945645263844993), UINT64CONST(156927543384667019)}, {UINT64CONST(10209010661905972635), UINT64CONST(251084069415467230)},
+ {UINT64CONST(8167208529524778108), UINT64CONST(200867255532373784)}, {UINT64CONST(10223115638361732810), UINT64CONST(160693804425899027)},
+ {UINT64CONST(1599589762411131202), UINT64CONST(257110087081438444)}, {UINT64CONST(4969020624670815285), UINT64CONST(205688069665150755)},
+ {UINT64CONST(3975216499736652228), UINT64CONST(164550455732120604)}, {UINT64CONST(13739044029062464211), UINT64CONST(263280729171392966)},
+ {UINT64CONST(7301886408508061046), UINT64CONST(210624583337114373)}, {UINT64CONST(13220206756290269483), UINT64CONST(168499666669691498)},
+ {UINT64CONST(17462981995322520850), UINT64CONST(269599466671506397)}, {UINT64CONST(6591687966774196033), UINT64CONST(215679573337205118)},
+ {UINT64CONST(12652048002903177473), UINT64CONST(172543658669764094)}, {UINT64CONST(9175230360419352987), UINT64CONST(276069853871622551)},
+ {UINT64CONST(3650835473593572067), UINT64CONST(220855883097298041)}, {UINT64CONST(17678063637842498946), UINT64CONST(176684706477838432)},
+ {UINT64CONST(13527506561580357021), UINT64CONST(282695530364541492)}, {UINT64CONST(3443307619780464970), UINT64CONST(226156424291633194)},
+ {UINT64CONST(6443994910566282300), UINT64CONST(180925139433306555)}, {UINT64CONST(5155195928453025840), UINT64CONST(144740111546645244)},
+ {UINT64CONST(15627011115008661990), UINT64CONST(231584178474632390)}, {UINT64CONST(12501608892006929592), UINT64CONST(185267342779705912)},
+ {UINT64CONST(2622589484121723027), UINT64CONST(148213874223764730)}, {UINT64CONST(4196143174594756843), UINT64CONST(237142198758023568)},
+ {UINT64CONST(10735612169159626121), UINT64CONST(189713759006418854)}, {UINT64CONST(12277838550069611220), UINT64CONST(151771007205135083)},
+ {UINT64CONST(15955192865369467629), UINT64CONST(242833611528216133)}, {UINT64CONST(1696107848069843133), UINT64CONST(194266889222572907)},
+ {UINT64CONST(12424932722681605476), UINT64CONST(155413511378058325)}, {UINT64CONST(1433148282581017146), UINT64CONST(248661618204893321)},
+ {UINT64CONST(15903913885032455010), UINT64CONST(198929294563914656)}, {UINT64CONST(9033782293284053685), UINT64CONST(159143435651131725)},
+ {UINT64CONST(14454051669254485895), UINT64CONST(254629497041810760)}, {UINT64CONST(11563241335403588716), UINT64CONST(203703597633448608)},
+ {UINT64CONST(16629290697806691620), UINT64CONST(162962878106758886)}, {UINT64CONST(781423413297334329), UINT64CONST(260740604970814219)},
+ {UINT64CONST(4314487545379777786), UINT64CONST(208592483976651375)}, {UINT64CONST(3451590036303822229), UINT64CONST(166873987181321100)},
+ {UINT64CONST(5522544058086115566), UINT64CONST(266998379490113760)}, {UINT64CONST(4418035246468892453), UINT64CONST(213598703592091008)},
+ {UINT64CONST(10913125826658934609), UINT64CONST(170878962873672806)}, {UINT64CONST(10082303693170474728), UINT64CONST(273406340597876490)},
+ {UINT64CONST(8065842954536379782), UINT64CONST(218725072478301192)}, {UINT64CONST(17520720807854834795), UINT64CONST(174980057982640953)},
+ {UINT64CONST(5897060404116273733), UINT64CONST(279968092772225526)}, {UINT64CONST(1028299508551108663), UINT64CONST(223974474217780421)},
+ {UINT64CONST(15580034865808528224), UINT64CONST(179179579374224336)}, {UINT64CONST(17549358155809824511), UINT64CONST(286687326998758938)},
+ {UINT64CONST(2971440080422128639), UINT64CONST(229349861599007151)}, {UINT64CONST(17134547323305344204), UINT64CONST(183479889279205720)},
+ {UINT64CONST(13707637858644275364), UINT64CONST(146783911423364576)}, {UINT64CONST(14553522944347019935), UINT64CONST(234854258277383322)},
+ {UINT64CONST(4264120725993795302), UINT64CONST(187883406621906658)}, {UINT64CONST(10789994210278856888), UINT64CONST(150306725297525326)},
+ {UINT64CONST(9885293106962350374), UINT64CONST(240490760476040522)}, {UINT64CONST(529536856086059653), UINT64CONST(192392608380832418)},
+ {UINT64CONST(7802327114352668369), UINT64CONST(153914086704665934)}, {UINT64CONST(1415676938738538420), UINT64CONST(246262538727465495)},
+ {UINT64CONST(1132541550990830736), UINT64CONST(197010030981972396)}, {UINT64CONST(15663428499760305882), UINT64CONST(157608024785577916)},
+ {UINT64CONST(17682787970132668764), UINT64CONST(252172839656924666)}, {UINT64CONST(10456881561364224688), UINT64CONST(201738271725539733)},
+ {UINT64CONST(15744202878575200397), UINT64CONST(161390617380431786)}, {UINT64CONST(17812026976236499989), UINT64CONST(258224987808690858)},
+ {UINT64CONST(3181575136763469022), UINT64CONST(206579990246952687)}, {UINT64CONST(13613306553636506187), UINT64CONST(165263992197562149)},
+ {UINT64CONST(10713244041592678929), UINT64CONST(264422387516099439)}, {UINT64CONST(12259944048016053467), UINT64CONST(211537910012879551)},
+ {UINT64CONST(6118606423670932450), UINT64CONST(169230328010303641)}, {UINT64CONST(2411072648389671274), UINT64CONST(270768524816485826)},
+ {UINT64CONST(16686253377679378312), UINT64CONST(216614819853188660)}, {UINT64CONST(13349002702143502650), UINT64CONST(173291855882550928)},
+ {UINT64CONST(17669055508687693916), UINT64CONST(277266969412081485)}, {UINT64CONST(14135244406950155133), UINT64CONST(221813575529665188)},
+ {UINT64CONST(240149081334393137), UINT64CONST(177450860423732151)}, {UINT64CONST(11452284974360759988), UINT64CONST(283921376677971441)},
+ {UINT64CONST(5472479164746697667), UINT64CONST(227137101342377153)}, {UINT64CONST(11756680961281178780), UINT64CONST(181709681073901722)},
+ {UINT64CONST(2026647139541122378), UINT64CONST(145367744859121378)}, {UINT64CONST(18000030682233437097), UINT64CONST(232588391774594204)},
+ {UINT64CONST(18089373360528660001), UINT64CONST(186070713419675363)}, {UINT64CONST(3403452244197197031), UINT64CONST(148856570735740291)},
+ {UINT64CONST(16513570034941246220), UINT64CONST(238170513177184465)}, {UINT64CONST(13210856027952996976), UINT64CONST(190536410541747572)},
+ {UINT64CONST(3189987192878576934), UINT64CONST(152429128433398058)}, {UINT64CONST(1414630693863812771), UINT64CONST(243886605493436893)},
+ {UINT64CONST(8510402184574870864), UINT64CONST(195109284394749514)}, {UINT64CONST(10497670562401807014), UINT64CONST(156087427515799611)},
+ {UINT64CONST(9417575270359070576), UINT64CONST(249739884025279378)}, {UINT64CONST(14912757845771077107), UINT64CONST(199791907220223502)},
+ {UINT64CONST(4551508647133041040), UINT64CONST(159833525776178802)}, {UINT64CONST(10971762650154775986), UINT64CONST(255733641241886083)},
+ {UINT64CONST(16156107749607641435), UINT64CONST(204586912993508866)}, {UINT64CONST(9235537384944202825), UINT64CONST(163669530394807093)},
+ {UINT64CONST(11087511001168814197), UINT64CONST(261871248631691349)}, {UINT64CONST(12559357615676961681), UINT64CONST(209496998905353079)},
+ {UINT64CONST(13736834907283479668), UINT64CONST(167597599124282463)}, {UINT64CONST(18289587036911657145), UINT64CONST(268156158598851941)},
+ {UINT64CONST(10942320814787415393), UINT64CONST(214524926879081553)}, {UINT64CONST(16132554281313752961), UINT64CONST(171619941503265242)},
+ {UINT64CONST(11054691591134363444), UINT64CONST(274591906405224388)}, {UINT64CONST(16222450902391311402), UINT64CONST(219673525124179510)},
+ {UINT64CONST(12977960721913049122), UINT64CONST(175738820099343608)}, {UINT64CONST(17075388340318968271), UINT64CONST(281182112158949773)},
+ {UINT64CONST(2592264228029443648), UINT64CONST(224945689727159819)}, {UINT64CONST(5763160197165465241), UINT64CONST(179956551781727855)},
+ {UINT64CONST(9221056315464744386), UINT64CONST(287930482850764568)}, {UINT64CONST(14755542681855616155), UINT64CONST(230344386280611654)},
+ {UINT64CONST(15493782960226403247), UINT64CONST(184275509024489323)}, {UINT64CONST(1326979923955391628), UINT64CONST(147420407219591459)},
+ {UINT64CONST(9501865507812447252), UINT64CONST(235872651551346334)}, {UINT64CONST(11290841220991868125), UINT64CONST(188698121241077067)},
+ {UINT64CONST(1653975347309673853), UINT64CONST(150958496992861654)}, {UINT64CONST(10025058185179298811), UINT64CONST(241533595188578646)},
+ {UINT64CONST(4330697733401528726), UINT64CONST(193226876150862917)}, {UINT64CONST(14532604630946953951), UINT64CONST(154581500920690333)},
+ {UINT64CONST(1116074521063664381), UINT64CONST(247330401473104534)}, {UINT64CONST(4582208431592841828), UINT64CONST(197864321178483627)},
+ {UINT64CONST(14733813189500004432), UINT64CONST(158291456942786901)}, {UINT64CONST(16195403473716186445), UINT64CONST(253266331108459042)},
+ {UINT64CONST(5577625149489128510), UINT64CONST(202613064886767234)}, {UINT64CONST(8151448934333213131), UINT64CONST(162090451909413787)},
+ {UINT64CONST(16731667109675051333), UINT64CONST(259344723055062059)}, {UINT64CONST(17074682502481951390), UINT64CONST(207475778444049647)},
+ {UINT64CONST(6281048372501740465), UINT64CONST(165980622755239718)}, {UINT64CONST(6360328581260874421), UINT64CONST(265568996408383549)},
+ {UINT64CONST(8777611679750609860), UINT64CONST(212455197126706839)}, {UINT64CONST(10711438158542398211), UINT64CONST(169964157701365471)},
+ {UINT64CONST(9759603424184016492), UINT64CONST(271942652322184754)}, {UINT64CONST(11497031554089123517), UINT64CONST(217554121857747803)},
+ {UINT64CONST(16576322872755119460), UINT64CONST(174043297486198242)}, {UINT64CONST(11764721337440549842), UINT64CONST(278469275977917188)},
+ {UINT64CONST(16790474699436260520), UINT64CONST(222775420782333750)}, {UINT64CONST(13432379759549008416), UINT64CONST(178220336625867000)},
+ {UINT64CONST(3045063541568861850), UINT64CONST(285152538601387201)}, {UINT64CONST(17193446092222730773), UINT64CONST(228122030881109760)},
+ {UINT64CONST(13754756873778184618), UINT64CONST(182497624704887808)}, {UINT64CONST(18382503128506368341), UINT64CONST(145998099763910246)},
+ {UINT64CONST(3586563302416817083), UINT64CONST(233596959622256395)}, {UINT64CONST(2869250641933453667), UINT64CONST(186877567697805116)},
+ {UINT64CONST(17052795772514404226), UINT64CONST(149502054158244092)}, {UINT64CONST(12527077977055405469), UINT64CONST(239203286653190548)},
+ {UINT64CONST(17400360011128145022), UINT64CONST(191362629322552438)}, {UINT64CONST(2852241564676785048), UINT64CONST(153090103458041951)},
+ {UINT64CONST(15631632947708587046), UINT64CONST(244944165532867121)}, {UINT64CONST(8815957543424959314), UINT64CONST(195955332426293697)},
+ {UINT64CONST(18120812478965698421), UINT64CONST(156764265941034957)}, {UINT64CONST(14235904707377476180), UINT64CONST(250822825505655932)},
+ {UINT64CONST(4010026136418160298), UINT64CONST(200658260404524746)}, {UINT64CONST(17965416168102169531), UINT64CONST(160526608323619796)},
+ {UINT64CONST(2919224165770098987), UINT64CONST(256842573317791675)}, {UINT64CONST(2335379332616079190), UINT64CONST(205474058654233340)},
+ {UINT64CONST(1868303466092863352), UINT64CONST(164379246923386672)}, {UINT64CONST(6678634360490491686), UINT64CONST(263006795077418675)},
+ {UINT64CONST(5342907488392393349), UINT64CONST(210405436061934940)}, {UINT64CONST(4274325990713914679), UINT64CONST(168324348849547952)},
+ {UINT64CONST(10528270399884173809), UINT64CONST(269318958159276723)}, {UINT64CONST(15801313949391159694), UINT64CONST(215455166527421378)},
+ {UINT64CONST(1573004715287196786), UINT64CONST(172364133221937103)}, {UINT64CONST(17274202803427156150), UINT64CONST(275782613155099364)},
+ {UINT64CONST(17508711057483635243), UINT64CONST(220626090524079491)}, {UINT64CONST(10317620031244997871), UINT64CONST(176500872419263593)},
+ {UINT64CONST(12818843235250086271), UINT64CONST(282401395870821749)}, {UINT64CONST(13944423402941979340), UINT64CONST(225921116696657399)},
+ {UINT64CONST(14844887537095493795), UINT64CONST(180736893357325919)}, {UINT64CONST(15565258844418305359), UINT64CONST(144589514685860735)},
+ {UINT64CONST(6457670077359736959), UINT64CONST(231343223497377177)}, {UINT64CONST(16234182506113520537), UINT64CONST(185074578797901741)},
+ {UINT64CONST(9297997190148906106), UINT64CONST(148059663038321393)}, {UINT64CONST(11187446689496339446), UINT64CONST(236895460861314229)},
+ {UINT64CONST(12639306166338981880), UINT64CONST(189516368689051383)}, {UINT64CONST(17490142562555006151), UINT64CONST(151613094951241106)},
+ {UINT64CONST(2158786396894637579), UINT64CONST(242580951921985771)}, {UINT64CONST(16484424376483351356), UINT64CONST(194064761537588616)},
+ {UINT64CONST(9498190686444770762), UINT64CONST(155251809230070893)}, {UINT64CONST(11507756283569722895), UINT64CONST(248402894768113429)},
+ {UINT64CONST(12895553841597688639), UINT64CONST(198722315814490743)}, {UINT64CONST(17695140702761971558), UINT64CONST(158977852651592594)},
+ {UINT64CONST(17244178680193423523), UINT64CONST(254364564242548151)}, {UINT64CONST(10105994129412828495), UINT64CONST(203491651394038521)},
+ {UINT64CONST(4395446488788352473), UINT64CONST(162793321115230817)}, {UINT64CONST(10722063196803274280), UINT64CONST(260469313784369307)},
+ {UINT64CONST(1198952927958798777), UINT64CONST(208375451027495446)}, {UINT64CONST(15716557601334680315), UINT64CONST(166700360821996356)},
+ {UINT64CONST(17767794532651667857), UINT64CONST(266720577315194170)}, {UINT64CONST(14214235626121334286), UINT64CONST(213376461852155336)},
+ {UINT64CONST(7682039686155157106), UINT64CONST(170701169481724269)}, {UINT64CONST(1223217053622520399), UINT64CONST(273121871170758831)},
+ {UINT64CONST(15735968901865657612), UINT64CONST(218497496936607064)}, {UINT64CONST(16278123936234436413), UINT64CONST(174797997549285651)},
+ {UINT64CONST(219556594781725998), UINT64CONST(279676796078857043)}, {UINT64CONST(7554342905309201445), UINT64CONST(223741436863085634)},
+ {UINT64CONST(9732823138989271479), UINT64CONST(178993149490468507)}, {UINT64CONST(815121763415193074), UINT64CONST(286389039184749612)},
+ {UINT64CONST(11720143854957885429), UINT64CONST(229111231347799689)}, {UINT64CONST(13065463898708218666), UINT64CONST(183288985078239751)},
+ {UINT64CONST(6763022304224664610), UINT64CONST(146631188062591801)}, {UINT64CONST(3442138057275642729), UINT64CONST(234609900900146882)},
+ {UINT64CONST(13821756890046245153), UINT64CONST(187687920720117505)}, {UINT64CONST(11057405512036996122), UINT64CONST(150150336576094004)},
+ {UINT64CONST(6623802375033462826), UINT64CONST(240240538521750407)}, {UINT64CONST(16367088344252501231), UINT64CONST(192192430817400325)},
+ {UINT64CONST(13093670675402000985), UINT64CONST(153753944653920260)}, {UINT64CONST(2503129006933649959), UINT64CONST(246006311446272417)},
+ {UINT64CONST(13070549649772650937), UINT64CONST(196805049157017933)}, {UINT64CONST(17835137349301941396), UINT64CONST(157444039325614346)},
+ {UINT64CONST(2710778055689733971), UINT64CONST(251910462920982955)}, {UINT64CONST(2168622444551787177), UINT64CONST(201528370336786364)},
+ {UINT64CONST(5424246770383340065), UINT64CONST(161222696269429091)}, {UINT64CONST(1300097203129523457), UINT64CONST(257956314031086546)},
+ {UINT64CONST(15797473021471260058), UINT64CONST(206365051224869236)}, {UINT64CONST(8948629602435097724), UINT64CONST(165092040979895389)},
+ {UINT64CONST(3249760919670425388), UINT64CONST(264147265567832623)}, {UINT64CONST(9978506365220160957), UINT64CONST(211317812454266098)},
+ {UINT64CONST(15361502721659949412), UINT64CONST(169054249963412878)}, {UINT64CONST(2442311466204457120), UINT64CONST(270486799941460606)},
+ {UINT64CONST(16711244431931206989), UINT64CONST(216389439953168484)}, {UINT64CONST(17058344360286875914), UINT64CONST(173111551962534787)},
+ {UINT64CONST(12535955717491360170), UINT64CONST(276978483140055660)}, {UINT64CONST(10028764573993088136), UINT64CONST(221582786512044528)},
+ {UINT64CONST(15401709288678291155), UINT64CONST(177266229209635622)}, {UINT64CONST(9885339602917624555), UINT64CONST(283625966735416996)},
+ {UINT64CONST(4218922867592189321), UINT64CONST(226900773388333597)}, {UINT64CONST(14443184738299482427), UINT64CONST(181520618710666877)},
+ {UINT64CONST(4175850161155765295), UINT64CONST(145216494968533502)}, {UINT64CONST(10370709072591134795), UINT64CONST(232346391949653603)},
+ {UINT64CONST(15675264887556728482), UINT64CONST(185877113559722882)}, {UINT64CONST(5161514280561562140), UINT64CONST(148701690847778306)},
+ {UINT64CONST(879725219414678777), UINT64CONST(237922705356445290)}, {UINT64CONST(703780175531743021), UINT64CONST(190338164285156232)},
+ {UINT64CONST(11631070584651125387), UINT64CONST(152270531428124985)}, {UINT64CONST(162968861732249003), UINT64CONST(243632850284999977)},
+ {UINT64CONST(11198421533611530172), UINT64CONST(194906280227999981)}, {UINT64CONST(5269388412147313814), UINT64CONST(155925024182399985)},
+ {UINT64CONST(8431021459435702103), UINT64CONST(249480038691839976)}, {UINT64CONST(3055468352806651359), UINT64CONST(199584030953471981)},
+ {UINT64CONST(17201769941212962380), UINT64CONST(159667224762777584)}, {UINT64CONST(16454785461715008838), UINT64CONST(255467559620444135)},
+ {UINT64CONST(13163828369372007071), UINT64CONST(204374047696355308)}, {UINT64CONST(17909760324981426303), UINT64CONST(163499238157084246)},
+ {UINT64CONST(2830174816776909822), UINT64CONST(261598781051334795)}, {UINT64CONST(2264139853421527858), UINT64CONST(209279024841067836)},
+ {UINT64CONST(16568707141704863579), UINT64CONST(167423219872854268)}, {UINT64CONST(4373838538276319787), UINT64CONST(267877151796566830)},
+ {UINT64CONST(3499070830621055830), UINT64CONST(214301721437253464)}, {UINT64CONST(6488605479238754987), UINT64CONST(171441377149802771)},
+ {UINT64CONST(3003071137298187333), UINT64CONST(274306203439684434)}, {UINT64CONST(6091805724580460189), UINT64CONST(219444962751747547)},
+ {UINT64CONST(15941491023890099121), UINT64CONST(175555970201398037)}, {UINT64CONST(10748990379256517301), UINT64CONST(280889552322236860)},
+ {UINT64CONST(8599192303405213841), UINT64CONST(224711641857789488)}, {UINT64CONST(14258051472207991719), UINT64CONST(179769313486231590)}
+};
+
+static const uint64 DOUBLE_POW5_SPLIT[326][2] = {
+ {UINT64CONST(0), UINT64CONST(72057594037927936)}, {UINT64CONST(0), UINT64CONST(90071992547409920)},
+ {UINT64CONST(0), UINT64CONST(112589990684262400)}, {UINT64CONST(0), UINT64CONST(140737488355328000)},
+ {UINT64CONST(0), UINT64CONST(87960930222080000)}, {UINT64CONST(0), UINT64CONST(109951162777600000)},
+ {UINT64CONST(0), UINT64CONST(137438953472000000)}, {UINT64CONST(0), UINT64CONST(85899345920000000)},
+ {UINT64CONST(0), UINT64CONST(107374182400000000)}, {UINT64CONST(0), UINT64CONST(134217728000000000)},
+ {UINT64CONST(0), UINT64CONST(83886080000000000)}, {UINT64CONST(0), UINT64CONST(104857600000000000)},
+ {UINT64CONST(0), UINT64CONST(131072000000000000)}, {UINT64CONST(0), UINT64CONST(81920000000000000)},
+ {UINT64CONST(0), UINT64CONST(102400000000000000)}, {UINT64CONST(0), UINT64CONST(128000000000000000)},
+ {UINT64CONST(0), UINT64CONST(80000000000000000)}, {UINT64CONST(0), UINT64CONST(100000000000000000)},
+ {UINT64CONST(0), UINT64CONST(125000000000000000)}, {UINT64CONST(0), UINT64CONST(78125000000000000)},
+ {UINT64CONST(0), UINT64CONST(97656250000000000)}, {UINT64CONST(0), UINT64CONST(122070312500000000)},
+ {UINT64CONST(0), UINT64CONST(76293945312500000)}, {UINT64CONST(0), UINT64CONST(95367431640625000)},
+ {UINT64CONST(0), UINT64CONST(119209289550781250)}, {UINT64CONST(4611686018427387904), UINT64CONST(74505805969238281)},
+ {UINT64CONST(10376293541461622784), UINT64CONST(93132257461547851)}, {UINT64CONST(8358680908399640576), UINT64CONST(116415321826934814)},
+ {UINT64CONST(612489549322387456), UINT64CONST(72759576141834259)}, {UINT64CONST(14600669991935148032), UINT64CONST(90949470177292823)},
+ {UINT64CONST(13639151471491547136), UINT64CONST(113686837721616029)}, {UINT64CONST(3213881284082270208), UINT64CONST(142108547152020037)},
+ {UINT64CONST(4314518811765112832), UINT64CONST(88817841970012523)}, {UINT64CONST(781462496279003136), UINT64CONST(111022302462515654)},
+ {UINT64CONST(10200200157203529728), UINT64CONST(138777878078144567)}, {UINT64CONST(13292654125893287936), UINT64CONST(86736173798840354)},
+ {UINT64CONST(7392445620511834112), UINT64CONST(108420217248550443)}, {UINT64CONST(4628871007212404736), UINT64CONST(135525271560688054)},
+ {UINT64CONST(16728102434789916672), UINT64CONST(84703294725430033)}, {UINT64CONST(7075069988205232128), UINT64CONST(105879118406787542)},
+ {UINT64CONST(18067209522111315968), UINT64CONST(132348898008484427)}, {UINT64CONST(8986162942105878528), UINT64CONST(82718061255302767)},
+ {UINT64CONST(6621017659204960256), UINT64CONST(103397576569128459)}, {UINT64CONST(3664586055578812416), UINT64CONST(129246970711410574)},
+ {UINT64CONST(16125424340018921472), UINT64CONST(80779356694631608)}, {UINT64CONST(1710036351314100224), UINT64CONST(100974195868289511)},
+ {UINT64CONST(15972603494424788992), UINT64CONST(126217744835361888)}, {UINT64CONST(9982877184015493120), UINT64CONST(78886090522101180)},
+ {UINT64CONST(12478596480019366400), UINT64CONST(98607613152626475)}, {UINT64CONST(10986559581596820096), UINT64CONST(123259516440783094)},
+ {UINT64CONST(2254913720070624656), UINT64CONST(77037197775489434)}, {UINT64CONST(12042014186943056628), UINT64CONST(96296497219361792)},
+ {UINT64CONST(15052517733678820785), UINT64CONST(120370621524202240)}, {UINT64CONST(9407823583549262990), UINT64CONST(75231638452626400)},
+ {UINT64CONST(11759779479436578738), UINT64CONST(94039548065783000)}, {UINT64CONST(14699724349295723422), UINT64CONST(117549435082228750)},
+ {UINT64CONST(4575641699882439235), UINT64CONST(73468396926392969)}, {UINT64CONST(10331238143280436948), UINT64CONST(91835496157991211)},
+ {UINT64CONST(8302361660673158281), UINT64CONST(114794370197489014)}, {UINT64CONST(1154580038986672043), UINT64CONST(143492962746861268)},
+ {UINT64CONST(9944984561221445835), UINT64CONST(89683101716788292)}, {UINT64CONST(12431230701526807293), UINT64CONST(112103877145985365)},
+ {UINT64CONST(1703980321626345405), UINT64CONST(140129846432481707)}, {UINT64CONST(17205888765512323542), UINT64CONST(87581154020301066)},
+ {UINT64CONST(12283988920035628619), UINT64CONST(109476442525376333)}, {UINT64CONST(1519928094762372062), UINT64CONST(136845553156720417)},
+ {UINT64CONST(12479170105294952299), UINT64CONST(85528470722950260)}, {UINT64CONST(15598962631618690374), UINT64CONST(106910588403687825)},
+ {UINT64CONST(5663645234241199255), UINT64CONST(133638235504609782)}, {UINT64CONST(17374836326682913246), UINT64CONST(83523897190381113)},
+ {UINT64CONST(7883487353071477846), UINT64CONST(104404871487976392)}, {UINT64CONST(9854359191339347308), UINT64CONST(130506089359970490)},
+ {UINT64CONST(10770660513014479971), UINT64CONST(81566305849981556)}, {UINT64CONST(13463325641268099964), UINT64CONST(101957882312476945)},
+ {UINT64CONST(2994098996302961243), UINT64CONST(127447352890596182)}, {UINT64CONST(15706369927971514489), UINT64CONST(79654595556622613)},
+ {UINT64CONST(5797904354682229399), UINT64CONST(99568244445778267)}, {UINT64CONST(2635694424925398845), UINT64CONST(124460305557222834)},
+ {UINT64CONST(6258995034005762182), UINT64CONST(77787690973264271)}, {UINT64CONST(3212057774079814824), UINT64CONST(97234613716580339)},
+ {UINT64CONST(17850130272881932242), UINT64CONST(121543267145725423)}, {UINT64CONST(18073860448192289507), UINT64CONST(75964541966078389)},
+ {UINT64CONST(8757267504958198172), UINT64CONST(94955677457597987)}, {UINT64CONST(6334898362770359811), UINT64CONST(118694596821997484)},
+ {UINT64CONST(13182683513586250689), UINT64CONST(74184123013748427)}, {UINT64CONST(11866668373555425458), UINT64CONST(92730153767185534)},
+ {UINT64CONST(5609963430089506015), UINT64CONST(115912692208981918)}, {UINT64CONST(17341285199088104971), UINT64CONST(72445432630613698)},
+ {UINT64CONST(12453234462005355406), UINT64CONST(90556790788267123)}, {UINT64CONST(10954857059079306353), UINT64CONST(113195988485333904)},
+ {UINT64CONST(13693571323849132942), UINT64CONST(141494985606667380)}, {UINT64CONST(17781854114260483896), UINT64CONST(88434366004167112)},
+ {UINT64CONST(3780573569116053255), UINT64CONST(110542957505208891)}, {UINT64CONST(114030942967678664), UINT64CONST(138178696881511114)},
+ {UINT64CONST(4682955357782187069), UINT64CONST(86361685550944446)}, {UINT64CONST(15077066234082509644), UINT64CONST(107952106938680557)},
+ {UINT64CONST(5011274737320973344), UINT64CONST(134940133673350697)}, {UINT64CONST(14661261756894078100), UINT64CONST(84337583545844185)},
+ {UINT64CONST(4491519140835433913), UINT64CONST(105421979432305232)}, {UINT64CONST(5614398926044292391), UINT64CONST(131777474290381540)},
+ {UINT64CONST(12732371365632458552), UINT64CONST(82360921431488462)}, {UINT64CONST(6692092170185797382), UINT64CONST(102951151789360578)},
+ {UINT64CONST(17588487249587022536), UINT64CONST(128688939736700722)}, {UINT64CONST(15604490549419276989), UINT64CONST(80430587335437951)},
+ {UINT64CONST(14893927168346708332), UINT64CONST(100538234169297439)}, {UINT64CONST(14005722942005997511), UINT64CONST(125672792711621799)},
+ {UINT64CONST(15671105866394830300), UINT64CONST(78545495444763624)}, {UINT64CONST(1142138259283986260), UINT64CONST(98181869305954531)},
+ {UINT64CONST(15262730879387146537), UINT64CONST(122727336632443163)}, {UINT64CONST(7233363790403272633), UINT64CONST(76704585395276977)},
+ {UINT64CONST(13653390756431478696), UINT64CONST(95880731744096221)}, {UINT64CONST(3231680390257184658), UINT64CONST(119850914680120277)},
+ {UINT64CONST(4325643253124434363), UINT64CONST(74906821675075173)}, {UINT64CONST(10018740084832930858), UINT64CONST(93633527093843966)},
+ {UINT64CONST(3300053069186387764), UINT64CONST(117041908867304958)}, {UINT64CONST(15897591223523656064), UINT64CONST(73151193042065598)},
+ {UINT64CONST(10648616992549794273), UINT64CONST(91438991302581998)}, {UINT64CONST(4087399203832467033), UINT64CONST(114298739128227498)},
+ {UINT64CONST(14332621041645359599), UINT64CONST(142873423910284372)}, {UINT64CONST(18181260187883125557), UINT64CONST(89295889943927732)},
+ {UINT64CONST(4279831161144355331), UINT64CONST(111619862429909666)}, {UINT64CONST(14573160988285219972), UINT64CONST(139524828037387082)},
+ {UINT64CONST(13719911636105650386), UINT64CONST(87203017523366926)}, {UINT64CONST(7926517508277287175), UINT64CONST(109003771904208658)},
+ {UINT64CONST(684774848491833161), UINT64CONST(136254714880260823)}, {UINT64CONST(7345513307948477581), UINT64CONST(85159196800163014)},
+ {UINT64CONST(18405263671790372785), UINT64CONST(106448996000203767)}, {UINT64CONST(18394893571310578077), UINT64CONST(133061245000254709)},
+ {UINT64CONST(13802651491282805250), UINT64CONST(83163278125159193)}, {UINT64CONST(3418256308821342851), UINT64CONST(103954097656448992)},
+ {UINT64CONST(4272820386026678563), UINT64CONST(129942622070561240)}, {UINT64CONST(2670512741266674102), UINT64CONST(81214138794100775)},
+ {UINT64CONST(17173198981865506339), UINT64CONST(101517673492625968)}, {UINT64CONST(3019754653622331308), UINT64CONST(126897091865782461)},
+ {UINT64CONST(4193189667727651020), UINT64CONST(79310682416114038)}, {UINT64CONST(14464859121514339583), UINT64CONST(99138353020142547)},
+ {UINT64CONST(13469387883465536574), UINT64CONST(123922941275178184)}, {UINT64CONST(8418367427165960359), UINT64CONST(77451838296986365)},
+ {UINT64CONST(15134645302384838353), UINT64CONST(96814797871232956)}, {UINT64CONST(471562554271496325), UINT64CONST(121018497339041196)},
+ {UINT64CONST(9518098633274461011), UINT64CONST(75636560836900747)}, {UINT64CONST(7285937273165688360), UINT64CONST(94545701046125934)},
+ {UINT64CONST(18330793628311886258), UINT64CONST(118182126307657417)}, {UINT64CONST(4539216990053847055), UINT64CONST(73863828942285886)},
+ {UINT64CONST(14897393274422084627), UINT64CONST(92329786177857357)}, {UINT64CONST(4786683537745442072), UINT64CONST(115412232722321697)},
+ {UINT64CONST(14520892257159371055), UINT64CONST(72132645451451060)}, {UINT64CONST(18151115321449213818), UINT64CONST(90165806814313825)},
+ {UINT64CONST(8853836096529353561), UINT64CONST(112707258517892282)}, {UINT64CONST(1843923083806916143), UINT64CONST(140884073147365353)},
+ {UINT64CONST(12681666973447792349), UINT64CONST(88052545717103345)}, {UINT64CONST(2017025661527576725), UINT64CONST(110065682146379182)},
+ {UINT64CONST(11744654113764246714), UINT64CONST(137582102682973977)}, {UINT64CONST(422879793461572340), UINT64CONST(85988814176858736)},
+ {UINT64CONST(528599741826965425), UINT64CONST(107486017721073420)}, {UINT64CONST(660749677283706782), UINT64CONST(134357522151341775)},
+ {UINT64CONST(7330497575943398595), UINT64CONST(83973451344588609)}, {UINT64CONST(13774807988356636147), UINT64CONST(104966814180735761)},
+ {UINT64CONST(3383451930163631472), UINT64CONST(131208517725919702)}, {UINT64CONST(15949715511634433382), UINT64CONST(82005323578699813)},
+ {UINT64CONST(6102086334260878016), UINT64CONST(102506654473374767)}, {UINT64CONST(3015921899398709616), UINT64CONST(128133318091718459)},
+ {UINT64CONST(18025852251620051174), UINT64CONST(80083323807324036)}, {UINT64CONST(4085571240815512351), UINT64CONST(100104154759155046)},
+ {UINT64CONST(14330336087874166247), UINT64CONST(125130193448943807)}, {UINT64CONST(15873989082562435760), UINT64CONST(78206370905589879)},
+ {UINT64CONST(15230800334775656796), UINT64CONST(97757963631987349)}, {UINT64CONST(5203442363187407284), UINT64CONST(122197454539984187)},
+ {UINT64CONST(946308467778435600), UINT64CONST(76373409087490117)}, {UINT64CONST(5794571603150432404), UINT64CONST(95466761359362646)},
+ {UINT64CONST(16466586540792816313), UINT64CONST(119333451699203307)}, {UINT64CONST(7985773578781816244), UINT64CONST(74583407312002067)},
+ {UINT64CONST(5370530955049882401), UINT64CONST(93229259140002584)}, {UINT64CONST(6713163693812353001), UINT64CONST(116536573925003230)},
+ {UINT64CONST(18030785363914884337), UINT64CONST(72835358703127018)}, {UINT64CONST(13315109668038829614), UINT64CONST(91044198378908773)},
+ {UINT64CONST(2808829029766373305), UINT64CONST(113805247973635967)}, {UINT64CONST(17346094342490130344), UINT64CONST(142256559967044958)},
+ {UINT64CONST(6229622945628943561), UINT64CONST(88910349979403099)}, {UINT64CONST(3175342663608791547), UINT64CONST(111137937474253874)},
+ {UINT64CONST(13192550366365765242), UINT64CONST(138922421842817342)}, {UINT64CONST(3633657960551215372), UINT64CONST(86826513651760839)},
+ {UINT64CONST(18377130505971182927), UINT64CONST(108533142064701048)}, {UINT64CONST(4524669058754427043), UINT64CONST(135666427580876311)},
+ {UINT64CONST(9745447189362598758), UINT64CONST(84791517238047694)}, {UINT64CONST(2958436949848472639), UINT64CONST(105989396547559618)},
+ {UINT64CONST(12921418224165366607), UINT64CONST(132486745684449522)}, {UINT64CONST(12687572408530742033), UINT64CONST(82804216052780951)},
+ {UINT64CONST(11247779492236039638), UINT64CONST(103505270065976189)}, {UINT64CONST(224666310012885835), UINT64CONST(129381587582470237)},
+ {UINT64CONST(2446259452971747599), UINT64CONST(80863492239043898)}, {UINT64CONST(12281196353069460307), UINT64CONST(101079365298804872)},
+ {UINT64CONST(15351495441336825384), UINT64CONST(126349206623506090)}, {UINT64CONST(14206370669262903769), UINT64CONST(78968254139691306)},
+ {UINT64CONST(8534591299723853903), UINT64CONST(98710317674614133)}, {UINT64CONST(15279925143082205283), UINT64CONST(123387897093267666)},
+ {UINT64CONST(14161639232853766206), UINT64CONST(77117435683292291)}, {UINT64CONST(13090363022639819853), UINT64CONST(96396794604115364)},
+ {UINT64CONST(16362953778299774816), UINT64CONST(120495993255144205)}, {UINT64CONST(12532689120651053212), UINT64CONST(75309995784465128)},
+ {UINT64CONST(15665861400813816515), UINT64CONST(94137494730581410)}, {UINT64CONST(10358954714162494836), UINT64CONST(117671868413226763)},
+ {UINT64CONST(4168503687137865320), UINT64CONST(73544917758266727)}, {UINT64CONST(598943590494943747), UINT64CONST(91931147197833409)},
+ {UINT64CONST(5360365506546067587), UINT64CONST(114913933997291761)}, {UINT64CONST(11312142901609972388), UINT64CONST(143642417496614701)},
+ {UINT64CONST(9375932322719926695), UINT64CONST(89776510935384188)}, {UINT64CONST(11719915403399908368), UINT64CONST(112220638669230235)},
+ {UINT64CONST(10038208235822497557), UINT64CONST(140275798336537794)}, {UINT64CONST(10885566165816448877), UINT64CONST(87672373960336121)},
+ {UINT64CONST(18218643725697949000), UINT64CONST(109590467450420151)}, {UINT64CONST(18161618638695048346), UINT64CONST(136988084313025189)},
+ {UINT64CONST(13656854658398099168), UINT64CONST(85617552695640743)}, {UINT64CONST(12459382304570236056), UINT64CONST(107021940869550929)},
+ {UINT64CONST(1739169825430631358), UINT64CONST(133777426086938662)}, {UINT64CONST(14922039196176308311), UINT64CONST(83610891304336663)},
+ {UINT64CONST(14040862976792997485), UINT64CONST(104513614130420829)}, {UINT64CONST(3716020665709083144), UINT64CONST(130642017663026037)},
+ {UINT64CONST(4628355925281870917), UINT64CONST(81651261039391273)}, {UINT64CONST(10397130925029726550), UINT64CONST(102064076299239091)},
+ {UINT64CONST(8384727637859770284), UINT64CONST(127580095374048864)}, {UINT64CONST(5240454773662356427), UINT64CONST(79737559608780540)},
+ {UINT64CONST(6550568467077945534), UINT64CONST(99671949510975675)}, {UINT64CONST(3576524565420044014), UINT64CONST(124589936888719594)},
+ {UINT64CONST(6847013871814915412), UINT64CONST(77868710555449746)}, {UINT64CONST(17782139376623420074), UINT64CONST(97335888194312182)},
+ {UINT64CONST(13004302183924499284), UINT64CONST(121669860242890228)}, {UINT64CONST(17351060901807587860), UINT64CONST(76043662651806392)},
+ {UINT64CONST(3242082053549933210), UINT64CONST(95054578314757991)}, {UINT64CONST(17887660622219580224), UINT64CONST(118818222893447488)},
+ {UINT64CONST(11179787888887237640), UINT64CONST(74261389308404680)}, {UINT64CONST(13974734861109047050), UINT64CONST(92826736635505850)},
+ {UINT64CONST(8245046539531533005), UINT64CONST(116033420794382313)}, {UINT64CONST(16682369133275677888), UINT64CONST(72520887996488945)},
+ {UINT64CONST(7017903361312433648), UINT64CONST(90651109995611182)}, {UINT64CONST(17995751238495317868), UINT64CONST(113313887494513977)},
+ {UINT64CONST(8659630992836983623), UINT64CONST(141642359368142472)}, {UINT64CONST(5412269370523114764), UINT64CONST(88526474605089045)},
+ {UINT64CONST(11377022731581281359), UINT64CONST(110658093256361306)}, {UINT64CONST(4997906377621825891), UINT64CONST(138322616570451633)},
+ {UINT64CONST(14652906532082110942), UINT64CONST(86451635356532270)}, {UINT64CONST(9092761128247862869), UINT64CONST(108064544195665338)},
+ {UINT64CONST(2142579373455052779), UINT64CONST(135080680244581673)}, {UINT64CONST(12868327154477877747), UINT64CONST(84425425152863545)},
+ {UINT64CONST(2250350887815183471), UINT64CONST(105531781441079432)}, {UINT64CONST(2812938609768979339), UINT64CONST(131914726801349290)},
+ {UINT64CONST(6369772649532999991), UINT64CONST(82446704250843306)}, {UINT64CONST(17185587848771025797), UINT64CONST(103058380313554132)},
+ {UINT64CONST(3035240737254230630), UINT64CONST(128822975391942666)}, {UINT64CONST(6508711479211282048), UINT64CONST(80514359619964166)},
+ {UINT64CONST(17359261385868878368), UINT64CONST(100642949524955207)}, {UINT64CONST(17087390713908710056), UINT64CONST(125803686906194009)},
+ {UINT64CONST(3762090168551861929), UINT64CONST(78627304316371256)}, {UINT64CONST(4702612710689827411), UINT64CONST(98284130395464070)},
+ {UINT64CONST(15101637925217060072), UINT64CONST(122855162994330087)}, {UINT64CONST(16356052730901744401), UINT64CONST(76784476871456304)},
+ {UINT64CONST(1998321839917628885), UINT64CONST(95980596089320381)}, {UINT64CONST(7109588318324424010), UINT64CONST(119975745111650476)},
+ {UINT64CONST(13666864735807540814), UINT64CONST(74984840694781547)}, {UINT64CONST(12471894901332038114), UINT64CONST(93731050868476934)},
+ {UINT64CONST(6366496589810271835), UINT64CONST(117163813585596168)}, {UINT64CONST(3979060368631419896), UINT64CONST(73227383490997605)},
+ {UINT64CONST(9585511479216662775), UINT64CONST(91534229363747006)}, {UINT64CONST(2758517312166052660), UINT64CONST(114417786704683758)},
+ {UINT64CONST(12671518677062341634), UINT64CONST(143022233380854697)}, {UINT64CONST(1002170145522881665), UINT64CONST(89388895863034186)},
+ {UINT64CONST(10476084718758377889), UINT64CONST(111736119828792732)}, {UINT64CONST(13095105898447972362), UINT64CONST(139670149785990915)},
+ {UINT64CONST(5878598177316288774), UINT64CONST(87293843616244322)}, {UINT64CONST(16571619758500136775), UINT64CONST(109117304520305402)},
+ {UINT64CONST(11491152661270395161), UINT64CONST(136396630650381753)}, {UINT64CONST(264441385652915120), UINT64CONST(85247894156488596)},
+ {UINT64CONST(330551732066143900), UINT64CONST(106559867695610745)}, {UINT64CONST(5024875683510067779), UINT64CONST(133199834619513431)},
+ {UINT64CONST(10058076329834874218), UINT64CONST(83249896637195894)}, {UINT64CONST(3349223375438816964), UINT64CONST(104062370796494868)},
+ {UINT64CONST(4186529219298521205), UINT64CONST(130077963495618585)}, {UINT64CONST(14145795808130045513), UINT64CONST(81298727184761615)},
+ {UINT64CONST(13070558741735168987), UINT64CONST(101623408980952019)}, {UINT64CONST(11726512408741573330), UINT64CONST(127029261226190024)},
+ {UINT64CONST(7329070255463483331), UINT64CONST(79393288266368765)}, {UINT64CONST(13773023837756742068), UINT64CONST(99241610332960956)},
+ {UINT64CONST(17216279797195927585), UINT64CONST(124052012916201195)}, {UINT64CONST(8454331864033760789), UINT64CONST(77532508072625747)},
+ {UINT64CONST(5956228811614813082), UINT64CONST(96915635090782184)}, {UINT64CONST(7445286014518516353), UINT64CONST(121144543863477730)},
+ {UINT64CONST(9264989777501460624), UINT64CONST(75715339914673581)}, {UINT64CONST(16192923240304213684), UINT64CONST(94644174893341976)},
+ {UINT64CONST(1794409976670715490), UINT64CONST(118305218616677471)}, {UINT64CONST(8039035263060279037), UINT64CONST(73940761635423419)},
+ {UINT64CONST(5437108060397960892), UINT64CONST(92425952044279274)}, {UINT64CONST(16019757112352226923), UINT64CONST(115532440055349092)},
+ {UINT64CONST(788976158365366019), UINT64CONST(72207775034593183)}, {UINT64CONST(14821278253238871236), UINT64CONST(90259718793241478)},
+ {UINT64CONST(9303225779693813237), UINT64CONST(112824648491551848)}, {UINT64CONST(11629032224617266546), UINT64CONST(141030810614439810)},
+ {UINT64CONST(11879831158813179495), UINT64CONST(88144256634024881)}, {UINT64CONST(1014730893234310657), UINT64CONST(110180320792531102)},
+ {UINT64CONST(10491785653397664129), UINT64CONST(137725400990663877)}, {UINT64CONST(8863209042587234033), UINT64CONST(86078375619164923)},
+ {UINT64CONST(6467325284806654637), UINT64CONST(107597969523956154)}, {UINT64CONST(17307528642863094104), UINT64CONST(134497461904945192)},
+ {UINT64CONST(10817205401789433815), UINT64CONST(84060913690590745)}, {UINT64CONST(18133192770664180173), UINT64CONST(105076142113238431)},
+ {UINT64CONST(18054804944902837312), UINT64CONST(131345177641548039)}, {UINT64CONST(18201782118205355176), UINT64CONST(82090736025967524)},
+ {UINT64CONST(4305483574047142354), UINT64CONST(102613420032459406)}, {UINT64CONST(14605226504413703751), UINT64CONST(128266775040574257)},
+ {UINT64CONST(2210737537617482988), UINT64CONST(80166734400358911)}, {UINT64CONST(16598479977304017447), UINT64CONST(100208418000448638)},
+ {UINT64CONST(11524727934775246001), UINT64CONST(125260522500560798)}, {UINT64CONST(2591268940807140847), UINT64CONST(78287826562850499)},
+ {UINT64CONST(17074144231291089770), UINT64CONST(97859783203563123)}, {UINT64CONST(16730994270686474309), UINT64CONST(122324729004453904)},
+ {UINT64CONST(10456871419179046443), UINT64CONST(76452955627783690)}, {UINT64CONST(3847717237119032246), UINT64CONST(95566194534729613)},
+ {UINT64CONST(9421332564826178211), UINT64CONST(119457743168412016)}, {UINT64CONST(5888332853016361382), UINT64CONST(74661089480257510)},
+ {UINT64CONST(16583788103125227536), UINT64CONST(93326361850321887)}, {UINT64CONST(16118049110479146516), UINT64CONST(116657952312902359)},
+ {UINT64CONST(16991309721690548428), UINT64CONST(72911220195563974)}, {UINT64CONST(12015765115258409727), UINT64CONST(91139025244454968)},
+ {UINT64CONST(15019706394073012159), UINT64CONST(113923781555568710)}, {UINT64CONST(9551260955736489391), UINT64CONST(142404726944460888)},
+ {UINT64CONST(5969538097335305869), UINT64CONST(89002954340288055)}, {UINT64CONST(2850236603241744433), UINT64CONST(111253692925360069)}
+};
+
+#endif /* RYU_D2S_FULL_TABLE_H */
diff --git a/src/common/d2s_intrinsics.h b/src/common/d2s_intrinsics.h
new file mode 100644
index 0000000..b1f31ca
--- /dev/null
+++ b/src/common/d2s_intrinsics.h
@@ -0,0 +1,202 @@
+/*---------------------------------------------------------------------------
+ *
+ * Ryu floating-point output for double precision.
+ *
+ * Portions Copyright (c) 2018-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/d2s_intrinsics.h
+ *
+ * This is a modification of code taken from github.com/ulfjack/ryu under the
+ * terms of the Boost license (not the Apache license). The original copyright
+ * notice follows:
+ *
+ * Copyright 2018 Ulf Adams
+ *
+ * The contents of this file may be used under the terms of the Apache
+ * License, Version 2.0.
+ *
+ * (See accompanying file LICENSE-Apache or copy at
+ * http://www.apache.org/licenses/LICENSE-2.0)
+ *
+ * Alternatively, the contents of this file may be used under the terms of the
+ * Boost Software License, Version 1.0.
+ *
+ * (See accompanying file LICENSE-Boost or copy at
+ * https://www.boost.org/LICENSE_1_0.txt)
+ *
+ * Unless required by applicable law or agreed to in writing, this software is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
+ *
+ *---------------------------------------------------------------------------
+ */
+#ifndef RYU_D2S_INTRINSICS_H
+#define RYU_D2S_INTRINSICS_H
+
+#if defined(HAS_64_BIT_INTRINSICS)
+
+#include <intrin.h>
+
+static inline uint64
+umul128(const uint64 a, const uint64 b, uint64 *const productHi)
+{
+ return _umul128(a, b, productHi);
+}
+
+static inline uint64
+shiftright128(const uint64 lo, const uint64 hi, const uint32 dist)
+{
+ /*
+ * For the __shiftright128 intrinsic, the shift value is always modulo 64.
+ * In the current implementation of the double-precision version of Ryu,
+ * the shift value is always < 64. (In the case RYU_OPTIMIZE_SIZE == 0,
+ * the shift value is in the range [49, 58]. Otherwise in the range [2,
+ * 59].) Check this here in case a future change requires larger shift
+ * values. In this case this function needs to be adjusted.
+ */
+ Assert(dist < 64);
+ return __shiftright128(lo, hi, (unsigned char) dist);
+}
+
+#else /* defined(HAS_64_BIT_INTRINSICS) */
+
+static inline uint64
+umul128(const uint64 a, const uint64 b, uint64 *const productHi)
+{
+ /*
+ * The casts here help MSVC to avoid calls to the __allmul library
+ * function.
+ */
+ const uint32 aLo = (uint32) a;
+ const uint32 aHi = (uint32) (a >> 32);
+ const uint32 bLo = (uint32) b;
+ const uint32 bHi = (uint32) (b >> 32);
+
+ const uint64 b00 = (uint64) aLo * bLo;
+ const uint64 b01 = (uint64) aLo * bHi;
+ const uint64 b10 = (uint64) aHi * bLo;
+ const uint64 b11 = (uint64) aHi * bHi;
+
+ const uint32 b00Lo = (uint32) b00;
+ const uint32 b00Hi = (uint32) (b00 >> 32);
+
+ const uint64 mid1 = b10 + b00Hi;
+ const uint32 mid1Lo = (uint32) (mid1);
+ const uint32 mid1Hi = (uint32) (mid1 >> 32);
+
+ const uint64 mid2 = b01 + mid1Lo;
+ const uint32 mid2Lo = (uint32) (mid2);
+ const uint32 mid2Hi = (uint32) (mid2 >> 32);
+
+ const uint64 pHi = b11 + mid1Hi + mid2Hi;
+ const uint64 pLo = ((uint64) mid2Lo << 32) + b00Lo;
+
+ *productHi = pHi;
+ return pLo;
+}
+
+static inline uint64
+shiftright128(const uint64 lo, const uint64 hi, const uint32 dist)
+{
+ /* We don't need to handle the case dist >= 64 here (see above). */
+ Assert(dist < 64);
+#if !defined(RYU_32_BIT_PLATFORM)
+ Assert(dist > 0);
+ return (hi << (64 - dist)) | (lo >> dist);
+#else
+ /* Avoid a 64-bit shift by taking advantage of the range of shift values. */
+ Assert(dist >= 32);
+ return (hi << (64 - dist)) | ((uint32) (lo >> 32) >> (dist - 32));
+#endif
+}
+
+#endif /* // defined(HAS_64_BIT_INTRINSICS) */
+
+#ifdef RYU_32_BIT_PLATFORM
+
+/* Returns the high 64 bits of the 128-bit product of a and b. */
+static inline uint64
+umulh(const uint64 a, const uint64 b)
+{
+ /*
+ * Reuse the umul128 implementation. Optimizers will likely eliminate the
+ * instructions used to compute the low part of the product.
+ */
+ uint64 hi;
+
+ umul128(a, b, &hi);
+ return hi;
+}
+
+/*----
+ * On 32-bit platforms, compilers typically generate calls to library
+ * functions for 64-bit divisions, even if the divisor is a constant.
+ *
+ * E.g.:
+ * https://bugs.llvm.org/show_bug.cgi?id=37932
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=17958
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37443
+ *
+ * The functions here perform division-by-constant using multiplications
+ * in the same way as 64-bit compilers would do.
+ *
+ * NB:
+ * The multipliers and shift values are the ones generated by clang x64
+ * for expressions like x/5, x/10, etc.
+ *----
+ */
+
+static inline uint64
+div5(const uint64 x)
+{
+ return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 2;
+}
+
+static inline uint64
+div10(const uint64 x)
+{
+ return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 3;
+}
+
+static inline uint64
+div100(const uint64 x)
+{
+ return umulh(x >> 2, UINT64CONST(0x28F5C28F5C28F5C3)) >> 2;
+}
+
+static inline uint64
+div1e8(const uint64 x)
+{
+ return umulh(x, UINT64CONST(0xABCC77118461CEFD)) >> 26;
+}
+
+#else /* RYU_32_BIT_PLATFORM */
+
+static inline uint64
+div5(const uint64 x)
+{
+ return x / 5;
+}
+
+static inline uint64
+div10(const uint64 x)
+{
+ return x / 10;
+}
+
+static inline uint64
+div100(const uint64 x)
+{
+ return x / 100;
+}
+
+static inline uint64
+div1e8(const uint64 x)
+{
+ return x / 100000000;
+}
+
+#endif /* RYU_32_BIT_PLATFORM */
+
+#endif /* RYU_D2S_INTRINSICS_H */
diff --git a/src/common/digit_table.h b/src/common/digit_table.h
new file mode 100644
index 0000000..483aa17
--- /dev/null
+++ b/src/common/digit_table.h
@@ -0,0 +1,21 @@
+#ifndef RYU_DIGIT_TABLE_H
+#define RYU_DIGIT_TABLE_H
+
+/*
+ * A table of all two-digit numbers. This is used to speed up decimal digit
+ * generation by copying pairs of digits into the final output.
+ */
+static const char DIGIT_TABLE[200] = {
+ '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
+ '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
+ '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9',
+ '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9',
+ '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9',
+ '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
+ '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
+ '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9',
+ '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9',
+ '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'
+};
+
+#endif /* RYU_DIGIT_TABLE_H */
diff --git a/src/common/encnames.c b/src/common/encnames.c
new file mode 100644
index 0000000..14cf1b3
--- /dev/null
+++ b/src/common/encnames.c
@@ -0,0 +1,598 @@
+/*-------------------------------------------------------------------------
+ *
+ * encnames.c
+ * Encoding names and routines for working with them.
+ *
+ * Portions Copyright (c) 2001-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/encnames.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include <ctype.h>
+#include <unistd.h>
+
+#include "mb/pg_wchar.h"
+
+
+/* ----------
+ * All encoding names, sorted: *** A L P H A B E T I C ***
+ *
+ * All names must be without irrelevant chars, search routines use
+ * isalnum() chars only. It means ISO-8859-1, iso_8859-1 and Iso8859_1
+ * are always converted to 'iso88591'. All must be lower case.
+ *
+ * The table doesn't contain 'cs' aliases (like csISOLatin1). It's needed?
+ *
+ * Karel Zak, Aug 2001
+ * ----------
+ */
+typedef struct pg_encname
+{
+ const char *name;
+ pg_enc encoding;
+} pg_encname;
+
+static const pg_encname pg_encname_tbl[] =
+{
+ {
+ "abc", PG_WIN1258
+ }, /* alias for WIN1258 */
+ {
+ "alt", PG_WIN866
+ }, /* IBM866 */
+ {
+ "big5", PG_BIG5
+ }, /* Big5; Chinese for Taiwan multibyte set */
+ {
+ "euccn", PG_EUC_CN
+ }, /* EUC-CN; Extended Unix Code for simplified
+ * Chinese */
+ {
+ "eucjis2004", PG_EUC_JIS_2004
+ }, /* EUC-JIS-2004; Extended UNIX Code fixed
+ * Width for Japanese, standard JIS X 0213 */
+ {
+ "eucjp", PG_EUC_JP
+ }, /* EUC-JP; Extended UNIX Code fixed Width for
+ * Japanese, standard OSF */
+ {
+ "euckr", PG_EUC_KR
+ }, /* EUC-KR; Extended Unix Code for Korean , KS
+ * X 1001 standard */
+ {
+ "euctw", PG_EUC_TW
+ }, /* EUC-TW; Extended Unix Code for
+ *
+ * traditional Chinese */
+ {
+ "gb18030", PG_GB18030
+ }, /* GB18030;GB18030 */
+ {
+ "gbk", PG_GBK
+ }, /* GBK; Chinese Windows CodePage 936
+ * simplified Chinese */
+ {
+ "iso88591", PG_LATIN1
+ }, /* ISO-8859-1; RFC1345,KXS2 */
+ {
+ "iso885910", PG_LATIN6
+ }, /* ISO-8859-10; RFC1345,KXS2 */
+ {
+ "iso885913", PG_LATIN7
+ }, /* ISO-8859-13; RFC1345,KXS2 */
+ {
+ "iso885914", PG_LATIN8
+ }, /* ISO-8859-14; RFC1345,KXS2 */
+ {
+ "iso885915", PG_LATIN9
+ }, /* ISO-8859-15; RFC1345,KXS2 */
+ {
+ "iso885916", PG_LATIN10
+ }, /* ISO-8859-16; RFC1345,KXS2 */
+ {
+ "iso88592", PG_LATIN2
+ }, /* ISO-8859-2; RFC1345,KXS2 */
+ {
+ "iso88593", PG_LATIN3
+ }, /* ISO-8859-3; RFC1345,KXS2 */
+ {
+ "iso88594", PG_LATIN4
+ }, /* ISO-8859-4; RFC1345,KXS2 */
+ {
+ "iso88595", PG_ISO_8859_5
+ }, /* ISO-8859-5; RFC1345,KXS2 */
+ {
+ "iso88596", PG_ISO_8859_6
+ }, /* ISO-8859-6; RFC1345,KXS2 */
+ {
+ "iso88597", PG_ISO_8859_7
+ }, /* ISO-8859-7; RFC1345,KXS2 */
+ {
+ "iso88598", PG_ISO_8859_8
+ }, /* ISO-8859-8; RFC1345,KXS2 */
+ {
+ "iso88599", PG_LATIN5
+ }, /* ISO-8859-9; RFC1345,KXS2 */
+ {
+ "johab", PG_JOHAB
+ }, /* JOHAB; Extended Unix Code for simplified
+ * Chinese */
+ {
+ "koi8", PG_KOI8R
+ }, /* _dirty_ alias for KOI8-R (backward
+ * compatibility) */
+ {
+ "koi8r", PG_KOI8R
+ }, /* KOI8-R; RFC1489 */
+ {
+ "koi8u", PG_KOI8U
+ }, /* KOI8-U; RFC2319 */
+ {
+ "latin1", PG_LATIN1
+ }, /* alias for ISO-8859-1 */
+ {
+ "latin10", PG_LATIN10
+ }, /* alias for ISO-8859-16 */
+ {
+ "latin2", PG_LATIN2
+ }, /* alias for ISO-8859-2 */
+ {
+ "latin3", PG_LATIN3
+ }, /* alias for ISO-8859-3 */
+ {
+ "latin4", PG_LATIN4
+ }, /* alias for ISO-8859-4 */
+ {
+ "latin5", PG_LATIN5
+ }, /* alias for ISO-8859-9 */
+ {
+ "latin6", PG_LATIN6
+ }, /* alias for ISO-8859-10 */
+ {
+ "latin7", PG_LATIN7
+ }, /* alias for ISO-8859-13 */
+ {
+ "latin8", PG_LATIN8
+ }, /* alias for ISO-8859-14 */
+ {
+ "latin9", PG_LATIN9
+ }, /* alias for ISO-8859-15 */
+ {
+ "mskanji", PG_SJIS
+ }, /* alias for Shift_JIS */
+ {
+ "muleinternal", PG_MULE_INTERNAL
+ },
+ {
+ "shiftjis", PG_SJIS
+ }, /* Shift_JIS; JIS X 0202-1991 */
+
+ {
+ "shiftjis2004", PG_SHIFT_JIS_2004
+ }, /* SHIFT-JIS-2004; Shift JIS for Japanese,
+ * standard JIS X 0213 */
+ {
+ "sjis", PG_SJIS
+ }, /* alias for Shift_JIS */
+ {
+ "sqlascii", PG_SQL_ASCII
+ },
+ {
+ "tcvn", PG_WIN1258
+ }, /* alias for WIN1258 */
+ {
+ "tcvn5712", PG_WIN1258
+ }, /* alias for WIN1258 */
+ {
+ "uhc", PG_UHC
+ }, /* UHC; Korean Windows CodePage 949 */
+ {
+ "unicode", PG_UTF8
+ }, /* alias for UTF8 */
+ {
+ "utf8", PG_UTF8
+ }, /* alias for UTF8 */
+ {
+ "vscii", PG_WIN1258
+ }, /* alias for WIN1258 */
+ {
+ "win", PG_WIN1251
+ }, /* _dirty_ alias for windows-1251 (backward
+ * compatibility) */
+ {
+ "win1250", PG_WIN1250
+ }, /* alias for Windows-1250 */
+ {
+ "win1251", PG_WIN1251
+ }, /* alias for Windows-1251 */
+ {
+ "win1252", PG_WIN1252
+ }, /* alias for Windows-1252 */
+ {
+ "win1253", PG_WIN1253
+ }, /* alias for Windows-1253 */
+ {
+ "win1254", PG_WIN1254
+ }, /* alias for Windows-1254 */
+ {
+ "win1255", PG_WIN1255
+ }, /* alias for Windows-1255 */
+ {
+ "win1256", PG_WIN1256
+ }, /* alias for Windows-1256 */
+ {
+ "win1257", PG_WIN1257
+ }, /* alias for Windows-1257 */
+ {
+ "win1258", PG_WIN1258
+ }, /* alias for Windows-1258 */
+ {
+ "win866", PG_WIN866
+ }, /* IBM866 */
+ {
+ "win874", PG_WIN874
+ }, /* alias for Windows-874 */
+ {
+ "win932", PG_SJIS
+ }, /* alias for Shift_JIS */
+ {
+ "win936", PG_GBK
+ }, /* alias for GBK */
+ {
+ "win949", PG_UHC
+ }, /* alias for UHC */
+ {
+ "win950", PG_BIG5
+ }, /* alias for BIG5 */
+ {
+ "windows1250", PG_WIN1250
+ }, /* Windows-1251; Microsoft */
+ {
+ "windows1251", PG_WIN1251
+ }, /* Windows-1251; Microsoft */
+ {
+ "windows1252", PG_WIN1252
+ }, /* Windows-1252; Microsoft */
+ {
+ "windows1253", PG_WIN1253
+ }, /* Windows-1253; Microsoft */
+ {
+ "windows1254", PG_WIN1254
+ }, /* Windows-1254; Microsoft */
+ {
+ "windows1255", PG_WIN1255
+ }, /* Windows-1255; Microsoft */
+ {
+ "windows1256", PG_WIN1256
+ }, /* Windows-1256; Microsoft */
+ {
+ "windows1257", PG_WIN1257
+ }, /* Windows-1257; Microsoft */
+ {
+ "windows1258", PG_WIN1258
+ }, /* Windows-1258; Microsoft */
+ {
+ "windows866", PG_WIN866
+ }, /* IBM866 */
+ {
+ "windows874", PG_WIN874
+ }, /* Windows-874; Microsoft */
+ {
+ "windows932", PG_SJIS
+ }, /* alias for Shift_JIS */
+ {
+ "windows936", PG_GBK
+ }, /* alias for GBK */
+ {
+ "windows949", PG_UHC
+ }, /* alias for UHC */
+ {
+ "windows950", PG_BIG5
+ } /* alias for BIG5 */
+};
+
+/* ----------
+ * These are "official" encoding names.
+ * XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h)
+ * ----------
+ */
+#ifndef WIN32
+#define DEF_ENC2NAME(name, codepage) { #name, PG_##name }
+#else
+#define DEF_ENC2NAME(name, codepage) { #name, PG_##name, codepage }
+#endif
+
+const pg_enc2name pg_enc2name_tbl[] =
+{
+ DEF_ENC2NAME(SQL_ASCII, 0),
+ DEF_ENC2NAME(EUC_JP, 20932),
+ DEF_ENC2NAME(EUC_CN, 20936),
+ DEF_ENC2NAME(EUC_KR, 51949),
+ DEF_ENC2NAME(EUC_TW, 0),
+ DEF_ENC2NAME(EUC_JIS_2004, 20932),
+ DEF_ENC2NAME(UTF8, 65001),
+ DEF_ENC2NAME(MULE_INTERNAL, 0),
+ DEF_ENC2NAME(LATIN1, 28591),
+ DEF_ENC2NAME(LATIN2, 28592),
+ DEF_ENC2NAME(LATIN3, 28593),
+ DEF_ENC2NAME(LATIN4, 28594),
+ DEF_ENC2NAME(LATIN5, 28599),
+ DEF_ENC2NAME(LATIN6, 0),
+ DEF_ENC2NAME(LATIN7, 0),
+ DEF_ENC2NAME(LATIN8, 0),
+ DEF_ENC2NAME(LATIN9, 28605),
+ DEF_ENC2NAME(LATIN10, 0),
+ DEF_ENC2NAME(WIN1256, 1256),
+ DEF_ENC2NAME(WIN1258, 1258),
+ DEF_ENC2NAME(WIN866, 866),
+ DEF_ENC2NAME(WIN874, 874),
+ DEF_ENC2NAME(KOI8R, 20866),
+ DEF_ENC2NAME(WIN1251, 1251),
+ DEF_ENC2NAME(WIN1252, 1252),
+ DEF_ENC2NAME(ISO_8859_5, 28595),
+ DEF_ENC2NAME(ISO_8859_6, 28596),
+ DEF_ENC2NAME(ISO_8859_7, 28597),
+ DEF_ENC2NAME(ISO_8859_8, 28598),
+ DEF_ENC2NAME(WIN1250, 1250),
+ DEF_ENC2NAME(WIN1253, 1253),
+ DEF_ENC2NAME(WIN1254, 1254),
+ DEF_ENC2NAME(WIN1255, 1255),
+ DEF_ENC2NAME(WIN1257, 1257),
+ DEF_ENC2NAME(KOI8U, 21866),
+ DEF_ENC2NAME(SJIS, 932),
+ DEF_ENC2NAME(BIG5, 950),
+ DEF_ENC2NAME(GBK, 936),
+ DEF_ENC2NAME(UHC, 949),
+ DEF_ENC2NAME(GB18030, 54936),
+ DEF_ENC2NAME(JOHAB, 0),
+ DEF_ENC2NAME(SHIFT_JIS_2004, 932)
+};
+
+/* ----------
+ * These are encoding names for gettext.
+ *
+ * This covers all encodings except MULE_INTERNAL, which is alien to gettext.
+ * ----------
+ */
+const pg_enc2gettext pg_enc2gettext_tbl[] =
+{
+ {PG_SQL_ASCII, "US-ASCII"},
+ {PG_UTF8, "UTF-8"},
+ {PG_LATIN1, "LATIN1"},
+ {PG_LATIN2, "LATIN2"},
+ {PG_LATIN3, "LATIN3"},
+ {PG_LATIN4, "LATIN4"},
+ {PG_ISO_8859_5, "ISO-8859-5"},
+ {PG_ISO_8859_6, "ISO_8859-6"},
+ {PG_ISO_8859_7, "ISO-8859-7"},
+ {PG_ISO_8859_8, "ISO-8859-8"},
+ {PG_LATIN5, "LATIN5"},
+ {PG_LATIN6, "LATIN6"},
+ {PG_LATIN7, "LATIN7"},
+ {PG_LATIN8, "LATIN8"},
+ {PG_LATIN9, "LATIN-9"},
+ {PG_LATIN10, "LATIN10"},
+ {PG_KOI8R, "KOI8-R"},
+ {PG_KOI8U, "KOI8-U"},
+ {PG_WIN1250, "CP1250"},
+ {PG_WIN1251, "CP1251"},
+ {PG_WIN1252, "CP1252"},
+ {PG_WIN1253, "CP1253"},
+ {PG_WIN1254, "CP1254"},
+ {PG_WIN1255, "CP1255"},
+ {PG_WIN1256, "CP1256"},
+ {PG_WIN1257, "CP1257"},
+ {PG_WIN1258, "CP1258"},
+ {PG_WIN866, "CP866"},
+ {PG_WIN874, "CP874"},
+ {PG_EUC_CN, "EUC-CN"},
+ {PG_EUC_JP, "EUC-JP"},
+ {PG_EUC_KR, "EUC-KR"},
+ {PG_EUC_TW, "EUC-TW"},
+ {PG_EUC_JIS_2004, "EUC-JP"},
+ {PG_SJIS, "SHIFT-JIS"},
+ {PG_BIG5, "BIG5"},
+ {PG_GBK, "GBK"},
+ {PG_UHC, "UHC"},
+ {PG_GB18030, "GB18030"},
+ {PG_JOHAB, "JOHAB"},
+ {PG_SHIFT_JIS_2004, "SHIFT_JISX0213"},
+ {0, NULL}
+};
+
+
+/*
+ * Table of encoding names for ICU (currently covers backend encodings only)
+ *
+ * Reference: <https://ssl.icu-project.org/icu-bin/convexp>
+ *
+ * NULL entries are not supported by ICU, or their mapping is unclear.
+ */
+static const char *const pg_enc2icu_tbl[] =
+{
+ NULL, /* PG_SQL_ASCII */
+ "EUC-JP", /* PG_EUC_JP */
+ "EUC-CN", /* PG_EUC_CN */
+ "EUC-KR", /* PG_EUC_KR */
+ "EUC-TW", /* PG_EUC_TW */
+ NULL, /* PG_EUC_JIS_2004 */
+ "UTF-8", /* PG_UTF8 */
+ NULL, /* PG_MULE_INTERNAL */
+ "ISO-8859-1", /* PG_LATIN1 */
+ "ISO-8859-2", /* PG_LATIN2 */
+ "ISO-8859-3", /* PG_LATIN3 */
+ "ISO-8859-4", /* PG_LATIN4 */
+ "ISO-8859-9", /* PG_LATIN5 */
+ "ISO-8859-10", /* PG_LATIN6 */
+ "ISO-8859-13", /* PG_LATIN7 */
+ "ISO-8859-14", /* PG_LATIN8 */
+ "ISO-8859-15", /* PG_LATIN9 */
+ NULL, /* PG_LATIN10 */
+ "CP1256", /* PG_WIN1256 */
+ "CP1258", /* PG_WIN1258 */
+ "CP866", /* PG_WIN866 */
+ NULL, /* PG_WIN874 */
+ "KOI8-R", /* PG_KOI8R */
+ "CP1251", /* PG_WIN1251 */
+ "CP1252", /* PG_WIN1252 */
+ "ISO-8859-5", /* PG_ISO_8859_5 */
+ "ISO-8859-6", /* PG_ISO_8859_6 */
+ "ISO-8859-7", /* PG_ISO_8859_7 */
+ "ISO-8859-8", /* PG_ISO_8859_8 */
+ "CP1250", /* PG_WIN1250 */
+ "CP1253", /* PG_WIN1253 */
+ "CP1254", /* PG_WIN1254 */
+ "CP1255", /* PG_WIN1255 */
+ "CP1257", /* PG_WIN1257 */
+ "KOI8-U", /* PG_KOI8U */
+};
+
+
+/*
+ * Is this encoding supported by ICU?
+ */
+bool
+is_encoding_supported_by_icu(int encoding)
+{
+ if (!PG_VALID_BE_ENCODING(encoding))
+ return false;
+ return (pg_enc2icu_tbl[encoding] != NULL);
+}
+
+/*
+ * Returns ICU's name for encoding, or NULL if not supported
+ */
+const char *
+get_encoding_name_for_icu(int encoding)
+{
+ StaticAssertStmt(lengthof(pg_enc2icu_tbl) == PG_ENCODING_BE_LAST + 1,
+ "pg_enc2icu_tbl incomplete");
+
+ if (!PG_VALID_BE_ENCODING(encoding))
+ return NULL;
+ return pg_enc2icu_tbl[encoding];
+}
+
+
+/* ----------
+ * Encoding checks, for error returns -1 else encoding id
+ * ----------
+ */
+int
+pg_valid_client_encoding(const char *name)
+{
+ int enc;
+
+ if ((enc = pg_char_to_encoding(name)) < 0)
+ return -1;
+
+ if (!PG_VALID_FE_ENCODING(enc))
+ return -1;
+
+ return enc;
+}
+
+int
+pg_valid_server_encoding(const char *name)
+{
+ int enc;
+
+ if ((enc = pg_char_to_encoding(name)) < 0)
+ return -1;
+
+ if (!PG_VALID_BE_ENCODING(enc))
+ return -1;
+
+ return enc;
+}
+
+int
+pg_valid_server_encoding_id(int encoding)
+{
+ return PG_VALID_BE_ENCODING(encoding);
+}
+
+/*
+ * Remove irrelevant chars from encoding name, store at *newkey
+ *
+ * (Caller's responsibility to provide a large enough buffer)
+ */
+static char *
+clean_encoding_name(const char *key, char *newkey)
+{
+ const char *p;
+ char *np;
+
+ for (p = key, np = newkey; *p != '\0'; p++)
+ {
+ if (isalnum((unsigned char) *p))
+ {
+ if (*p >= 'A' && *p <= 'Z')
+ *np++ = *p + 'a' - 'A';
+ else
+ *np++ = *p;
+ }
+ }
+ *np = '\0';
+ return newkey;
+}
+
+/*
+ * Search encoding by encoding name
+ *
+ * Returns encoding ID, or -1 if not recognized
+ */
+int
+pg_char_to_encoding(const char *name)
+{
+ unsigned int nel = lengthof(pg_encname_tbl);
+ const pg_encname *base = pg_encname_tbl,
+ *last = base + nel - 1,
+ *position;
+ int result;
+ char buff[NAMEDATALEN],
+ *key;
+
+ if (name == NULL || *name == '\0')
+ return -1;
+
+ if (strlen(name) >= NAMEDATALEN)
+ return -1; /* it's certainly not in the table */
+
+ key = clean_encoding_name(name, buff);
+
+ while (last >= base)
+ {
+ position = base + ((last - base) >> 1);
+ result = key[0] - position->name[0];
+
+ if (result == 0)
+ {
+ result = strcmp(key, position->name);
+ if (result == 0)
+ return position->encoding;
+ }
+ if (result < 0)
+ last = position - 1;
+ else
+ base = position + 1;
+ }
+ return -1;
+}
+
+const char *
+pg_encoding_to_char(int encoding)
+{
+ if (PG_VALID_ENCODING(encoding))
+ {
+ const pg_enc2name *p = &pg_enc2name_tbl[encoding];
+
+ Assert(encoding == p->encoding);
+ return p->name;
+ }
+ return "";
+}
diff --git a/src/common/exec.c b/src/common/exec.c
new file mode 100644
index 0000000..f39b0a2
--- /dev/null
+++ b/src/common/exec.c
@@ -0,0 +1,708 @@
+/*-------------------------------------------------------------------------
+ *
+ * exec.c
+ * Functions for finding and validating executable files
+ *
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/exec.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <signal.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/*
+ * Hacky solution to allow expressing both frontend and backend error reports
+ * in one macro call. First argument of log_error is an errcode() call of
+ * some sort (ignored if FRONTEND); the rest are errmsg_internal() arguments,
+ * i.e. message string and any parameters for it.
+ *
+ * Caller must provide the gettext wrapper around the message string, if
+ * appropriate, so that it gets translated in the FRONTEND case; this
+ * motivates using errmsg_internal() not errmsg(). We handle appending a
+ * newline, if needed, inside the macro, so that there's only one translatable
+ * string per call not two.
+ */
+#ifndef FRONTEND
+#define log_error(errcodefn, ...) \
+ ereport(LOG, (errcodefn, errmsg_internal(__VA_ARGS__)))
+#else
+#define log_error(errcodefn, ...) \
+ (fprintf(stderr, __VA_ARGS__), fputc('\n', stderr))
+#endif
+
+#ifdef _MSC_VER
+#define getcwd(cwd,len) GetCurrentDirectory(len, cwd)
+#endif
+
+static int validate_exec(const char *path);
+static int resolve_symlinks(char *path);
+
+#ifdef WIN32
+static BOOL GetTokenUser(HANDLE hToken, PTOKEN_USER *ppTokenUser);
+#endif
+
+/*
+ * validate_exec -- validate "path" as an executable file
+ *
+ * returns 0 if the file is found and no error is encountered.
+ * -1 if the regular file "path" does not exist or cannot be executed.
+ * -2 if the file is otherwise valid but cannot be read.
+ */
+static int
+validate_exec(const char *path)
+{
+ struct stat buf;
+ int is_r;
+ int is_x;
+
+#ifdef WIN32
+ char path_exe[MAXPGPATH + sizeof(".exe") - 1];
+
+ /* Win32 requires a .exe suffix for stat() */
+ if (strlen(path) >= strlen(".exe") &&
+ pg_strcasecmp(path + strlen(path) - strlen(".exe"), ".exe") != 0)
+ {
+ strlcpy(path_exe, path, sizeof(path_exe) - 4);
+ strcat(path_exe, ".exe");
+ path = path_exe;
+ }
+#endif
+
+ /*
+ * Ensure that the file exists and is a regular file.
+ *
+ * XXX if you have a broken system where stat() looks at the symlink
+ * instead of the underlying file, you lose.
+ */
+ if (stat(path, &buf) < 0)
+ return -1;
+
+ if (!S_ISREG(buf.st_mode))
+ return -1;
+
+ /*
+ * Ensure that the file is both executable and readable (required for
+ * dynamic loading).
+ */
+#ifndef WIN32
+ is_r = (access(path, R_OK) == 0);
+ is_x = (access(path, X_OK) == 0);
+#else
+ is_r = buf.st_mode & S_IRUSR;
+ is_x = buf.st_mode & S_IXUSR;
+#endif
+ return is_x ? (is_r ? 0 : -2) : -1;
+}
+
+
+/*
+ * find_my_exec -- find an absolute path to a valid executable
+ *
+ * argv0 is the name passed on the command line
+ * retpath is the output area (must be of size MAXPGPATH)
+ * Returns 0 if OK, -1 if error.
+ *
+ * The reason we have to work so hard to find an absolute path is that
+ * on some platforms we can't do dynamic loading unless we know the
+ * executable's location. Also, we need a full path not a relative
+ * path because we will later change working directory. Finally, we want
+ * a true path not a symlink location, so that we can locate other files
+ * that are part of our installation relative to the executable.
+ */
+int
+find_my_exec(const char *argv0, char *retpath)
+{
+ char cwd[MAXPGPATH],
+ test_path[MAXPGPATH];
+ char *path;
+
+ if (!getcwd(cwd, MAXPGPATH))
+ {
+ log_error(errcode_for_file_access(),
+ _("could not identify current directory: %m"));
+ return -1;
+ }
+
+ /*
+ * If argv0 contains a separator, then PATH wasn't used.
+ */
+ if (first_dir_separator(argv0) != NULL)
+ {
+ if (is_absolute_path(argv0))
+ StrNCpy(retpath, argv0, MAXPGPATH);
+ else
+ join_path_components(retpath, cwd, argv0);
+ canonicalize_path(retpath);
+
+ if (validate_exec(retpath) == 0)
+ return resolve_symlinks(retpath);
+
+ log_error(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ _("invalid binary \"%s\""), retpath);
+ return -1;
+ }
+
+#ifdef WIN32
+ /* Win32 checks the current directory first for names without slashes */
+ join_path_components(retpath, cwd, argv0);
+ if (validate_exec(retpath) == 0)
+ return resolve_symlinks(retpath);
+#endif
+
+ /*
+ * Since no explicit path was supplied, the user must have been relying on
+ * PATH. We'll search the same PATH.
+ */
+ if ((path = getenv("PATH")) && *path)
+ {
+ char *startp = NULL,
+ *endp = NULL;
+
+ do
+ {
+ if (!startp)
+ startp = path;
+ else
+ startp = endp + 1;
+
+ endp = first_path_var_separator(startp);
+ if (!endp)
+ endp = startp + strlen(startp); /* point to end */
+
+ StrNCpy(test_path, startp, Min(endp - startp + 1, MAXPGPATH));
+
+ if (is_absolute_path(test_path))
+ join_path_components(retpath, test_path, argv0);
+ else
+ {
+ join_path_components(retpath, cwd, test_path);
+ join_path_components(retpath, retpath, argv0);
+ }
+ canonicalize_path(retpath);
+
+ switch (validate_exec(retpath))
+ {
+ case 0: /* found ok */
+ return resolve_symlinks(retpath);
+ case -1: /* wasn't even a candidate, keep looking */
+ break;
+ case -2: /* found but disqualified */
+ log_error(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ _("could not read binary \"%s\""),
+ retpath);
+ break;
+ }
+ } while (*endp);
+ }
+
+ log_error(errcode(ERRCODE_UNDEFINED_FILE),
+ _("could not find a \"%s\" to execute"), argv0);
+ return -1;
+}
+
+
+/*
+ * resolve_symlinks - resolve symlinks to the underlying file
+ *
+ * Replace "path" by the absolute path to the referenced file.
+ *
+ * Returns 0 if OK, -1 if error.
+ *
+ * Note: we are not particularly tense about producing nice error messages
+ * because we are not really expecting error here; we just determined that
+ * the symlink does point to a valid executable.
+ */
+static int
+resolve_symlinks(char *path)
+{
+#ifdef HAVE_READLINK
+ struct stat buf;
+ char orig_wd[MAXPGPATH],
+ link_buf[MAXPGPATH];
+ char *fname;
+
+ /*
+ * To resolve a symlink properly, we have to chdir into its directory and
+ * then chdir to where the symlink points; otherwise we may fail to
+ * resolve relative links correctly (consider cases involving mount
+ * points, for example). After following the final symlink, we use
+ * getcwd() to figure out where the heck we're at.
+ *
+ * One might think we could skip all this if path doesn't point to a
+ * symlink to start with, but that's wrong. We also want to get rid of
+ * any directory symlinks that are present in the given path. We expect
+ * getcwd() to give us an accurate, symlink-free path.
+ */
+ if (!getcwd(orig_wd, MAXPGPATH))
+ {
+ log_error(errcode_for_file_access(),
+ _("could not identify current directory: %m"));
+ return -1;
+ }
+
+ for (;;)
+ {
+ char *lsep;
+ int rllen;
+
+ lsep = last_dir_separator(path);
+ if (lsep)
+ {
+ *lsep = '\0';
+ if (chdir(path) == -1)
+ {
+ log_error(errcode_for_file_access(),
+ _("could not change directory to \"%s\": %m"), path);
+ return -1;
+ }
+ fname = lsep + 1;
+ }
+ else
+ fname = path;
+
+ if (lstat(fname, &buf) < 0 ||
+ !S_ISLNK(buf.st_mode))
+ break;
+
+ errno = 0;
+ rllen = readlink(fname, link_buf, sizeof(link_buf));
+ if (rllen < 0 || rllen >= sizeof(link_buf))
+ {
+ log_error(errcode_for_file_access(),
+ _("could not read symbolic link \"%s\": %m"), fname);
+ return -1;
+ }
+ link_buf[rllen] = '\0';
+ strcpy(path, link_buf);
+ }
+
+ /* must copy final component out of 'path' temporarily */
+ strlcpy(link_buf, fname, sizeof(link_buf));
+
+ if (!getcwd(path, MAXPGPATH))
+ {
+ log_error(errcode_for_file_access(),
+ _("could not identify current directory: %m"));
+ return -1;
+ }
+ join_path_components(path, path, link_buf);
+ canonicalize_path(path);
+
+ if (chdir(orig_wd) == -1)
+ {
+ log_error(errcode_for_file_access(),
+ _("could not change directory to \"%s\": %m"), orig_wd);
+ return -1;
+ }
+#endif /* HAVE_READLINK */
+
+ return 0;
+}
+
+
+/*
+ * Find another program in our binary's directory,
+ * then make sure it is the proper version.
+ */
+int
+find_other_exec(const char *argv0, const char *target,
+ const char *versionstr, char *retpath)
+{
+ char cmd[MAXPGPATH];
+ char line[MAXPGPATH];
+
+ if (find_my_exec(argv0, retpath) < 0)
+ return -1;
+
+ /* Trim off program name and keep just directory */
+ *last_dir_separator(retpath) = '\0';
+ canonicalize_path(retpath);
+
+ /* Now append the other program's name */
+ snprintf(retpath + strlen(retpath), MAXPGPATH - strlen(retpath),
+ "/%s%s", target, EXE);
+
+ if (validate_exec(retpath) != 0)
+ return -1;
+
+ snprintf(cmd, sizeof(cmd), "\"%s\" -V", retpath);
+
+ if (!pipe_read_line(cmd, line, sizeof(line)))
+ return -1;
+
+ if (strcmp(line, versionstr) != 0)
+ return -2;
+
+ return 0;
+}
+
+
+/*
+ * Execute a command in a pipe and read the first line from it.
+ */
+char *
+pipe_read_line(char *cmd, char *line, int maxsize)
+{
+ FILE *pgver;
+
+ /* flush output buffers in case popen does not... */
+ fflush(stdout);
+ fflush(stderr);
+
+ errno = 0;
+ if ((pgver = popen(cmd, "r")) == NULL)
+ {
+ perror("popen failure");
+ return NULL;
+ }
+
+ errno = 0;
+ if (fgets(line, maxsize, pgver) == NULL)
+ {
+ if (feof(pgver))
+ fprintf(stderr, "no data was returned by command \"%s\"\n", cmd);
+ else
+ perror("fgets failure");
+ pclose(pgver); /* no error checking */
+ return NULL;
+ }
+
+ if (pclose_check(pgver))
+ return NULL;
+
+ return line;
+}
+
+
+/*
+ * pclose() plus useful error reporting
+ */
+int
+pclose_check(FILE *stream)
+{
+ int exitstatus;
+ char *reason;
+
+ exitstatus = pclose(stream);
+
+ if (exitstatus == 0)
+ return 0; /* all is well */
+
+ if (exitstatus == -1)
+ {
+ /* pclose() itself failed, and hopefully set errno */
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ _("pclose failed: %m"));
+ }
+ else
+ {
+ reason = wait_result_to_str(exitstatus);
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "%s", reason);
+ pfree(reason);
+ }
+ return exitstatus;
+}
+
+/*
+ * set_pglocale_pgservice
+ *
+ * Set application-specific locale and service directory
+ *
+ * This function takes the value of argv[0] rather than a full path.
+ *
+ * (You may be wondering why this is in exec.c. It requires this module's
+ * services and doesn't introduce any new dependencies, so this seems as
+ * good as anyplace.)
+ */
+void
+set_pglocale_pgservice(const char *argv0, const char *app)
+{
+ char path[MAXPGPATH];
+ char my_exec_path[MAXPGPATH];
+ char env_path[MAXPGPATH + sizeof("PGSYSCONFDIR=")]; /* longer than
+ * PGLOCALEDIR */
+ char *dup_path;
+
+ /* don't set LC_ALL in the backend */
+ if (strcmp(app, PG_TEXTDOMAIN("postgres")) != 0)
+ {
+ setlocale(LC_ALL, "");
+
+ /*
+ * One could make a case for reproducing here PostmasterMain()'s test
+ * for whether the process is multithreaded. Unlike the postmaster,
+ * no frontend program calls sigprocmask() or otherwise provides for
+ * mutual exclusion between signal handlers. While frontends using
+ * fork(), if multithreaded, are formally exposed to undefined
+ * behavior, we have not witnessed a concrete bug. Therefore,
+ * complaining about multithreading here may be mere pedantry.
+ */
+ }
+
+ if (find_my_exec(argv0, my_exec_path) < 0)
+ return;
+
+#ifdef ENABLE_NLS
+ get_locale_path(my_exec_path, path);
+ bindtextdomain(app, path);
+ textdomain(app);
+
+ if (getenv("PGLOCALEDIR") == NULL)
+ {
+ /* set for libpq to use */
+ snprintf(env_path, sizeof(env_path), "PGLOCALEDIR=%s", path);
+ canonicalize_path(env_path + 12);
+ dup_path = strdup(env_path);
+ if (dup_path)
+ putenv(dup_path);
+ }
+#endif
+
+ if (getenv("PGSYSCONFDIR") == NULL)
+ {
+ get_etc_path(my_exec_path, path);
+
+ /* set for libpq to use */
+ snprintf(env_path, sizeof(env_path), "PGSYSCONFDIR=%s", path);
+ canonicalize_path(env_path + 13);
+ dup_path = strdup(env_path);
+ if (dup_path)
+ putenv(dup_path);
+ }
+}
+
+#ifdef WIN32
+
+/*
+ * AddUserToTokenDacl(HANDLE hToken)
+ *
+ * This function adds the current user account to the restricted
+ * token used when we create a restricted process.
+ *
+ * This is required because of some security changes in Windows
+ * that appeared in patches to XP/2K3 and in Vista/2008.
+ *
+ * On these machines, the Administrator account is not included in
+ * the default DACL - you just get Administrators + System. For
+ * regular users you get User + System. Because we strip Administrators
+ * when we create the restricted token, we are left with only System
+ * in the DACL which leads to access denied errors for later CreatePipe()
+ * and CreateProcess() calls when running as Administrator.
+ *
+ * This function fixes this problem by modifying the DACL of the
+ * token the process will use, and explicitly re-adding the current
+ * user account. This is still secure because the Administrator account
+ * inherits its privileges from the Administrators group - it doesn't
+ * have any of its own.
+ */
+BOOL
+AddUserToTokenDacl(HANDLE hToken)
+{
+ int i;
+ ACL_SIZE_INFORMATION asi;
+ ACCESS_ALLOWED_ACE *pace;
+ DWORD dwNewAclSize;
+ DWORD dwSize = 0;
+ DWORD dwTokenInfoLength = 0;
+ PACL pacl = NULL;
+ PTOKEN_USER pTokenUser = NULL;
+ TOKEN_DEFAULT_DACL tddNew;
+ TOKEN_DEFAULT_DACL *ptdd = NULL;
+ TOKEN_INFORMATION_CLASS tic = TokenDefaultDacl;
+ BOOL ret = FALSE;
+
+ /* Figure out the buffer size for the DACL info */
+ if (!GetTokenInformation(hToken, tic, (LPVOID) NULL, dwTokenInfoLength, &dwSize))
+ {
+ if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
+ {
+ ptdd = (TOKEN_DEFAULT_DACL *) LocalAlloc(LPTR, dwSize);
+ if (ptdd == NULL)
+ {
+ log_error(errcode(ERRCODE_OUT_OF_MEMORY),
+ _("out of memory"));
+ goto cleanup;
+ }
+
+ if (!GetTokenInformation(hToken, tic, (LPVOID) ptdd, dwSize, &dwSize))
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not get token information: error code %lu",
+ GetLastError());
+ goto cleanup;
+ }
+ }
+ else
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not get token information buffer size: error code %lu",
+ GetLastError());
+ goto cleanup;
+ }
+ }
+
+ /* Get the ACL info */
+ if (!GetAclInformation(ptdd->DefaultDacl, (LPVOID) &asi,
+ (DWORD) sizeof(ACL_SIZE_INFORMATION),
+ AclSizeInformation))
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not get ACL information: error code %lu",
+ GetLastError());
+ goto cleanup;
+ }
+
+ /* Get the current user SID */
+ if (!GetTokenUser(hToken, &pTokenUser))
+ goto cleanup; /* callee printed a message */
+
+ /* Figure out the size of the new ACL */
+ dwNewAclSize = asi.AclBytesInUse + sizeof(ACCESS_ALLOWED_ACE) +
+ GetLengthSid(pTokenUser->User.Sid) - sizeof(DWORD);
+
+ /* Allocate the ACL buffer & initialize it */
+ pacl = (PACL) LocalAlloc(LPTR, dwNewAclSize);
+ if (pacl == NULL)
+ {
+ log_error(errcode(ERRCODE_OUT_OF_MEMORY),
+ _("out of memory"));
+ goto cleanup;
+ }
+
+ if (!InitializeAcl(pacl, dwNewAclSize, ACL_REVISION))
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not initialize ACL: error code %lu", GetLastError());
+ goto cleanup;
+ }
+
+ /* Loop through the existing ACEs, and build the new ACL */
+ for (i = 0; i < (int) asi.AceCount; i++)
+ {
+ if (!GetAce(ptdd->DefaultDacl, i, (LPVOID *) &pace))
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not get ACE: error code %lu", GetLastError());
+ goto cleanup;
+ }
+
+ if (!AddAce(pacl, ACL_REVISION, MAXDWORD, pace, ((PACE_HEADER) pace)->AceSize))
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not add ACE: error code %lu", GetLastError());
+ goto cleanup;
+ }
+ }
+
+ /* Add the new ACE for the current user */
+ if (!AddAccessAllowedAceEx(pacl, ACL_REVISION, OBJECT_INHERIT_ACE, GENERIC_ALL, pTokenUser->User.Sid))
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not add access allowed ACE: error code %lu",
+ GetLastError());
+ goto cleanup;
+ }
+
+ /* Set the new DACL in the token */
+ tddNew.DefaultDacl = pacl;
+
+ if (!SetTokenInformation(hToken, tic, (LPVOID) &tddNew, dwNewAclSize))
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not set token information: error code %lu",
+ GetLastError());
+ goto cleanup;
+ }
+
+ ret = TRUE;
+
+cleanup:
+ if (pTokenUser)
+ LocalFree((HLOCAL) pTokenUser);
+
+ if (pacl)
+ LocalFree((HLOCAL) pacl);
+
+ if (ptdd)
+ LocalFree((HLOCAL) ptdd);
+
+ return ret;
+}
+
+/*
+ * GetTokenUser(HANDLE hToken, PTOKEN_USER *ppTokenUser)
+ *
+ * Get the users token information from a process token.
+ *
+ * The caller of this function is responsible for calling LocalFree() on the
+ * returned TOKEN_USER memory.
+ */
+static BOOL
+GetTokenUser(HANDLE hToken, PTOKEN_USER *ppTokenUser)
+{
+ DWORD dwLength;
+
+ *ppTokenUser = NULL;
+
+ if (!GetTokenInformation(hToken,
+ TokenUser,
+ NULL,
+ 0,
+ &dwLength))
+ {
+ if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
+ {
+ *ppTokenUser = (PTOKEN_USER) LocalAlloc(LPTR, dwLength);
+
+ if (*ppTokenUser == NULL)
+ {
+ log_error(errcode(ERRCODE_OUT_OF_MEMORY),
+ _("out of memory"));
+ return FALSE;
+ }
+ }
+ else
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not get token information buffer size: error code %lu",
+ GetLastError());
+ return FALSE;
+ }
+ }
+
+ if (!GetTokenInformation(hToken,
+ TokenUser,
+ *ppTokenUser,
+ dwLength,
+ &dwLength))
+ {
+ LocalFree(*ppTokenUser);
+ *ppTokenUser = NULL;
+
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not get token information: error code %lu",
+ GetLastError());
+ return FALSE;
+ }
+
+ /* Memory in *ppTokenUser is LocalFree():d by the caller */
+ return TRUE;
+}
+
+#endif
diff --git a/src/common/f2s.c b/src/common/f2s.c
new file mode 100644
index 0000000..463d192
--- /dev/null
+++ b/src/common/f2s.c
@@ -0,0 +1,803 @@
+/*---------------------------------------------------------------------------
+ *
+ * Ryu floating-point output for single precision.
+ *
+ * Portions Copyright (c) 2018-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/f2s.c
+ *
+ * This is a modification of code taken from github.com/ulfjack/ryu under the
+ * terms of the Boost license (not the Apache license). The original copyright
+ * notice follows:
+ *
+ * Copyright 2018 Ulf Adams
+ *
+ * The contents of this file may be used under the terms of the Apache
+ * License, Version 2.0.
+ *
+ * (See accompanying file LICENSE-Apache or copy at
+ * http://www.apache.org/licenses/LICENSE-2.0)
+ *
+ * Alternatively, the contents of this file may be used under the terms of the
+ * Boost Software License, Version 1.0.
+ *
+ * (See accompanying file LICENSE-Boost or copy at
+ * https://www.boost.org/LICENSE_1_0.txt)
+ *
+ * Unless required by applicable law or agreed to in writing, this software is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/shortest_dec.h"
+#include "digit_table.h"
+#include "ryu_common.h"
+
+#define FLOAT_MANTISSA_BITS 23
+#define FLOAT_EXPONENT_BITS 8
+#define FLOAT_BIAS 127
+
+/*
+ * This table is generated (by the upstream) by PrintFloatLookupTable,
+ * and modified (by us) to add UINT64CONST.
+ */
+#define FLOAT_POW5_INV_BITCOUNT 59
+static const uint64 FLOAT_POW5_INV_SPLIT[31] = {
+ UINT64CONST(576460752303423489), UINT64CONST(461168601842738791), UINT64CONST(368934881474191033), UINT64CONST(295147905179352826),
+ UINT64CONST(472236648286964522), UINT64CONST(377789318629571618), UINT64CONST(302231454903657294), UINT64CONST(483570327845851670),
+ UINT64CONST(386856262276681336), UINT64CONST(309485009821345069), UINT64CONST(495176015714152110), UINT64CONST(396140812571321688),
+ UINT64CONST(316912650057057351), UINT64CONST(507060240091291761), UINT64CONST(405648192073033409), UINT64CONST(324518553658426727),
+ UINT64CONST(519229685853482763), UINT64CONST(415383748682786211), UINT64CONST(332306998946228969), UINT64CONST(531691198313966350),
+ UINT64CONST(425352958651173080), UINT64CONST(340282366920938464), UINT64CONST(544451787073501542), UINT64CONST(435561429658801234),
+ UINT64CONST(348449143727040987), UINT64CONST(557518629963265579), UINT64CONST(446014903970612463), UINT64CONST(356811923176489971),
+ UINT64CONST(570899077082383953), UINT64CONST(456719261665907162), UINT64CONST(365375409332725730)
+};
+#define FLOAT_POW5_BITCOUNT 61
+static const uint64 FLOAT_POW5_SPLIT[47] = {
+ UINT64CONST(1152921504606846976), UINT64CONST(1441151880758558720), UINT64CONST(1801439850948198400), UINT64CONST(2251799813685248000),
+ UINT64CONST(1407374883553280000), UINT64CONST(1759218604441600000), UINT64CONST(2199023255552000000), UINT64CONST(1374389534720000000),
+ UINT64CONST(1717986918400000000), UINT64CONST(2147483648000000000), UINT64CONST(1342177280000000000), UINT64CONST(1677721600000000000),
+ UINT64CONST(2097152000000000000), UINT64CONST(1310720000000000000), UINT64CONST(1638400000000000000), UINT64CONST(2048000000000000000),
+ UINT64CONST(1280000000000000000), UINT64CONST(1600000000000000000), UINT64CONST(2000000000000000000), UINT64CONST(1250000000000000000),
+ UINT64CONST(1562500000000000000), UINT64CONST(1953125000000000000), UINT64CONST(1220703125000000000), UINT64CONST(1525878906250000000),
+ UINT64CONST(1907348632812500000), UINT64CONST(1192092895507812500), UINT64CONST(1490116119384765625), UINT64CONST(1862645149230957031),
+ UINT64CONST(1164153218269348144), UINT64CONST(1455191522836685180), UINT64CONST(1818989403545856475), UINT64CONST(2273736754432320594),
+ UINT64CONST(1421085471520200371), UINT64CONST(1776356839400250464), UINT64CONST(2220446049250313080), UINT64CONST(1387778780781445675),
+ UINT64CONST(1734723475976807094), UINT64CONST(2168404344971008868), UINT64CONST(1355252715606880542), UINT64CONST(1694065894508600678),
+ UINT64CONST(2117582368135750847), UINT64CONST(1323488980084844279), UINT64CONST(1654361225106055349), UINT64CONST(2067951531382569187),
+ UINT64CONST(1292469707114105741), UINT64CONST(1615587133892632177), UINT64CONST(2019483917365790221)
+};
+
+static inline uint32
+pow5Factor(uint32 value)
+{
+ uint32 count = 0;
+
+ for (;;)
+ {
+ Assert(value != 0);
+ const uint32 q = value / 5;
+ const uint32 r = value % 5;
+
+ if (r != 0)
+ break;
+
+ value = q;
+ ++count;
+ }
+ return count;
+}
+
+/* Returns true if value is divisible by 5^p. */
+static inline bool
+multipleOfPowerOf5(const uint32 value, const uint32 p)
+{
+ return pow5Factor(value) >= p;
+}
+
+/* Returns true if value is divisible by 2^p. */
+static inline bool
+multipleOfPowerOf2(const uint32 value, const uint32 p)
+{
+ /* return __builtin_ctz(value) >= p; */
+ return (value & ((1u << p) - 1)) == 0;
+}
+
+/*
+ * It seems to be slightly faster to avoid uint128_t here, although the
+ * generated code for uint128_t looks slightly nicer.
+ */
+static inline uint32
+mulShift(const uint32 m, const uint64 factor, const int32 shift)
+{
+ /*
+ * The casts here help MSVC to avoid calls to the __allmul library
+ * function.
+ */
+ const uint32 factorLo = (uint32) (factor);
+ const uint32 factorHi = (uint32) (factor >> 32);
+ const uint64 bits0 = (uint64) m * factorLo;
+ const uint64 bits1 = (uint64) m * factorHi;
+
+ Assert(shift > 32);
+
+#ifdef RYU_32_BIT_PLATFORM
+
+ /*
+ * On 32-bit platforms we can avoid a 64-bit shift-right since we only
+ * need the upper 32 bits of the result and the shift value is > 32.
+ */
+ const uint32 bits0Hi = (uint32) (bits0 >> 32);
+ uint32 bits1Lo = (uint32) (bits1);
+ uint32 bits1Hi = (uint32) (bits1 >> 32);
+
+ bits1Lo += bits0Hi;
+ bits1Hi += (bits1Lo < bits0Hi);
+
+ const int32 s = shift - 32;
+
+ return (bits1Hi << (32 - s)) | (bits1Lo >> s);
+
+#else /* RYU_32_BIT_PLATFORM */
+
+ const uint64 sum = (bits0 >> 32) + bits1;
+ const uint64 shiftedSum = sum >> (shift - 32);
+
+ Assert(shiftedSum <= PG_UINT32_MAX);
+ return (uint32) shiftedSum;
+
+#endif /* RYU_32_BIT_PLATFORM */
+}
+
+static inline uint32
+mulPow5InvDivPow2(const uint32 m, const uint32 q, const int32 j)
+{
+ return mulShift(m, FLOAT_POW5_INV_SPLIT[q], j);
+}
+
+static inline uint32
+mulPow5divPow2(const uint32 m, const uint32 i, const int32 j)
+{
+ return mulShift(m, FLOAT_POW5_SPLIT[i], j);
+}
+
+static inline uint32
+decimalLength(const uint32 v)
+{
+ /* Function precondition: v is not a 10-digit number. */
+ /* (9 digits are sufficient for round-tripping.) */
+ Assert(v < 1000000000);
+ if (v >= 100000000)
+ {
+ return 9;
+ }
+ if (v >= 10000000)
+ {
+ return 8;
+ }
+ if (v >= 1000000)
+ {
+ return 7;
+ }
+ if (v >= 100000)
+ {
+ return 6;
+ }
+ if (v >= 10000)
+ {
+ return 5;
+ }
+ if (v >= 1000)
+ {
+ return 4;
+ }
+ if (v >= 100)
+ {
+ return 3;
+ }
+ if (v >= 10)
+ {
+ return 2;
+ }
+ return 1;
+}
+
+/* A floating decimal representing m * 10^e. */
+typedef struct floating_decimal_32
+{
+ uint32 mantissa;
+ int32 exponent;
+} floating_decimal_32;
+
+static inline floating_decimal_32
+f2d(const uint32 ieeeMantissa, const uint32 ieeeExponent)
+{
+ int32 e2;
+ uint32 m2;
+
+ if (ieeeExponent == 0)
+ {
+ /* We subtract 2 so that the bounds computation has 2 additional bits. */
+ e2 = 1 - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
+ m2 = ieeeMantissa;
+ }
+ else
+ {
+ e2 = ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
+ m2 = (1u << FLOAT_MANTISSA_BITS) | ieeeMantissa;
+ }
+
+#if STRICTLY_SHORTEST
+ const bool even = (m2 & 1) == 0;
+ const bool acceptBounds = even;
+#else
+ const bool acceptBounds = false;
+#endif
+
+ /* Step 2: Determine the interval of legal decimal representations. */
+ const uint32 mv = 4 * m2;
+ const uint32 mp = 4 * m2 + 2;
+
+ /* Implicit bool -> int conversion. True is 1, false is 0. */
+ const uint32 mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
+ const uint32 mm = 4 * m2 - 1 - mmShift;
+
+ /* Step 3: Convert to a decimal power base using 64-bit arithmetic. */
+ uint32 vr,
+ vp,
+ vm;
+ int32 e10;
+ bool vmIsTrailingZeros = false;
+ bool vrIsTrailingZeros = false;
+ uint8 lastRemovedDigit = 0;
+
+ if (e2 >= 0)
+ {
+ const uint32 q = log10Pow2(e2);
+
+ e10 = q;
+
+ const int32 k = FLOAT_POW5_INV_BITCOUNT + pow5bits(q) - 1;
+ const int32 i = -e2 + q + k;
+
+ vr = mulPow5InvDivPow2(mv, q, i);
+ vp = mulPow5InvDivPow2(mp, q, i);
+ vm = mulPow5InvDivPow2(mm, q, i);
+
+ if (q != 0 && (vp - 1) / 10 <= vm / 10)
+ {
+ /*
+ * We need to know one removed digit even if we are not going to
+ * loop below. We could use q = X - 1 above, except that would
+ * require 33 bits for the result, and we've found that 32-bit
+ * arithmetic is faster even on 64-bit machines.
+ */
+ const int32 l = FLOAT_POW5_INV_BITCOUNT + pow5bits(q - 1) - 1;
+
+ lastRemovedDigit = (uint8) (mulPow5InvDivPow2(mv, q - 1, -e2 + q - 1 + l) % 10);
+ }
+ if (q <= 9)
+ {
+ /*
+ * The largest power of 5 that fits in 24 bits is 5^10, but q <= 9
+ * seems to be safe as well.
+ *
+ * Only one of mp, mv, and mm can be a multiple of 5, if any.
+ */
+ if (mv % 5 == 0)
+ {
+ vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
+ }
+ else if (acceptBounds)
+ {
+ vmIsTrailingZeros = multipleOfPowerOf5(mm, q);
+ }
+ else
+ {
+ vp -= multipleOfPowerOf5(mp, q);
+ }
+ }
+ }
+ else
+ {
+ const uint32 q = log10Pow5(-e2);
+
+ e10 = q + e2;
+
+ const int32 i = -e2 - q;
+ const int32 k = pow5bits(i) - FLOAT_POW5_BITCOUNT;
+ int32 j = q - k;
+
+ vr = mulPow5divPow2(mv, i, j);
+ vp = mulPow5divPow2(mp, i, j);
+ vm = mulPow5divPow2(mm, i, j);
+
+ if (q != 0 && (vp - 1) / 10 <= vm / 10)
+ {
+ j = q - 1 - (pow5bits(i + 1) - FLOAT_POW5_BITCOUNT);
+ lastRemovedDigit = (uint8) (mulPow5divPow2(mv, i + 1, j) % 10);
+ }
+ if (q <= 1)
+ {
+ /*
+ * {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q
+ * trailing 0 bits.
+ */
+ /* mv = 4 * m2, so it always has at least two trailing 0 bits. */
+ vrIsTrailingZeros = true;
+ if (acceptBounds)
+ {
+ /*
+ * mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff
+ * mmShift == 1.
+ */
+ vmIsTrailingZeros = mmShift == 1;
+ }
+ else
+ {
+ /*
+ * mp = mv + 2, so it always has at least one trailing 0 bit.
+ */
+ --vp;
+ }
+ }
+ else if (q < 31)
+ {
+ /* TODO(ulfjack):Use a tighter bound here. */
+ vrIsTrailingZeros = multipleOfPowerOf2(mv, q - 1);
+ }
+ }
+
+ /*
+ * Step 4: Find the shortest decimal representation in the interval of
+ * legal representations.
+ */
+ uint32 removed = 0;
+ uint32 output;
+
+ if (vmIsTrailingZeros || vrIsTrailingZeros)
+ {
+ /* General case, which happens rarely (~4.0%). */
+ while (vp / 10 > vm / 10)
+ {
+ vmIsTrailingZeros &= vm - (vm / 10) * 10 == 0;
+ vrIsTrailingZeros &= lastRemovedDigit == 0;
+ lastRemovedDigit = (uint8) (vr % 10);
+ vr /= 10;
+ vp /= 10;
+ vm /= 10;
+ ++removed;
+ }
+ if (vmIsTrailingZeros)
+ {
+ while (vm % 10 == 0)
+ {
+ vrIsTrailingZeros &= lastRemovedDigit == 0;
+ lastRemovedDigit = (uint8) (vr % 10);
+ vr /= 10;
+ vp /= 10;
+ vm /= 10;
+ ++removed;
+ }
+ }
+
+ if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0)
+ {
+ /* Round even if the exact number is .....50..0. */
+ lastRemovedDigit = 4;
+ }
+
+ /*
+ * We need to take vr + 1 if vr is outside bounds or we need to round
+ * up.
+ */
+ output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
+ }
+ else
+ {
+ /*
+ * Specialized for the common case (~96.0%). Percentages below are
+ * relative to this.
+ *
+ * Loop iterations below (approximately): 0: 13.6%, 1: 70.7%, 2:
+ * 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
+ */
+ while (vp / 10 > vm / 10)
+ {
+ lastRemovedDigit = (uint8) (vr % 10);
+ vr /= 10;
+ vp /= 10;
+ vm /= 10;
+ ++removed;
+ }
+
+ /*
+ * We need to take vr + 1 if vr is outside bounds or we need to round
+ * up.
+ */
+ output = vr + (vr == vm || lastRemovedDigit >= 5);
+ }
+
+ const int32 exp = e10 + removed;
+
+ floating_decimal_32 fd;
+
+ fd.exponent = exp;
+ fd.mantissa = output;
+ return fd;
+}
+
+static inline int
+to_chars_f(const floating_decimal_32 v, const uint32 olength, char *const result)
+{
+ /* Step 5: Print the decimal representation. */
+ int index = 0;
+
+ uint32 output = v.mantissa;
+ int32 exp = v.exponent;
+
+ /*----
+ * On entry, mantissa * 10^exp is the result to be output.
+ * Caller has already done the - sign if needed.
+ *
+ * We want to insert the point somewhere depending on the output length
+ * and exponent, which might mean adding zeros:
+ *
+ * exp | format
+ * 1+ | ddddddddd000000
+ * 0 | ddddddddd
+ * -1 .. -len+1 | dddddddd.d to d.ddddddddd
+ * -len ... | 0.ddddddddd to 0.000dddddd
+ */
+ uint32 i = 0;
+ int32 nexp = exp + olength;
+
+ if (nexp <= 0)
+ {
+ /* -nexp is number of 0s to add after '.' */
+ Assert(nexp >= -3);
+ /* 0.000ddddd */
+ index = 2 - nexp;
+ /* copy 8 bytes rather than 5 to let compiler optimize */
+ memcpy(result, "0.000000", 8);
+ }
+ else if (exp < 0)
+ {
+ /*
+ * dddd.dddd; leave space at the start and move the '.' in after
+ */
+ index = 1;
+ }
+ else
+ {
+ /*
+ * We can save some code later by pre-filling with zeros. We know that
+ * there can be no more than 6 output digits in this form, otherwise
+ * we would not choose fixed-point output. memset 8 rather than 6
+ * bytes to let the compiler optimize it.
+ */
+ Assert(exp < 6 && exp + olength <= 6);
+ memset(result, '0', 8);
+ }
+
+ while (output >= 10000)
+ {
+ const uint32 c = output - 10000 * (output / 10000);
+ const uint32 c0 = (c % 100) << 1;
+ const uint32 c1 = (c / 100) << 1;
+
+ output /= 10000;
+
+ memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2);
+ memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2);
+ i += 4;
+ }
+ if (output >= 100)
+ {
+ const uint32 c = (output % 100) << 1;
+
+ output /= 100;
+ memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2);
+ i += 2;
+ }
+ if (output >= 10)
+ {
+ const uint32 c = output << 1;
+
+ memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2);
+ }
+ else
+ {
+ result[index] = (char) ('0' + output);
+ }
+
+ if (index == 1)
+ {
+ /*
+ * nexp is 1..6 here, representing the number of digits before the
+ * point. A value of 7+ is not possible because we switch to
+ * scientific notation when the display exponent reaches 6.
+ */
+ Assert(nexp < 7);
+ /* gcc only seems to want to optimize memmove for small 2^n */
+ if (nexp & 4)
+ {
+ memmove(result + index - 1, result + index, 4);
+ index += 4;
+ }
+ if (nexp & 2)
+ {
+ memmove(result + index - 1, result + index, 2);
+ index += 2;
+ }
+ if (nexp & 1)
+ {
+ result[index - 1] = result[index];
+ }
+ result[nexp] = '.';
+ index = olength + 1;
+ }
+ else if (exp >= 0)
+ {
+ /* we supplied the trailing zeros earlier, now just set the length. */
+ index = olength + exp;
+ }
+ else
+ {
+ index = olength + (2 - nexp);
+ }
+
+ return index;
+}
+
+static inline int
+to_chars(const floating_decimal_32 v, const bool sign, char *const result)
+{
+ /* Step 5: Print the decimal representation. */
+ int index = 0;
+
+ uint32 output = v.mantissa;
+ uint32 olength = decimalLength(output);
+ int32 exp = v.exponent + olength - 1;
+
+ if (sign)
+ result[index++] = '-';
+
+ /*
+ * The thresholds for fixed-point output are chosen to match printf
+ * defaults. Beware that both the code of to_chars_f and the value of
+ * FLOAT_SHORTEST_DECIMAL_LEN are sensitive to these thresholds.
+ */
+ if (exp >= -4 && exp < 6)
+ return to_chars_f(v, olength, result + index) + sign;
+
+ /*
+ * If v.exponent is exactly 0, we might have reached here via the small
+ * integer fast path, in which case v.mantissa might contain trailing
+ * (decimal) zeros. For scientific notation we need to move these zeros
+ * into the exponent. (For fixed point this doesn't matter, which is why
+ * we do this here rather than above.)
+ *
+ * Since we already calculated the display exponent (exp) above based on
+ * the old decimal length, that value does not change here. Instead, we
+ * just reduce the display length for each digit removed.
+ *
+ * If we didn't get here via the fast path, the raw exponent will not
+ * usually be 0, and there will be no trailing zeros, so we pay no more
+ * than one div10/multiply extra cost. We claw back half of that by
+ * checking for divisibility by 2 before dividing by 10.
+ */
+ if (v.exponent == 0)
+ {
+ while ((output & 1) == 0)
+ {
+ const uint32 q = output / 10;
+ const uint32 r = output - 10 * q;
+
+ if (r != 0)
+ break;
+ output = q;
+ --olength;
+ }
+ }
+
+ /*----
+ * Print the decimal digits.
+ * The following code is equivalent to:
+ *
+ * for (uint32 i = 0; i < olength - 1; ++i) {
+ * const uint32 c = output % 10; output /= 10;
+ * result[index + olength - i] = (char) ('0' + c);
+ * }
+ * result[index] = '0' + output % 10;
+ */
+ uint32 i = 0;
+
+ while (output >= 10000)
+ {
+ const uint32 c = output - 10000 * (output / 10000);
+ const uint32 c0 = (c % 100) << 1;
+ const uint32 c1 = (c / 100) << 1;
+
+ output /= 10000;
+
+ memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2);
+ memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2);
+ i += 4;
+ }
+ if (output >= 100)
+ {
+ const uint32 c = (output % 100) << 1;
+
+ output /= 100;
+ memcpy(result + index + olength - i - 1, DIGIT_TABLE + c, 2);
+ i += 2;
+ }
+ if (output >= 10)
+ {
+ const uint32 c = output << 1;
+
+ /*
+ * We can't use memcpy here: the decimal dot goes between these two
+ * digits.
+ */
+ result[index + olength - i] = DIGIT_TABLE[c + 1];
+ result[index] = DIGIT_TABLE[c];
+ }
+ else
+ {
+ result[index] = (char) ('0' + output);
+ }
+
+ /* Print decimal point if needed. */
+ if (olength > 1)
+ {
+ result[index + 1] = '.';
+ index += olength + 1;
+ }
+ else
+ {
+ ++index;
+ }
+
+ /* Print the exponent. */
+ result[index++] = 'e';
+ if (exp < 0)
+ {
+ result[index++] = '-';
+ exp = -exp;
+ }
+ else
+ result[index++] = '+';
+
+ memcpy(result + index, DIGIT_TABLE + 2 * exp, 2);
+ index += 2;
+
+ return index;
+}
+
+static inline bool
+f2d_small_int(const uint32 ieeeMantissa,
+ const uint32 ieeeExponent,
+ floating_decimal_32 *v)
+{
+ const int32 e2 = (int32) ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS;
+
+ /*
+ * Avoid using multiple "return false;" here since it tends to provoke the
+ * compiler into inlining multiple copies of f2d, which is undesirable.
+ */
+
+ if (e2 >= -FLOAT_MANTISSA_BITS && e2 <= 0)
+ {
+ /*----
+ * Since 2^23 <= m2 < 2^24 and 0 <= -e2 <= 23:
+ * 1 <= f = m2 / 2^-e2 < 2^24.
+ *
+ * Test if the lower -e2 bits of the significand are 0, i.e. whether
+ * the fraction is 0. We can use ieeeMantissa here, since the implied
+ * 1 bit can never be tested by this; the implied 1 can only be part
+ * of a fraction if e2 < -FLOAT_MANTISSA_BITS which we already
+ * checked. (e.g. 0.5 gives ieeeMantissa == 0 and e2 == -24)
+ */
+ const uint32 mask = (1U << -e2) - 1;
+ const uint32 fraction = ieeeMantissa & mask;
+
+ if (fraction == 0)
+ {
+ /*----
+ * f is an integer in the range [1, 2^24).
+ * Note: mantissa might contain trailing (decimal) 0's.
+ * Note: since 2^24 < 10^9, there is no need to adjust
+ * decimalLength().
+ */
+ const uint32 m2 = (1U << FLOAT_MANTISSA_BITS) | ieeeMantissa;
+
+ v->mantissa = m2 >> -e2;
+ v->exponent = 0;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * Store the shortest decimal representation of the given float as an
+ * UNTERMINATED string in the caller's supplied buffer (which must be at least
+ * FLOAT_SHORTEST_DECIMAL_LEN-1 bytes long).
+ *
+ * Returns the number of bytes stored.
+ */
+int
+float_to_shortest_decimal_bufn(float f, char *result)
+{
+ /*
+ * Step 1: Decode the floating-point number, and unify normalized and
+ * subnormal cases.
+ */
+ const uint32 bits = float_to_bits(f);
+
+ /* Decode bits into sign, mantissa, and exponent. */
+ const bool ieeeSign = ((bits >> (FLOAT_MANTISSA_BITS + FLOAT_EXPONENT_BITS)) & 1) != 0;
+ const uint32 ieeeMantissa = bits & ((1u << FLOAT_MANTISSA_BITS) - 1);
+ const uint32 ieeeExponent = (bits >> FLOAT_MANTISSA_BITS) & ((1u << FLOAT_EXPONENT_BITS) - 1);
+
+ /* Case distinction; exit early for the easy cases. */
+ if (ieeeExponent == ((1u << FLOAT_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0))
+ {
+ return copy_special_str(result, ieeeSign, (ieeeExponent != 0), (ieeeMantissa != 0));
+ }
+
+ floating_decimal_32 v;
+ const bool isSmallInt = f2d_small_int(ieeeMantissa, ieeeExponent, &v);
+
+ if (!isSmallInt)
+ {
+ v = f2d(ieeeMantissa, ieeeExponent);
+ }
+
+ return to_chars(v, ieeeSign, result);
+}
+
+/*
+ * Store the shortest decimal representation of the given float as a
+ * null-terminated string in the caller's supplied buffer (which must be at
+ * least FLOAT_SHORTEST_DECIMAL_LEN bytes long).
+ *
+ * Returns the string length.
+ */
+int
+float_to_shortest_decimal_buf(float f, char *result)
+{
+ const int index = float_to_shortest_decimal_bufn(f, result);
+
+ /* Terminate the string. */
+ Assert(index < FLOAT_SHORTEST_DECIMAL_LEN);
+ result[index] = '\0';
+ return index;
+}
+
+/*
+ * Return the shortest decimal representation as a null-terminated palloc'd
+ * string (outside the backend, uses malloc() instead).
+ *
+ * Caller is responsible for freeing the result.
+ */
+char *
+float_to_shortest_decimal(float f)
+{
+ char *const result = (char *) palloc(FLOAT_SHORTEST_DECIMAL_LEN);
+
+ float_to_shortest_decimal_buf(f, result);
+ return result;
+}
diff --git a/src/common/fe_memutils.c b/src/common/fe_memutils.c
new file mode 100644
index 0000000..b027a02
--- /dev/null
+++ b/src/common/fe_memutils.c
@@ -0,0 +1,176 @@
+/*-------------------------------------------------------------------------
+ *
+ * fe_memutils.c
+ * memory management support for frontend code
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/fe_memutils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#error "This file is not expected to be compiled for backend code"
+#endif
+
+#include "postgres_fe.h"
+
+static inline void *
+pg_malloc_internal(size_t size, int flags)
+{
+ void *tmp;
+
+ /* Avoid unportable behavior of malloc(0) */
+ if (size == 0)
+ size = 1;
+ tmp = malloc(size);
+ if (tmp == NULL)
+ {
+ if ((flags & MCXT_ALLOC_NO_OOM) == 0)
+ {
+ fprintf(stderr, _("out of memory\n"));
+ exit(EXIT_FAILURE);
+ }
+ return NULL;
+ }
+
+ if ((flags & MCXT_ALLOC_ZERO) != 0)
+ MemSet(tmp, 0, size);
+ return tmp;
+}
+
+void *
+pg_malloc(size_t size)
+{
+ return pg_malloc_internal(size, 0);
+}
+
+void *
+pg_malloc0(size_t size)
+{
+ return pg_malloc_internal(size, MCXT_ALLOC_ZERO);
+}
+
+void *
+pg_malloc_extended(size_t size, int flags)
+{
+ return pg_malloc_internal(size, flags);
+}
+
+void *
+pg_realloc(void *ptr, size_t size)
+{
+ void *tmp;
+
+ /* Avoid unportable behavior of realloc(NULL, 0) */
+ if (ptr == NULL && size == 0)
+ size = 1;
+ tmp = realloc(ptr, size);
+ if (!tmp)
+ {
+ fprintf(stderr, _("out of memory\n"));
+ exit(EXIT_FAILURE);
+ }
+ return tmp;
+}
+
+/*
+ * "Safe" wrapper around strdup().
+ */
+char *
+pg_strdup(const char *in)
+{
+ char *tmp;
+
+ if (!in)
+ {
+ fprintf(stderr,
+ _("cannot duplicate null pointer (internal error)\n"));
+ exit(EXIT_FAILURE);
+ }
+ tmp = strdup(in);
+ if (!tmp)
+ {
+ fprintf(stderr, _("out of memory\n"));
+ exit(EXIT_FAILURE);
+ }
+ return tmp;
+}
+
+void
+pg_free(void *ptr)
+{
+ if (ptr != NULL)
+ free(ptr);
+}
+
+/*
+ * Frontend emulation of backend memory management functions. Useful for
+ * programs that compile backend files.
+ */
+void *
+palloc(Size size)
+{
+ return pg_malloc_internal(size, 0);
+}
+
+void *
+palloc0(Size size)
+{
+ return pg_malloc_internal(size, MCXT_ALLOC_ZERO);
+}
+
+void *
+palloc_extended(Size size, int flags)
+{
+ return pg_malloc_internal(size, flags);
+}
+
+void
+pfree(void *pointer)
+{
+ pg_free(pointer);
+}
+
+char *
+pstrdup(const char *in)
+{
+ return pg_strdup(in);
+}
+
+char *
+pnstrdup(const char *in, Size size)
+{
+ char *tmp;
+ int len;
+
+ if (!in)
+ {
+ fprintf(stderr,
+ _("cannot duplicate null pointer (internal error)\n"));
+ exit(EXIT_FAILURE);
+ }
+
+ len = strnlen(in, size);
+ tmp = malloc(len + 1);
+ if (tmp == NULL)
+ {
+ fprintf(stderr, _("out of memory\n"));
+ exit(EXIT_FAILURE);
+ }
+
+ memcpy(tmp, in, len);
+ tmp[len] = '\0';
+
+ return tmp;
+}
+
+void *
+repalloc(void *pointer, Size size)
+{
+ return pg_realloc(pointer, size);
+}
diff --git a/src/common/file_perm.c b/src/common/file_perm.c
new file mode 100644
index 0000000..96b44dc
--- /dev/null
+++ b/src/common/file_perm.c
@@ -0,0 +1,91 @@
+/*-------------------------------------------------------------------------
+ *
+ * File and directory permission routines
+ *
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/common/file_perm.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "common/file_perm.h"
+
+/* Modes for creating directories and files in the data directory */
+int pg_dir_create_mode = PG_DIR_MODE_OWNER;
+int pg_file_create_mode = PG_FILE_MODE_OWNER;
+
+/*
+ * Mode mask to pass to umask(). This is more of a preventative measure since
+ * all file/directory creates should be performed using the create modes above.
+ */
+int pg_mode_mask = PG_MODE_MASK_OWNER;
+
+/*
+ * Set create modes and mask to use when writing to PGDATA based on the data
+ * directory mode passed. If group read/execute are present in the mode, then
+ * create modes and mask will be relaxed to allow group read/execute on all
+ * newly created files and directories.
+ */
+void
+SetDataDirectoryCreatePerm(int dataDirMode)
+{
+ /* If the data directory mode has group access */
+ if ((PG_DIR_MODE_GROUP & dataDirMode) == PG_DIR_MODE_GROUP)
+ {
+ pg_dir_create_mode = PG_DIR_MODE_GROUP;
+ pg_file_create_mode = PG_FILE_MODE_GROUP;
+ pg_mode_mask = PG_MODE_MASK_GROUP;
+ }
+ /* Else use default permissions */
+ else
+ {
+ pg_dir_create_mode = PG_DIR_MODE_OWNER;
+ pg_file_create_mode = PG_FILE_MODE_OWNER;
+ pg_mode_mask = PG_MODE_MASK_OWNER;
+ }
+}
+
+#ifdef FRONTEND
+
+/*
+ * Get the create modes and mask to use when writing to PGDATA by examining the
+ * mode of the PGDATA directory and calling SetDataDirectoryCreatePerm().
+ *
+ * Errors are not handled here and should be reported by the application when
+ * false is returned.
+ *
+ * Suppress when on Windows, because there may not be proper support for Unix-y
+ * file permissions.
+ */
+bool
+GetDataDirectoryCreatePerm(const char *dataDir)
+{
+#if !defined(WIN32) && !defined(__CYGWIN__)
+ struct stat statBuf;
+
+ /*
+ * If an error occurs getting the mode then return false. The caller is
+ * responsible for generating an error, if appropriate, indicating that we
+ * were unable to access the data directory.
+ */
+ if (stat(dataDir, &statBuf) == -1)
+ return false;
+
+ /* Set permissions */
+ SetDataDirectoryCreatePerm(statBuf.st_mode);
+ return true;
+#else /* !defined(WIN32) && !defined(__CYGWIN__) */
+ /*
+ * On Windows, we don't have anything to do here since they don't have
+ * Unix-y permissions.
+ */
+ return true;
+#endif
+}
+
+
+#endif /* FRONTEND */
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
new file mode 100644
index 0000000..7584c1f
--- /dev/null
+++ b/src/common/file_utils.c
@@ -0,0 +1,391 @@
+/*-------------------------------------------------------------------------
+ *
+ * File-processing utility routines.
+ *
+ * Assorted utility functions to work on files.
+ *
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/common/file_utils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres_fe.h"
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "common/file_utils.h"
+#include "common/logging.h"
+
+
+/* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
+#if defined(HAVE_SYNC_FILE_RANGE)
+#define PG_FLUSH_DATA_WORKS 1
+#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
+#define PG_FLUSH_DATA_WORKS 1
+#endif
+
+/*
+ * pg_xlog has been renamed to pg_wal in version 10.
+ */
+#define MINIMUM_VERSION_FOR_PG_WAL 100000
+
+#ifdef PG_FLUSH_DATA_WORKS
+static int pre_sync_fname(const char *fname, bool isdir);
+#endif
+static void walkdir(const char *path,
+ int (*action) (const char *fname, bool isdir),
+ bool process_symlinks);
+
+/*
+ * Issue fsync recursively on PGDATA and all its contents.
+ *
+ * We fsync regular files and directories wherever they are, but we follow
+ * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc.
+ * Other symlinks are presumed to point at files we're not responsible for
+ * fsyncing, and might not have privileges to write at all.
+ *
+ * serverVersion indicates the version of the server to be fsync'd.
+ */
+void
+fsync_pgdata(const char *pg_data,
+ int serverVersion)
+{
+ bool xlog_is_symlink;
+ char pg_wal[MAXPGPATH];
+ char pg_tblspc[MAXPGPATH];
+
+ /* handle renaming of pg_xlog to pg_wal in post-10 clusters */
+ snprintf(pg_wal, MAXPGPATH, "%s/%s", pg_data,
+ serverVersion < MINIMUM_VERSION_FOR_PG_WAL ? "pg_xlog" : "pg_wal");
+ snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data);
+
+ /*
+ * If pg_wal is a symlink, we'll need to recurse into it separately,
+ * because the first walkdir below will ignore it.
+ */
+ xlog_is_symlink = false;
+
+#ifndef WIN32
+ {
+ struct stat st;
+
+ if (lstat(pg_wal, &st) < 0)
+ pg_log_error("could not stat file \"%s\": %m", pg_wal);
+ else if (S_ISLNK(st.st_mode))
+ xlog_is_symlink = true;
+ }
+#else
+ if (pgwin32_is_junction(pg_wal))
+ xlog_is_symlink = true;
+#endif
+
+ /*
+ * If possible, hint to the kernel that we're soon going to fsync the data
+ * directory and its contents.
+ */
+#ifdef PG_FLUSH_DATA_WORKS
+ walkdir(pg_data, pre_sync_fname, false);
+ if (xlog_is_symlink)
+ walkdir(pg_wal, pre_sync_fname, false);
+ walkdir(pg_tblspc, pre_sync_fname, true);
+#endif
+
+ /*
+ * Now we do the fsync()s in the same order.
+ *
+ * The main call ignores symlinks, so in addition to specially processing
+ * pg_wal if it's a symlink, pg_tblspc has to be visited separately with
+ * process_symlinks = true. Note that if there are any plain directories
+ * in pg_tblspc, they'll get fsync'd twice. That's not an expected case
+ * so we don't worry about optimizing it.
+ */
+ walkdir(pg_data, fsync_fname, false);
+ if (xlog_is_symlink)
+ walkdir(pg_wal, fsync_fname, false);
+ walkdir(pg_tblspc, fsync_fname, true);
+}
+
+/*
+ * Issue fsync recursively on the given directory and all its contents.
+ *
+ * This is a convenient wrapper on top of walkdir().
+ */
+void
+fsync_dir_recurse(const char *dir)
+{
+ /*
+ * If possible, hint to the kernel that we're soon going to fsync the data
+ * directory and its contents.
+ */
+#ifdef PG_FLUSH_DATA_WORKS
+ walkdir(dir, pre_sync_fname, false);
+#endif
+
+ walkdir(dir, fsync_fname, false);
+}
+
+/*
+ * walkdir: recursively walk a directory, applying the action to each
+ * regular file and directory (including the named directory itself).
+ *
+ * If process_symlinks is true, the action and recursion are also applied
+ * to regular files and directories that are pointed to by symlinks in the
+ * given directory; otherwise symlinks are ignored. Symlinks are always
+ * ignored in subdirectories, ie we intentionally don't pass down the
+ * process_symlinks flag to recursive calls.
+ *
+ * Errors are reported but not considered fatal.
+ *
+ * See also walkdir in fd.c, which is a backend version of this logic.
+ */
+static void
+walkdir(const char *path,
+ int (*action) (const char *fname, bool isdir),
+ bool process_symlinks)
+{
+ DIR *dir;
+ struct dirent *de;
+
+ dir = opendir(path);
+ if (dir == NULL)
+ {
+ pg_log_error("could not open directory \"%s\": %m", path);
+ return;
+ }
+
+ while (errno = 0, (de = readdir(dir)) != NULL)
+ {
+ char subpath[MAXPGPATH * 2];
+ struct stat fst;
+ int sret;
+
+ if (strcmp(de->d_name, ".") == 0 ||
+ strcmp(de->d_name, "..") == 0)
+ continue;
+
+ snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
+
+ if (process_symlinks)
+ sret = stat(subpath, &fst);
+ else
+ sret = lstat(subpath, &fst);
+
+ if (sret < 0)
+ {
+ pg_log_error("could not stat file \"%s\": %m", subpath);
+ continue;
+ }
+
+ if (S_ISREG(fst.st_mode))
+ (*action) (subpath, false);
+ else if (S_ISDIR(fst.st_mode))
+ walkdir(subpath, action, false);
+ }
+
+ if (errno)
+ pg_log_error("could not read directory \"%s\": %m", path);
+
+ (void) closedir(dir);
+
+ /*
+ * It's important to fsync the destination directory itself as individual
+ * file fsyncs don't guarantee that the directory entry for the file is
+ * synced. Recent versions of ext4 have made the window much wider but
+ * it's been an issue for ext3 and other filesystems in the past.
+ */
+ (*action) (path, true);
+}
+
+/*
+ * Hint to the OS that it should get ready to fsync() this file.
+ *
+ * Ignores errors trying to open unreadable files, and reports other errors
+ * non-fatally.
+ */
+#ifdef PG_FLUSH_DATA_WORKS
+
+static int
+pre_sync_fname(const char *fname, bool isdir)
+{
+ int fd;
+
+ fd = open(fname, O_RDONLY | PG_BINARY, 0);
+
+ if (fd < 0)
+ {
+ if (errno == EACCES || (isdir && errno == EISDIR))
+ return 0;
+ pg_log_error("could not open file \"%s\": %m", fname);
+ return -1;
+ }
+
+ /*
+ * We do what pg_flush_data() would do in the backend: prefer to use
+ * sync_file_range, but fall back to posix_fadvise. We ignore errors
+ * because this is only a hint.
+ */
+#if defined(HAVE_SYNC_FILE_RANGE)
+ (void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);
+#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
+ (void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
+#else
+#error PG_FLUSH_DATA_WORKS should not have been defined
+#endif
+
+ (void) close(fd);
+ return 0;
+}
+
+#endif /* PG_FLUSH_DATA_WORKS */
+
+/*
+ * fsync_fname -- Try to fsync a file or directory
+ *
+ * Ignores errors trying to open unreadable files, or trying to fsync
+ * directories on systems where that isn't allowed/required. All other errors
+ * are fatal.
+ */
+int
+fsync_fname(const char *fname, bool isdir)
+{
+ int fd;
+ int flags;
+ int returncode;
+
+ /*
+ * Some OSs require directories to be opened read-only whereas other
+ * systems don't allow us to fsync files opened read-only; so we need both
+ * cases here. Using O_RDWR will cause us to fail to fsync files that are
+ * not writable by our userid, but we assume that's OK.
+ */
+ flags = PG_BINARY;
+ if (!isdir)
+ flags |= O_RDWR;
+ else
+ flags |= O_RDONLY;
+
+ /*
+ * Open the file, silently ignoring errors about unreadable files (or
+ * unsupported operations, e.g. opening a directory under Windows), and
+ * logging others.
+ */
+ fd = open(fname, flags, 0);
+ if (fd < 0)
+ {
+ if (errno == EACCES || (isdir && errno == EISDIR))
+ return 0;
+ pg_log_error("could not open file \"%s\": %m", fname);
+ return -1;
+ }
+
+ returncode = fsync(fd);
+
+ /*
+ * Some OSes don't allow us to fsync directories at all, so we can ignore
+ * those errors. Anything else needs to be reported.
+ */
+ if (returncode != 0 && !(isdir && (errno == EBADF || errno == EINVAL)))
+ {
+ pg_log_fatal("could not fsync file \"%s\": %m", fname);
+ (void) close(fd);
+ exit(EXIT_FAILURE);
+ }
+
+ (void) close(fd);
+ return 0;
+}
+
+/*
+ * fsync_parent_path -- fsync the parent path of a file or directory
+ *
+ * This is aimed at making file operations persistent on disk in case of
+ * an OS crash or power failure.
+ */
+int
+fsync_parent_path(const char *fname)
+{
+ char parentpath[MAXPGPATH];
+
+ strlcpy(parentpath, fname, MAXPGPATH);
+ get_parent_directory(parentpath);
+
+ /*
+ * get_parent_directory() returns an empty string if the input argument is
+ * just a file name (see comments in path.c), so handle that as being the
+ * current directory.
+ */
+ if (strlen(parentpath) == 0)
+ strlcpy(parentpath, ".", MAXPGPATH);
+
+ if (fsync_fname(parentpath, true) != 0)
+ return -1;
+
+ return 0;
+}
+
+/*
+ * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability
+ *
+ * Wrapper around rename, similar to the backend version.
+ */
+int
+durable_rename(const char *oldfile, const char *newfile)
+{
+ int fd;
+
+ /*
+ * First fsync the old and target path (if it exists), to ensure that they
+ * are properly persistent on disk. Syncing the target file is not
+ * strictly necessary, but it makes it easier to reason about crashes;
+ * because it's then guaranteed that either source or target file exists
+ * after a crash.
+ */
+ if (fsync_fname(oldfile, false) != 0)
+ return -1;
+
+ fd = open(newfile, PG_BINARY | O_RDWR, 0);
+ if (fd < 0)
+ {
+ if (errno != ENOENT)
+ {
+ pg_log_error("could not open file \"%s\": %m", newfile);
+ return -1;
+ }
+ }
+ else
+ {
+ if (fsync(fd) != 0)
+ {
+ pg_log_fatal("could not fsync file \"%s\": %m", newfile);
+ close(fd);
+ exit(EXIT_FAILURE);
+ }
+ close(fd);
+ }
+
+ /* Time to do the real deal... */
+ if (rename(oldfile, newfile) != 0)
+ {
+ pg_log_error("could not rename file \"%s\" to \"%s\": %m",
+ oldfile, newfile);
+ return -1;
+ }
+
+ /*
+ * To guarantee renaming the file is persistent, fsync the file with its
+ * new name, and its containing directory.
+ */
+ if (fsync_fname(newfile, false) != 0)
+ return -1;
+
+ if (fsync_parent_path(newfile) != 0)
+ return -1;
+
+ return 0;
+}
diff --git a/src/common/hashfn.c b/src/common/hashfn.c
new file mode 100644
index 0000000..990f18e
--- /dev/null
+++ b/src/common/hashfn.c
@@ -0,0 +1,692 @@
+/*-------------------------------------------------------------------------
+ *
+ * hashfn.c
+ * Generic hashing functions, and hash functions for use in dynahash.c
+ * hashtables
+ *
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/hashfn.c
+ *
+ * NOTES
+ * It is expected that every bit of a hash function's 32-bit result is
+ * as random as every other; failure to ensure this is likely to lead
+ * to poor performance of hash tables. In most cases a hash
+ * function should use hash_bytes() or its variant hash_bytes_uint32(),
+ * or the wrappers hash_any() and hash_uint32 defined in hashfn.h.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "common/hashfn.h"
+
+
+/*
+ * This hash function was written by Bob Jenkins
+ * (bob_jenkins@burtleburtle.net), and superficially adapted
+ * for PostgreSQL by Neil Conway. For more information on this
+ * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
+ * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
+ *
+ * In the current code, we have adopted Bob's 2006 update of his hash
+ * function to fetch the data a word at a time when it is suitably aligned.
+ * This makes for a useful speedup, at the cost of having to maintain
+ * four code paths (aligned vs unaligned, and little-endian vs big-endian).
+ * It also uses two separate mixing functions mix() and final(), instead
+ * of a slower multi-purpose function.
+ */
+
+/* Get a bit mask of the bits set in non-uint32 aligned addresses */
+#define UINT32_ALIGN_MASK (sizeof(uint32) - 1)
+
+/* Rotate a uint32 value left by k bits - note multiple evaluation! */
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+
+/*----------
+ * mix -- mix 3 32-bit values reversibly.
+ *
+ * This is reversible, so any information in (a,b,c) before mix() is
+ * still in (a,b,c) after mix().
+ *
+ * If four pairs of (a,b,c) inputs are run through mix(), or through
+ * mix() in reverse, there are at least 32 bits of the output that
+ * are sometimes the same for one pair and different for another pair.
+ * This was tested for:
+ * * pairs that differed by one bit, by two bits, in any combination
+ * of top bits of (a,b,c), or in any combination of bottom bits of
+ * (a,b,c).
+ * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ * is commonly produced by subtraction) look like a single 1-bit
+ * difference.
+ * * the base values were pseudorandom, all zero but one bit set, or
+ * all zero plus a counter that starts at zero.
+ *
+ * This does not achieve avalanche. There are input bits of (a,b,c)
+ * that fail to affect some output bits of (a,b,c), especially of a. The
+ * most thoroughly mixed value is c, but it doesn't really even achieve
+ * avalanche in c.
+ *
+ * This allows some parallelism. Read-after-writes are good at doubling
+ * the number of bits affected, so the goal of mixing pulls in the opposite
+ * direction from the goal of parallelism. I did what I could. Rotates
+ * seem to cost as much as shifts on every machine I could lay my hands on,
+ * and rotates are much kinder to the top and bottom bits, so I used rotates.
+ *----------
+ */
+#define mix(a,b,c) \
+{ \
+ a -= c; a ^= rot(c, 4); c += b; \
+ b -= a; b ^= rot(a, 6); a += c; \
+ c -= b; c ^= rot(b, 8); b += a; \
+ a -= c; a ^= rot(c,16); c += b; \
+ b -= a; b ^= rot(a,19); a += c; \
+ c -= b; c ^= rot(b, 4); b += a; \
+}
+
+/*----------
+ * final -- final mixing of 3 32-bit values (a,b,c) into c
+ *
+ * Pairs of (a,b,c) values differing in only a few bits will usually
+ * produce values of c that look totally different. This was tested for
+ * * pairs that differed by one bit, by two bits, in any combination
+ * of top bits of (a,b,c), or in any combination of bottom bits of
+ * (a,b,c).
+ * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ * is commonly produced by subtraction) look like a single 1-bit
+ * difference.
+ * * the base values were pseudorandom, all zero but one bit set, or
+ * all zero plus a counter that starts at zero.
+ *
+ * The use of separate functions for mix() and final() allow for a
+ * substantial performance increase since final() does not need to
+ * do well in reverse, but is does need to affect all output bits.
+ * mix(), on the other hand, does not need to affect all output
+ * bits (affecting 32 bits is enough). The original hash function had
+ * a single mixing operation that had to satisfy both sets of requirements
+ * and was slower as a result.
+ *----------
+ */
+#define final(a,b,c) \
+{ \
+ c ^= b; c -= rot(b,14); \
+ a ^= c; a -= rot(c,11); \
+ b ^= a; b -= rot(a,25); \
+ c ^= b; c -= rot(b,16); \
+ a ^= c; a -= rot(c, 4); \
+ b ^= a; b -= rot(a,14); \
+ c ^= b; c -= rot(b,24); \
+}
+
+/*
+ * hash_bytes() -- hash a variable-length key into a 32-bit value
+ * k : the key (the unaligned variable-length array of bytes)
+ * len : the length of the key, counting by bytes
+ *
+ * Returns a uint32 value. Every bit of the key affects every bit of
+ * the return value. Every 1-bit and 2-bit delta achieves avalanche.
+ * About 6*len+35 instructions. The best hash table sizes are powers
+ * of 2. There is no need to do mod a prime (mod is sooo slow!).
+ * If you need less than 32 bits, use a bitmask.
+ *
+ * This procedure must never throw elog(ERROR); the ResourceOwner code
+ * relies on this not to fail.
+ *
+ * Note: we could easily change this function to return a 64-bit hash value
+ * by using the final values of both b and c. b is perhaps a little less
+ * well mixed than c, however.
+ */
+uint32
+hash_bytes(const unsigned char *k, int keylen)
+{
+ uint32 a,
+ b,
+ c,
+ len;
+
+ /* Set up the internal state */
+ len = keylen;
+ a = b = c = 0x9e3779b9 + len + 3923095;
+
+ /* If the source pointer is word-aligned, we use word-wide fetches */
+ if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
+ {
+ /* Code path for aligned source data */
+ const uint32 *ka = (const uint32 *) k;
+
+ /* handle most of the key */
+ while (len >= 12)
+ {
+ a += ka[0];
+ b += ka[1];
+ c += ka[2];
+ mix(a, b, c);
+ ka += 3;
+ len -= 12;
+ }
+
+ /* handle the last 11 bytes */
+ k = (const unsigned char *) ka;
+#ifdef WORDS_BIGENDIAN
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 8);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 24);
+ /* fall through */
+ case 8:
+ /* the lowest byte of c is reserved for the length */
+ b += ka[1];
+ a += ka[0];
+ break;
+ case 7:
+ b += ((uint32) k[6] << 8);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 16);
+ /* fall through */
+ case 5:
+ b += ((uint32) k[4] << 24);
+ /* fall through */
+ case 4:
+ a += ka[0];
+ break;
+ case 3:
+ a += ((uint32) k[2] << 8);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 16);
+ /* fall through */
+ case 1:
+ a += ((uint32) k[0] << 24);
+ /* case 0: nothing left to add */
+ }
+#else /* !WORDS_BIGENDIAN */
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 24);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 8);
+ /* fall through */
+ case 8:
+ /* the lowest byte of c is reserved for the length */
+ b += ka[1];
+ a += ka[0];
+ break;
+ case 7:
+ b += ((uint32) k[6] << 16);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 8);
+ /* fall through */
+ case 5:
+ b += k[4];
+ /* fall through */
+ case 4:
+ a += ka[0];
+ break;
+ case 3:
+ a += ((uint32) k[2] << 16);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 8);
+ /* fall through */
+ case 1:
+ a += k[0];
+ /* case 0: nothing left to add */
+ }
+#endif /* WORDS_BIGENDIAN */
+ }
+ else
+ {
+ /* Code path for non-aligned source data */
+
+ /* handle most of the key */
+ while (len >= 12)
+ {
+#ifdef WORDS_BIGENDIAN
+ a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
+ b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
+ c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
+#else /* !WORDS_BIGENDIAN */
+ a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
+ b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
+ c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
+#endif /* WORDS_BIGENDIAN */
+ mix(a, b, c);
+ k += 12;
+ len -= 12;
+ }
+
+ /* handle the last 11 bytes */
+#ifdef WORDS_BIGENDIAN
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 8);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 24);
+ /* fall through */
+ case 8:
+ /* the lowest byte of c is reserved for the length */
+ b += k[7];
+ /* fall through */
+ case 7:
+ b += ((uint32) k[6] << 8);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 16);
+ /* fall through */
+ case 5:
+ b += ((uint32) k[4] << 24);
+ /* fall through */
+ case 4:
+ a += k[3];
+ /* fall through */
+ case 3:
+ a += ((uint32) k[2] << 8);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 16);
+ /* fall through */
+ case 1:
+ a += ((uint32) k[0] << 24);
+ /* case 0: nothing left to add */
+ }
+#else /* !WORDS_BIGENDIAN */
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 24);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 8);
+ /* fall through */
+ case 8:
+ /* the lowest byte of c is reserved for the length */
+ b += ((uint32) k[7] << 24);
+ /* fall through */
+ case 7:
+ b += ((uint32) k[6] << 16);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 8);
+ /* fall through */
+ case 5:
+ b += k[4];
+ /* fall through */
+ case 4:
+ a += ((uint32) k[3] << 24);
+ /* fall through */
+ case 3:
+ a += ((uint32) k[2] << 16);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 8);
+ /* fall through */
+ case 1:
+ a += k[0];
+ /* case 0: nothing left to add */
+ }
+#endif /* WORDS_BIGENDIAN */
+ }
+
+ final(a, b, c);
+
+ /* report the result */
+ return c;
+}
+
+/*
+ * hash_bytes_extended() -- hash into a 64-bit value, using an optional seed
+ * k : the key (the unaligned variable-length array of bytes)
+ * len : the length of the key, counting by bytes
+ * seed : a 64-bit seed (0 means no seed)
+ *
+ * Returns a uint64 value. Otherwise similar to hash_bytes.
+ */
+uint64
+hash_bytes_extended(const unsigned char *k, int keylen, uint64 seed)
+{
+ uint32 a,
+ b,
+ c,
+ len;
+
+ /* Set up the internal state */
+ len = keylen;
+ a = b = c = 0x9e3779b9 + len + 3923095;
+
+ /* If the seed is non-zero, use it to perturb the internal state. */
+ if (seed != 0)
+ {
+ /*
+ * In essence, the seed is treated as part of the data being hashed,
+ * but for simplicity, we pretend that it's padded with four bytes of
+ * zeroes so that the seed constitutes a 12-byte chunk.
+ */
+ a += (uint32) (seed >> 32);
+ b += (uint32) seed;
+ mix(a, b, c);
+ }
+
+ /* If the source pointer is word-aligned, we use word-wide fetches */
+ if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
+ {
+ /* Code path for aligned source data */
+ const uint32 *ka = (const uint32 *) k;
+
+ /* handle most of the key */
+ while (len >= 12)
+ {
+ a += ka[0];
+ b += ka[1];
+ c += ka[2];
+ mix(a, b, c);
+ ka += 3;
+ len -= 12;
+ }
+
+ /* handle the last 11 bytes */
+ k = (const unsigned char *) ka;
+#ifdef WORDS_BIGENDIAN
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 8);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 24);
+ /* fall through */
+ case 8:
+ /* the lowest byte of c is reserved for the length */
+ b += ka[1];
+ a += ka[0];
+ break;
+ case 7:
+ b += ((uint32) k[6] << 8);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 16);
+ /* fall through */
+ case 5:
+ b += ((uint32) k[4] << 24);
+ /* fall through */
+ case 4:
+ a += ka[0];
+ break;
+ case 3:
+ a += ((uint32) k[2] << 8);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 16);
+ /* fall through */
+ case 1:
+ a += ((uint32) k[0] << 24);
+ /* case 0: nothing left to add */
+ }
+#else /* !WORDS_BIGENDIAN */
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 24);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 8);
+ /* fall through */
+ case 8:
+ /* the lowest byte of c is reserved for the length */
+ b += ka[1];
+ a += ka[0];
+ break;
+ case 7:
+ b += ((uint32) k[6] << 16);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 8);
+ /* fall through */
+ case 5:
+ b += k[4];
+ /* fall through */
+ case 4:
+ a += ka[0];
+ break;
+ case 3:
+ a += ((uint32) k[2] << 16);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 8);
+ /* fall through */
+ case 1:
+ a += k[0];
+ /* case 0: nothing left to add */
+ }
+#endif /* WORDS_BIGENDIAN */
+ }
+ else
+ {
+ /* Code path for non-aligned source data */
+
+ /* handle most of the key */
+ while (len >= 12)
+ {
+#ifdef WORDS_BIGENDIAN
+ a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
+ b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
+ c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
+#else /* !WORDS_BIGENDIAN */
+ a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
+ b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
+ c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
+#endif /* WORDS_BIGENDIAN */
+ mix(a, b, c);
+ k += 12;
+ len -= 12;
+ }
+
+ /* handle the last 11 bytes */
+#ifdef WORDS_BIGENDIAN
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 8);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 24);
+ /* fall through */
+ case 8:
+ /* the lowest byte of c is reserved for the length */
+ b += k[7];
+ /* fall through */
+ case 7:
+ b += ((uint32) k[6] << 8);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 16);
+ /* fall through */
+ case 5:
+ b += ((uint32) k[4] << 24);
+ /* fall through */
+ case 4:
+ a += k[3];
+ /* fall through */
+ case 3:
+ a += ((uint32) k[2] << 8);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 16);
+ /* fall through */
+ case 1:
+ a += ((uint32) k[0] << 24);
+ /* case 0: nothing left to add */
+ }
+#else /* !WORDS_BIGENDIAN */
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 24);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 8);
+ /* fall through */
+ case 8:
+ /* the lowest byte of c is reserved for the length */
+ b += ((uint32) k[7] << 24);
+ /* fall through */
+ case 7:
+ b += ((uint32) k[6] << 16);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 8);
+ /* fall through */
+ case 5:
+ b += k[4];
+ /* fall through */
+ case 4:
+ a += ((uint32) k[3] << 24);
+ /* fall through */
+ case 3:
+ a += ((uint32) k[2] << 16);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 8);
+ /* fall through */
+ case 1:
+ a += k[0];
+ /* case 0: nothing left to add */
+ }
+#endif /* WORDS_BIGENDIAN */
+ }
+
+ final(a, b, c);
+
+ /* report the result */
+ return ((uint64) b << 32) | c;
+}
+
+/*
+ * hash_bytes_uint32() -- hash a 32-bit value to a 32-bit value
+ *
+ * This has the same result as
+ * hash_bytes(&k, sizeof(uint32))
+ * but is faster and doesn't force the caller to store k into memory.
+ */
+uint32
+hash_bytes_uint32(uint32 k)
+{
+ uint32 a,
+ b,
+ c;
+
+ a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
+ a += k;
+
+ final(a, b, c);
+
+ /* report the result */
+ return c;
+}
+
+/*
+ * hash_bytes_uint32_extended() -- hash 32-bit value to 64-bit value, with seed
+ *
+ * Like hash_bytes_uint32, this is a convenience function.
+ */
+uint64
+hash_bytes_uint32_extended(uint32 k, uint64 seed)
+{
+ uint32 a,
+ b,
+ c;
+
+ a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
+
+ if (seed != 0)
+ {
+ a += (uint32) (seed >> 32);
+ b += (uint32) seed;
+ mix(a, b, c);
+ }
+
+ a += k;
+
+ final(a, b, c);
+
+ /* report the result */
+ return ((uint64) b << 32) | c;
+}
+
+/*
+ * string_hash: hash function for keys that are NUL-terminated strings.
+ *
+ * NOTE: this is the default hash function if none is specified.
+ */
+uint32
+string_hash(const void *key, Size keysize)
+{
+ /*
+ * If the string exceeds keysize-1 bytes, we want to hash only that many,
+ * because when it is copied into the hash table it will be truncated at
+ * that length.
+ */
+ Size s_len = strlen((const char *) key);
+
+ s_len = Min(s_len, keysize - 1);
+ return hash_bytes((const unsigned char *) key, (int) s_len);
+}
+
+/*
+ * tag_hash: hash function for fixed-size tag values
+ */
+uint32
+tag_hash(const void *key, Size keysize)
+{
+ return hash_bytes((const unsigned char *) key, (int) keysize);
+}
+
+/*
+ * uint32_hash: hash function for keys that are uint32 or int32
+ *
+ * (tag_hash works for this case too, but is slower)
+ */
+uint32
+uint32_hash(const void *key, Size keysize)
+{
+ Assert(keysize == sizeof(uint32));
+ return hash_bytes_uint32(*((const uint32 *) key));
+}
diff --git a/src/common/ip.c b/src/common/ip.c
new file mode 100644
index 0000000..69fcca8
--- /dev/null
+++ b/src/common/ip.c
@@ -0,0 +1,259 @@
+/*-------------------------------------------------------------------------
+ *
+ * ip.c
+ * IPv6-aware network access.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/ip.c
+ *
+ * This file and the IPV6 implementation were initially provided by
+ * Nigel Kukard <nkukard@lbsd.net>, Linux Based Systems Design
+ * http://www.lbsd.net.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#ifdef HAVE_NETINET_TCP_H
+#include <netinet/tcp.h>
+#endif
+#include <arpa/inet.h>
+#include <sys/file.h>
+
+#include "common/ip.h"
+
+
+
+#ifdef HAVE_UNIX_SOCKETS
+static int getaddrinfo_unix(const char *path,
+ const struct addrinfo *hintsp,
+ struct addrinfo **result);
+
+static int getnameinfo_unix(const struct sockaddr_un *sa, int salen,
+ char *node, int nodelen,
+ char *service, int servicelen,
+ int flags);
+#endif
+
+
+/*
+ * pg_getaddrinfo_all - get address info for Unix, IPv4 and IPv6 sockets
+ */
+int
+pg_getaddrinfo_all(const char *hostname, const char *servname,
+ const struct addrinfo *hintp, struct addrinfo **result)
+{
+ int rc;
+
+ /* not all versions of getaddrinfo() zero *result on failure */
+ *result = NULL;
+
+#ifdef HAVE_UNIX_SOCKETS
+ if (hintp->ai_family == AF_UNIX)
+ return getaddrinfo_unix(servname, hintp, result);
+#endif
+
+ /* NULL has special meaning to getaddrinfo(). */
+ rc = getaddrinfo((!hostname || hostname[0] == '\0') ? NULL : hostname,
+ servname, hintp, result);
+
+ return rc;
+}
+
+
+/*
+ * pg_freeaddrinfo_all - free addrinfo structures for IPv4, IPv6, or Unix
+ *
+ * Note: the ai_family field of the original hint structure must be passed
+ * so that we can tell whether the addrinfo struct was built by the system's
+ * getaddrinfo() routine or our own getaddrinfo_unix() routine. Some versions
+ * of getaddrinfo() might be willing to return AF_UNIX addresses, so it's
+ * not safe to look at ai_family in the addrinfo itself.
+ */
+void
+pg_freeaddrinfo_all(int hint_ai_family, struct addrinfo *ai)
+{
+#ifdef HAVE_UNIX_SOCKETS
+ if (hint_ai_family == AF_UNIX)
+ {
+ /* struct was built by getaddrinfo_unix (see pg_getaddrinfo_all) */
+ while (ai != NULL)
+ {
+ struct addrinfo *p = ai;
+
+ ai = ai->ai_next;
+ free(p->ai_addr);
+ free(p);
+ }
+ }
+ else
+#endif /* HAVE_UNIX_SOCKETS */
+ {
+ /* struct was built by getaddrinfo() */
+ if (ai != NULL)
+ freeaddrinfo(ai);
+ }
+}
+
+
+/*
+ * pg_getnameinfo_all - get name info for Unix, IPv4 and IPv6 sockets
+ *
+ * The API of this routine differs from the standard getnameinfo() definition
+ * in two ways: first, the addr parameter is declared as sockaddr_storage
+ * rather than struct sockaddr, and second, the node and service fields are
+ * guaranteed to be filled with something even on failure return.
+ */
+int
+pg_getnameinfo_all(const struct sockaddr_storage *addr, int salen,
+ char *node, int nodelen,
+ char *service, int servicelen,
+ int flags)
+{
+ int rc;
+
+#ifdef HAVE_UNIX_SOCKETS
+ if (addr && addr->ss_family == AF_UNIX)
+ rc = getnameinfo_unix((const struct sockaddr_un *) addr, salen,
+ node, nodelen,
+ service, servicelen,
+ flags);
+ else
+#endif
+ rc = getnameinfo((const struct sockaddr *) addr, salen,
+ node, nodelen,
+ service, servicelen,
+ flags);
+
+ if (rc != 0)
+ {
+ if (node)
+ strlcpy(node, "???", nodelen);
+ if (service)
+ strlcpy(service, "???", servicelen);
+ }
+
+ return rc;
+}
+
+
+#if defined(HAVE_UNIX_SOCKETS)
+
+/* -------
+ * getaddrinfo_unix - get unix socket info using IPv6-compatible API
+ *
+ * Bugs: only one addrinfo is set even though hintsp is NULL or
+ * ai_socktype is 0
+ * AI_CANONNAME is not supported.
+ * -------
+ */
+static int
+getaddrinfo_unix(const char *path, const struct addrinfo *hintsp,
+ struct addrinfo **result)
+{
+ struct addrinfo hints;
+ struct addrinfo *aip;
+ struct sockaddr_un *unp;
+
+ *result = NULL;
+
+ MemSet(&hints, 0, sizeof(hints));
+
+ if (strlen(path) >= sizeof(unp->sun_path))
+ return EAI_FAIL;
+
+ if (hintsp == NULL)
+ {
+ hints.ai_family = AF_UNIX;
+ hints.ai_socktype = SOCK_STREAM;
+ }
+ else
+ memcpy(&hints, hintsp, sizeof(hints));
+
+ if (hints.ai_socktype == 0)
+ hints.ai_socktype = SOCK_STREAM;
+
+ if (hints.ai_family != AF_UNIX)
+ {
+ /* shouldn't have been called */
+ return EAI_FAIL;
+ }
+
+ aip = calloc(1, sizeof(struct addrinfo));
+ if (aip == NULL)
+ return EAI_MEMORY;
+
+ unp = calloc(1, sizeof(struct sockaddr_un));
+ if (unp == NULL)
+ {
+ free(aip);
+ return EAI_MEMORY;
+ }
+
+ aip->ai_family = AF_UNIX;
+ aip->ai_socktype = hints.ai_socktype;
+ aip->ai_protocol = hints.ai_protocol;
+ aip->ai_next = NULL;
+ aip->ai_canonname = NULL;
+ *result = aip;
+
+ unp->sun_family = AF_UNIX;
+ aip->ai_addr = (struct sockaddr *) unp;
+ aip->ai_addrlen = sizeof(struct sockaddr_un);
+
+ strcpy(unp->sun_path, path);
+
+#ifdef HAVE_STRUCT_SOCKADDR_STORAGE_SS_LEN
+ unp->sun_len = sizeof(struct sockaddr_un);
+#endif
+
+ return 0;
+}
+
+/*
+ * Convert an address to a hostname.
+ */
+static int
+getnameinfo_unix(const struct sockaddr_un *sa, int salen,
+ char *node, int nodelen,
+ char *service, int servicelen,
+ int flags)
+{
+ int ret;
+
+ /* Invalid arguments. */
+ if (sa == NULL || sa->sun_family != AF_UNIX ||
+ (node == NULL && service == NULL))
+ return EAI_FAIL;
+
+ if (node)
+ {
+ ret = snprintf(node, nodelen, "%s", "[local]");
+ if (ret < 0 || ret >= nodelen)
+ return EAI_MEMORY;
+ }
+
+ if (service)
+ {
+ ret = snprintf(service, servicelen, "%s", sa->sun_path);
+ if (ret < 0 || ret >= servicelen)
+ return EAI_MEMORY;
+ }
+
+ return 0;
+}
+#endif /* HAVE_UNIX_SOCKETS */
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
new file mode 100644
index 0000000..6fe17a3
--- /dev/null
+++ b/src/common/jsonapi.c
@@ -0,0 +1,1132 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonapi.c
+ * JSON parser and lexer interfaces
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/jsonapi.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/jsonapi.h"
+#include "mb/pg_wchar.h"
+
+#ifdef FRONTEND
+#include "common/logging.h"
+#else
+#include "miscadmin.h"
+#endif
+
+#ifdef FRONTEND
+#define check_stack_depth()
+#define json_log_and_abort(...) \
+ do { pg_log_fatal(__VA_ARGS__); exit(1); } while(0)
+#else
+#define json_log_and_abort(...) elog(ERROR, __VA_ARGS__)
+#endif
+
+/*
+ * The context of the parser is maintained by the recursive descent
+ * mechanism, but is passed explicitly to the error reporting routine
+ * for better diagnostics.
+ */
+typedef enum /* contexts of JSON parser */
+{
+ JSON_PARSE_VALUE, /* expecting a value */
+ JSON_PARSE_STRING, /* expecting a string (for a field name) */
+ JSON_PARSE_ARRAY_START, /* saw '[', expecting value or ']' */
+ JSON_PARSE_ARRAY_NEXT, /* saw array element, expecting ',' or ']' */
+ JSON_PARSE_OBJECT_START, /* saw '{', expecting label or '}' */
+ JSON_PARSE_OBJECT_LABEL, /* saw object label, expecting ':' */
+ JSON_PARSE_OBJECT_NEXT, /* saw object value, expecting ',' or '}' */
+ JSON_PARSE_OBJECT_COMMA, /* saw object ',', expecting next label */
+ JSON_PARSE_END /* saw the end of a document, expect nothing */
+} JsonParseContext;
+
+static inline JsonParseErrorType json_lex_string(JsonLexContext *lex);
+static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, char *s,
+ bool *num_err, int *total_len);
+static inline JsonParseErrorType parse_scalar(JsonLexContext *lex, JsonSemAction *sem);
+static JsonParseErrorType parse_object_field(JsonLexContext *lex, JsonSemAction *sem);
+static JsonParseErrorType parse_object(JsonLexContext *lex, JsonSemAction *sem);
+static JsonParseErrorType parse_array_element(JsonLexContext *lex, JsonSemAction *sem);
+static JsonParseErrorType parse_array(JsonLexContext *lex, JsonSemAction *sem);
+static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
+static char *extract_token(JsonLexContext *lex);
+
+/* the null action object used for pure validation */
+JsonSemAction nullSemAction =
+{
+ NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL
+};
+
+/* Recursive Descent parser support routines */
+
+/*
+ * lex_peek
+ *
+ * what is the current look_ahead token?
+*/
+static inline JsonTokenType
+lex_peek(JsonLexContext *lex)
+{
+ return lex->token_type;
+}
+
+/*
+ * lex_expect
+ *
+ * move the lexer to the next token if the current look_ahead token matches
+ * the parameter token. Otherwise, report an error.
+ */
+static inline JsonParseErrorType
+lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
+{
+ if (lex_peek(lex) == token)
+ return json_lex(lex);
+ else
+ return report_parse_error(ctx, lex);
+}
+
+/* chars to consider as part of an alphanumeric token */
+#define JSON_ALPHANUMERIC_CHAR(c) \
+ (((c) >= 'a' && (c) <= 'z') || \
+ ((c) >= 'A' && (c) <= 'Z') || \
+ ((c) >= '0' && (c) <= '9') || \
+ (c) == '_' || \
+ IS_HIGHBIT_SET(c))
+
+/*
+ * Utility function to check if a string is a valid JSON number.
+ *
+ * str is of length len, and need not be null-terminated.
+ */
+bool
+IsValidJsonNumber(const char *str, int len)
+{
+ bool numeric_error;
+ int total_len;
+ JsonLexContext dummy_lex;
+
+ if (len <= 0)
+ return false;
+
+ /*
+ * json_lex_number expects a leading '-' to have been eaten already.
+ *
+ * having to cast away the constness of str is ugly, but there's not much
+ * easy alternative.
+ */
+ if (*str == '-')
+ {
+ dummy_lex.input = unconstify(char *, str) + 1;
+ dummy_lex.input_length = len - 1;
+ }
+ else
+ {
+ dummy_lex.input = unconstify(char *, str);
+ dummy_lex.input_length = len;
+ }
+
+ json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);
+
+ return (!numeric_error) && (total_len == dummy_lex.input_length);
+}
+
+/*
+ * makeJsonLexContextCstringLen
+ *
+ * lex constructor, with or without StringInfo object for de-escaped lexemes.
+ *
+ * Without is better as it makes the processing faster, so only make one
+ * if really required.
+ */
+JsonLexContext *
+makeJsonLexContextCstringLen(char *json, int len, int encoding, bool need_escapes)
+{
+ JsonLexContext *lex = palloc0(sizeof(JsonLexContext));
+
+ lex->input = lex->token_terminator = lex->line_start = json;
+ lex->line_number = 1;
+ lex->input_length = len;
+ lex->input_encoding = encoding;
+ if (need_escapes)
+ lex->strval = makeStringInfo();
+ return lex;
+}
+
+/*
+ * pg_parse_json
+ *
+ * Publicly visible entry point for the JSON parser.
+ *
+ * lex is a lexing context, set up for the json to be processed by calling
+ * makeJsonLexContext(). sem is a structure of function pointers to semantic
+ * action routines to be called at appropriate spots during parsing, and a
+ * pointer to a state object to be passed to those routines.
+ */
+JsonParseErrorType
+pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
+{
+ JsonTokenType tok;
+ JsonParseErrorType result;
+
+ /* get the initial token */
+ result = json_lex(lex);
+ if (result != JSON_SUCCESS)
+ return result;
+
+ tok = lex_peek(lex);
+
+ /* parse by recursive descent */
+ switch (tok)
+ {
+ case JSON_TOKEN_OBJECT_START:
+ result = parse_object(lex, sem);
+ break;
+ case JSON_TOKEN_ARRAY_START:
+ result = parse_array(lex, sem);
+ break;
+ default:
+ result = parse_scalar(lex, sem); /* json can be a bare scalar */
+ }
+
+ if (result == JSON_SUCCESS)
+ result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
+
+ return result;
+}
+
+/*
+ * json_count_array_elements
+ *
+ * Returns number of array elements in lex context at start of array token
+ * until end of array token at same nesting level.
+ *
+ * Designed to be called from array_start routines.
+ */
+JsonParseErrorType
+json_count_array_elements(JsonLexContext *lex, int *elements)
+{
+ JsonLexContext copylex;
+ int count;
+ JsonParseErrorType result;
+
+ /*
+ * It's safe to do this with a shallow copy because the lexical routines
+ * don't scribble on the input. They do scribble on the other pointers
+ * etc, so doing this with a copy makes that safe.
+ */
+ memcpy(&copylex, lex, sizeof(JsonLexContext));
+ copylex.strval = NULL; /* not interested in values here */
+ copylex.lex_level++;
+
+ count = 0;
+ result = lex_expect(JSON_PARSE_ARRAY_START, &copylex,
+ JSON_TOKEN_ARRAY_START);
+ if (result != JSON_SUCCESS)
+ return result;
+ if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
+ {
+ while (1)
+ {
+ count++;
+ result = parse_array_element(&copylex, &nullSemAction);
+ if (result != JSON_SUCCESS)
+ return result;
+ if (copylex.token_type != JSON_TOKEN_COMMA)
+ break;
+ result = json_lex(&copylex);
+ if (result != JSON_SUCCESS)
+ return result;
+ }
+ }
+ result = lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex,
+ JSON_TOKEN_ARRAY_END);
+ if (result != JSON_SUCCESS)
+ return result;
+
+ *elements = count;
+ return JSON_SUCCESS;
+}
+
+/*
+ * Recursive Descent parse routines. There is one for each structural
+ * element in a json document:
+ * - scalar (string, number, true, false, null)
+ * - array ( [ ] )
+ * - array element
+ * - object ( { } )
+ * - object field
+ */
+static inline JsonParseErrorType
+parse_scalar(JsonLexContext *lex, JsonSemAction *sem)
+{
+ char *val = NULL;
+ json_scalar_action sfunc = sem->scalar;
+ JsonTokenType tok = lex_peek(lex);
+ JsonParseErrorType result;
+
+ /* a scalar must be a string, a number, true, false, or null */
+ if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
+ tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
+ tok != JSON_TOKEN_NULL)
+ return report_parse_error(JSON_PARSE_VALUE, lex);
+
+ /* if no semantic function, just consume the token */
+ if (sfunc == NULL)
+ return json_lex(lex);
+
+ /* extract the de-escaped string value, or the raw lexeme */
+ if (lex_peek(lex) == JSON_TOKEN_STRING)
+ {
+ if (lex->strval != NULL)
+ val = pstrdup(lex->strval->data);
+ }
+ else
+ {
+ int len = (lex->token_terminator - lex->token_start);
+
+ val = palloc(len + 1);
+ memcpy(val, lex->token_start, len);
+ val[len] = '\0';
+ }
+
+ /* consume the token */
+ result = json_lex(lex);
+ if (result != JSON_SUCCESS)
+ return result;
+
+ /* invoke the callback */
+ (*sfunc) (sem->semstate, val, tok);
+
+ return JSON_SUCCESS;
+}
+
+static JsonParseErrorType
+parse_object_field(JsonLexContext *lex, JsonSemAction *sem)
+{
+ /*
+ * An object field is "fieldname" : value where value can be a scalar,
+ * object or array. Note: in user-facing docs and error messages, we
+ * generally call a field name a "key".
+ */
+
+ char *fname = NULL; /* keep compiler quiet */
+ json_ofield_action ostart = sem->object_field_start;
+ json_ofield_action oend = sem->object_field_end;
+ bool isnull;
+ JsonTokenType tok;
+ JsonParseErrorType result;
+
+ if (lex_peek(lex) != JSON_TOKEN_STRING)
+ return report_parse_error(JSON_PARSE_STRING, lex);
+ if ((ostart != NULL || oend != NULL) && lex->strval != NULL)
+ fname = pstrdup(lex->strval->data);
+ result = json_lex(lex);
+ if (result != JSON_SUCCESS)
+ return result;
+
+ result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
+ if (result != JSON_SUCCESS)
+ return result;
+
+ tok = lex_peek(lex);
+ isnull = tok == JSON_TOKEN_NULL;
+
+ if (ostart != NULL)
+ (*ostart) (sem->semstate, fname, isnull);
+
+ switch (tok)
+ {
+ case JSON_TOKEN_OBJECT_START:
+ result = parse_object(lex, sem);
+ break;
+ case JSON_TOKEN_ARRAY_START:
+ result = parse_array(lex, sem);
+ break;
+ default:
+ result = parse_scalar(lex, sem);
+ }
+ if (result != JSON_SUCCESS)
+ return result;
+
+ if (oend != NULL)
+ (*oend) (sem->semstate, fname, isnull);
+ return JSON_SUCCESS;
+}
+
+static JsonParseErrorType
+parse_object(JsonLexContext *lex, JsonSemAction *sem)
+{
+ /*
+ * an object is a possibly empty sequence of object fields, separated by
+ * commas and surrounded by curly braces.
+ */
+ json_struct_action ostart = sem->object_start;
+ json_struct_action oend = sem->object_end;
+ JsonTokenType tok;
+ JsonParseErrorType result;
+
+ check_stack_depth();
+
+ if (ostart != NULL)
+ (*ostart) (sem->semstate);
+
+ /*
+ * Data inside an object is at a higher nesting level than the object
+ * itself. Note that we increment this after we call the semantic routine
+ * for the object start and restore it before we call the routine for the
+ * object end.
+ */
+ lex->lex_level++;
+
+ Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START);
+ result = json_lex(lex);
+ if (result != JSON_SUCCESS)
+ return result;
+
+ tok = lex_peek(lex);
+ switch (tok)
+ {
+ case JSON_TOKEN_STRING:
+ result = parse_object_field(lex, sem);
+ while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
+ {
+ result = json_lex(lex);
+ if (result != JSON_SUCCESS)
+ break;
+ result = parse_object_field(lex, sem);
+ }
+ break;
+ case JSON_TOKEN_OBJECT_END:
+ break;
+ default:
+ /* case of an invalid initial token inside the object */
+ result = report_parse_error(JSON_PARSE_OBJECT_START, lex);
+ }
+ if (result != JSON_SUCCESS)
+ return result;
+
+ result = lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
+ if (result != JSON_SUCCESS)
+ return result;
+
+ lex->lex_level--;
+
+ if (oend != NULL)
+ (*oend) (sem->semstate);
+
+ return JSON_SUCCESS;
+}
+
+static JsonParseErrorType
+parse_array_element(JsonLexContext *lex, JsonSemAction *sem)
+{
+ json_aelem_action astart = sem->array_element_start;
+ json_aelem_action aend = sem->array_element_end;
+ JsonTokenType tok = lex_peek(lex);
+ JsonParseErrorType result;
+
+ bool isnull;
+
+ isnull = tok == JSON_TOKEN_NULL;
+
+ if (astart != NULL)
+ (*astart) (sem->semstate, isnull);
+
+ /* an array element is any object, array or scalar */
+ switch (tok)
+ {
+ case JSON_TOKEN_OBJECT_START:
+ result = parse_object(lex, sem);
+ break;
+ case JSON_TOKEN_ARRAY_START:
+ result = parse_array(lex, sem);
+ break;
+ default:
+ result = parse_scalar(lex, sem);
+ }
+
+ if (result != JSON_SUCCESS)
+ return result;
+
+ if (aend != NULL)
+ (*aend) (sem->semstate, isnull);
+
+ return JSON_SUCCESS;
+}
+
+static JsonParseErrorType
+parse_array(JsonLexContext *lex, JsonSemAction *sem)
+{
+ /*
+ * an array is a possibly empty sequence of array elements, separated by
+ * commas and surrounded by square brackets.
+ */
+ json_struct_action astart = sem->array_start;
+ json_struct_action aend = sem->array_end;
+ JsonParseErrorType result;
+
+ check_stack_depth();
+
+ if (astart != NULL)
+ (*astart) (sem->semstate);
+
+ /*
+ * Data inside an array is at a higher nesting level than the array
+ * itself. Note that we increment this after we call the semantic routine
+ * for the array start and restore it before we call the routine for the
+ * array end.
+ */
+ lex->lex_level++;
+
+ result = lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
+ if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END)
+ {
+ result = parse_array_element(lex, sem);
+
+ while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
+ {
+ result = json_lex(lex);
+ if (result != JSON_SUCCESS)
+ break;
+ result = parse_array_element(lex, sem);
+ }
+ }
+ if (result != JSON_SUCCESS)
+ return result;
+
+ result = lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
+ if (result != JSON_SUCCESS)
+ return result;
+
+ lex->lex_level--;
+
+ if (aend != NULL)
+ (*aend) (sem->semstate);
+
+ return JSON_SUCCESS;
+}
+
+/*
+ * Lex one token from the input stream.
+ */
+JsonParseErrorType
+json_lex(JsonLexContext *lex)
+{
+ char *s;
+ int len;
+ JsonParseErrorType result;
+
+ /* Skip leading whitespace. */
+ s = lex->token_terminator;
+ len = s - lex->input;
+ while (len < lex->input_length &&
+ (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
+ {
+ if (*s == '\n')
+ ++lex->line_number;
+ ++s;
+ ++len;
+ }
+ lex->token_start = s;
+
+ /* Determine token type. */
+ if (len >= lex->input_length)
+ {
+ lex->token_start = NULL;
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s;
+ lex->token_type = JSON_TOKEN_END;
+ }
+ else
+ {
+ switch (*s)
+ {
+ /* Single-character token, some kind of punctuation mark. */
+ case '{':
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s + 1;
+ lex->token_type = JSON_TOKEN_OBJECT_START;
+ break;
+ case '}':
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s + 1;
+ lex->token_type = JSON_TOKEN_OBJECT_END;
+ break;
+ case '[':
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s + 1;
+ lex->token_type = JSON_TOKEN_ARRAY_START;
+ break;
+ case ']':
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s + 1;
+ lex->token_type = JSON_TOKEN_ARRAY_END;
+ break;
+ case ',':
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s + 1;
+ lex->token_type = JSON_TOKEN_COMMA;
+ break;
+ case ':':
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s + 1;
+ lex->token_type = JSON_TOKEN_COLON;
+ break;
+ case '"':
+ /* string */
+ result = json_lex_string(lex);
+ if (result != JSON_SUCCESS)
+ return result;
+ lex->token_type = JSON_TOKEN_STRING;
+ break;
+ case '-':
+ /* Negative number. */
+ result = json_lex_number(lex, s + 1, NULL, NULL);
+ if (result != JSON_SUCCESS)
+ return result;
+ lex->token_type = JSON_TOKEN_NUMBER;
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ /* Positive number. */
+ result = json_lex_number(lex, s, NULL, NULL);
+ if (result != JSON_SUCCESS)
+ return result;
+ lex->token_type = JSON_TOKEN_NUMBER;
+ break;
+ default:
+ {
+ char *p;
+
+ /*
+ * We're not dealing with a string, number, legal
+ * punctuation mark, or end of string. The only legal
+ * tokens we might find here are true, false, and null,
+ * but for error reporting purposes we scan until we see a
+ * non-alphanumeric character. That way, we can report
+ * the whole word as an unexpected token, rather than just
+ * some unintuitive prefix thereof.
+ */
+ for (p = s; p - s < lex->input_length - len && JSON_ALPHANUMERIC_CHAR(*p); p++)
+ /* skip */ ;
+
+ /*
+ * We got some sort of unexpected punctuation or an
+ * otherwise unexpected character, so just complain about
+ * that one character.
+ */
+ if (p == s)
+ {
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s + 1;
+ return JSON_INVALID_TOKEN;
+ }
+
+ /*
+ * We've got a real alphanumeric token here. If it
+ * happens to be true, false, or null, all is well. If
+ * not, error out.
+ */
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = p;
+ if (p - s == 4)
+ {
+ if (memcmp(s, "true", 4) == 0)
+ lex->token_type = JSON_TOKEN_TRUE;
+ else if (memcmp(s, "null", 4) == 0)
+ lex->token_type = JSON_TOKEN_NULL;
+ else
+ return JSON_INVALID_TOKEN;
+ }
+ else if (p - s == 5 && memcmp(s, "false", 5) == 0)
+ lex->token_type = JSON_TOKEN_FALSE;
+ else
+ return JSON_INVALID_TOKEN;
+
+ }
+ } /* end of switch */
+ }
+
+ return JSON_SUCCESS;
+}
+
+/*
+ * The next token in the input stream is known to be a string; lex it.
+ */
+static inline JsonParseErrorType
+json_lex_string(JsonLexContext *lex)
+{
+ char *s;
+ int len;
+ int hi_surrogate = -1;
+
+ if (lex->strval != NULL)
+ resetStringInfo(lex->strval);
+
+ Assert(lex->input_length > 0);
+ s = lex->token_start;
+ len = lex->token_start - lex->input;
+ for (;;)
+ {
+ s++;
+ len++;
+ /* Premature end of the string. */
+ if (len >= lex->input_length)
+ {
+ lex->token_terminator = s;
+ return JSON_INVALID_TOKEN;
+ }
+ else if (*s == '"')
+ break;
+ else if ((unsigned char) *s < 32)
+ {
+ /* Per RFC4627, these characters MUST be escaped. */
+ /* Since *s isn't printable, exclude it from the context string */
+ lex->token_terminator = s;
+ return JSON_ESCAPING_REQUIRED;
+ }
+ else if (*s == '\\')
+ {
+ /* OK, we have an escape character. */
+ s++;
+ len++;
+ if (len >= lex->input_length)
+ {
+ lex->token_terminator = s;
+ return JSON_INVALID_TOKEN;
+ }
+ else if (*s == 'u')
+ {
+ int i;
+ int ch = 0;
+
+ for (i = 1; i <= 4; i++)
+ {
+ s++;
+ len++;
+ if (len >= lex->input_length)
+ {
+ lex->token_terminator = s;
+ return JSON_INVALID_TOKEN;
+ }
+ else if (*s >= '0' && *s <= '9')
+ ch = (ch * 16) + (*s - '0');
+ else if (*s >= 'a' && *s <= 'f')
+ ch = (ch * 16) + (*s - 'a') + 10;
+ else if (*s >= 'A' && *s <= 'F')
+ ch = (ch * 16) + (*s - 'A') + 10;
+ else
+ {
+ lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s);
+ return JSON_UNICODE_ESCAPE_FORMAT;
+ }
+ }
+ if (lex->strval != NULL)
+ {
+ /*
+ * Combine surrogate pairs.
+ */
+ if (is_utf16_surrogate_first(ch))
+ {
+ if (hi_surrogate != -1)
+ return JSON_UNICODE_HIGH_SURROGATE;
+ hi_surrogate = ch;
+ continue;
+ }
+ else if (is_utf16_surrogate_second(ch))
+ {
+ if (hi_surrogate == -1)
+ return JSON_UNICODE_LOW_SURROGATE;
+ ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
+ hi_surrogate = -1;
+ }
+
+ if (hi_surrogate != -1)
+ return JSON_UNICODE_LOW_SURROGATE;
+
+ /*
+ * Reject invalid cases. We can't have a value above
+ * 0xFFFF here (since we only accepted 4 hex digits
+ * above), so no need to test for out-of-range chars.
+ */
+ if (ch == 0)
+ {
+ /* We can't allow this, since our TEXT type doesn't */
+ return JSON_UNICODE_CODE_POINT_ZERO;
+ }
+
+ /*
+ * Add the represented character to lex->strval. In the
+ * backend, we can let pg_unicode_to_server() handle any
+ * required character set conversion; in frontend, we can
+ * only deal with trivial conversions.
+ *
+ * Note: pg_unicode_to_server() will throw an error for a
+ * conversion failure, rather than returning a failure
+ * indication. That seems OK.
+ */
+#ifndef FRONTEND
+ {
+ char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
+
+ pg_unicode_to_server(ch, (unsigned char *) cbuf);
+ appendStringInfoString(lex->strval, cbuf);
+ }
+#else
+ if (lex->input_encoding == PG_UTF8)
+ {
+ /* OK, we can map the code point to UTF8 easily */
+ char utf8str[5];
+ int utf8len;
+
+ unicode_to_utf8(ch, (unsigned char *) utf8str);
+ utf8len = pg_utf_mblen((unsigned char *) utf8str);
+ appendBinaryStringInfo(lex->strval, utf8str, utf8len);
+ }
+ else if (ch <= 0x007f)
+ {
+ /* The ASCII range is the same in all encodings */
+ appendStringInfoChar(lex->strval, (char) ch);
+ }
+ else
+ return JSON_UNICODE_HIGH_ESCAPE;
+#endif /* FRONTEND */
+ }
+ }
+ else if (lex->strval != NULL)
+ {
+ if (hi_surrogate != -1)
+ return JSON_UNICODE_LOW_SURROGATE;
+
+ switch (*s)
+ {
+ case '"':
+ case '\\':
+ case '/':
+ appendStringInfoChar(lex->strval, *s);
+ break;
+ case 'b':
+ appendStringInfoChar(lex->strval, '\b');
+ break;
+ case 'f':
+ appendStringInfoChar(lex->strval, '\f');
+ break;
+ case 'n':
+ appendStringInfoChar(lex->strval, '\n');
+ break;
+ case 'r':
+ appendStringInfoChar(lex->strval, '\r');
+ break;
+ case 't':
+ appendStringInfoChar(lex->strval, '\t');
+ break;
+ default:
+ /* Not a valid string escape, so signal error. */
+ lex->token_start = s;
+ lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s);
+ return JSON_ESCAPING_INVALID;
+ }
+ }
+ else if (strchr("\"\\/bfnrt", *s) == NULL)
+ {
+ /*
+ * Simpler processing if we're not bothered about de-escaping
+ *
+ * It's very tempting to remove the strchr() call here and
+ * replace it with a switch statement, but testing so far has
+ * shown it's not a performance win.
+ */
+ lex->token_start = s;
+ lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s);
+ return JSON_ESCAPING_INVALID;
+ }
+
+ }
+ else if (lex->strval != NULL)
+ {
+ if (hi_surrogate != -1)
+ return JSON_UNICODE_LOW_SURROGATE;
+
+ appendStringInfoChar(lex->strval, *s);
+ }
+
+ }
+
+ if (hi_surrogate != -1)
+ return JSON_UNICODE_LOW_SURROGATE;
+
+ /* Hooray, we found the end of the string! */
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s + 1;
+ return JSON_SUCCESS;
+}
+
+/*
+ * The next token in the input stream is known to be a number; lex it.
+ *
+ * In JSON, a number consists of four parts:
+ *
+ * (1) An optional minus sign ('-').
+ *
+ * (2) Either a single '0', or a string of one or more digits that does not
+ * begin with a '0'.
+ *
+ * (3) An optional decimal part, consisting of a period ('.') followed by
+ * one or more digits. (Note: While this part can be omitted
+ * completely, it's not OK to have only the decimal point without
+ * any digits afterwards.)
+ *
+ * (4) An optional exponent part, consisting of 'e' or 'E', optionally
+ * followed by '+' or '-', followed by one or more digits. (Note:
+ * As with the decimal part, if 'e' or 'E' is present, it must be
+ * followed by at least one digit.)
+ *
+ * The 's' argument to this function points to the ostensible beginning
+ * of part 2 - i.e. the character after any optional minus sign, or the
+ * first character of the string if there is none.
+ *
+ * If num_err is not NULL, we return an error flag to *num_err rather than
+ * raising an error for a badly-formed number. Also, if total_len is not NULL
+ * the distance from lex->input to the token end+1 is returned to *total_len.
+ */
+static inline JsonParseErrorType
+json_lex_number(JsonLexContext *lex, char *s,
+ bool *num_err, int *total_len)
+{
+ bool error = false;
+ int len = s - lex->input;
+
+ /* Part (1): leading sign indicator. */
+ /* Caller already did this for us; so do nothing. */
+
+ /* Part (2): parse main digit string. */
+ if (len < lex->input_length && *s == '0')
+ {
+ s++;
+ len++;
+ }
+ else if (len < lex->input_length && *s >= '1' && *s <= '9')
+ {
+ do
+ {
+ s++;
+ len++;
+ } while (len < lex->input_length && *s >= '0' && *s <= '9');
+ }
+ else
+ error = true;
+
+ /* Part (3): parse optional decimal portion. */
+ if (len < lex->input_length && *s == '.')
+ {
+ s++;
+ len++;
+ if (len == lex->input_length || *s < '0' || *s > '9')
+ error = true;
+ else
+ {
+ do
+ {
+ s++;
+ len++;
+ } while (len < lex->input_length && *s >= '0' && *s <= '9');
+ }
+ }
+
+ /* Part (4): parse optional exponent. */
+ if (len < lex->input_length && (*s == 'e' || *s == 'E'))
+ {
+ s++;
+ len++;
+ if (len < lex->input_length && (*s == '+' || *s == '-'))
+ {
+ s++;
+ len++;
+ }
+ if (len == lex->input_length || *s < '0' || *s > '9')
+ error = true;
+ else
+ {
+ do
+ {
+ s++;
+ len++;
+ } while (len < lex->input_length && *s >= '0' && *s <= '9');
+ }
+ }
+
+ /*
+ * Check for trailing garbage. As in json_lex(), any alphanumeric stuff
+ * here should be considered part of the token for error-reporting
+ * purposes.
+ */
+ for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
+ error = true;
+
+ if (total_len != NULL)
+ *total_len = len;
+
+ if (num_err != NULL)
+ {
+ /* let the caller handle any error */
+ *num_err = error;
+ }
+ else
+ {
+ /* return token endpoint */
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s;
+ /* handle error if any */
+ if (error)
+ return JSON_INVALID_TOKEN;
+ }
+
+ return JSON_SUCCESS;
+}
+
+/*
+ * Report a parse error.
+ *
+ * lex->token_start and lex->token_terminator must identify the current token.
+ */
+static JsonParseErrorType
+report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
+{
+ /* Handle case where the input ended prematurely. */
+ if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
+ return JSON_EXPECTED_MORE;
+
+ /* Otherwise choose the error type based on the parsing context. */
+ switch (ctx)
+ {
+ case JSON_PARSE_END:
+ return JSON_EXPECTED_END;
+ case JSON_PARSE_VALUE:
+ return JSON_EXPECTED_JSON;
+ case JSON_PARSE_STRING:
+ return JSON_EXPECTED_STRING;
+ case JSON_PARSE_ARRAY_START:
+ return JSON_EXPECTED_ARRAY_FIRST;
+ case JSON_PARSE_ARRAY_NEXT:
+ return JSON_EXPECTED_ARRAY_NEXT;
+ case JSON_PARSE_OBJECT_START:
+ return JSON_EXPECTED_OBJECT_FIRST;
+ case JSON_PARSE_OBJECT_LABEL:
+ return JSON_EXPECTED_COLON;
+ case JSON_PARSE_OBJECT_NEXT:
+ return JSON_EXPECTED_OBJECT_NEXT;
+ case JSON_PARSE_OBJECT_COMMA:
+ return JSON_EXPECTED_STRING;
+ }
+
+ /*
+ * We don't use a default: case, so that the compiler will warn about
+ * unhandled enum values. But this needs to be here anyway to cover the
+ * possibility of an incorrect input.
+ */
+ json_log_and_abort("unexpected json parse state: %d", (int) ctx);
+ return JSON_SUCCESS; /* silence stupider compilers */
+}
+
+/*
+ * Construct a detail message for a JSON error.
+ */
+char *
+json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
+{
+ switch (error)
+ {
+ case JSON_SUCCESS:
+ /* fall through to the error code after switch */
+ break;
+ case JSON_ESCAPING_INVALID:
+ return psprintf(_("Escape sequence \"\\%s\" is invalid."),
+ extract_token(lex));
+ case JSON_ESCAPING_REQUIRED:
+ return psprintf(_("Character with value 0x%02x must be escaped."),
+ (unsigned char) *(lex->token_terminator));
+ case JSON_EXPECTED_END:
+ return psprintf(_("Expected end of input, but found \"%s\"."),
+ extract_token(lex));
+ case JSON_EXPECTED_ARRAY_FIRST:
+ return psprintf(_("Expected array element or \"]\", but found \"%s\"."),
+ extract_token(lex));
+ case JSON_EXPECTED_ARRAY_NEXT:
+ return psprintf(_("Expected \",\" or \"]\", but found \"%s\"."),
+ extract_token(lex));
+ case JSON_EXPECTED_COLON:
+ return psprintf(_("Expected \":\", but found \"%s\"."),
+ extract_token(lex));
+ case JSON_EXPECTED_JSON:
+ return psprintf(_("Expected JSON value, but found \"%s\"."),
+ extract_token(lex));
+ case JSON_EXPECTED_MORE:
+ return _("The input string ended unexpectedly.");
+ case JSON_EXPECTED_OBJECT_FIRST:
+ return psprintf(_("Expected string or \"}\", but found \"%s\"."),
+ extract_token(lex));
+ case JSON_EXPECTED_OBJECT_NEXT:
+ return psprintf(_("Expected \",\" or \"}\", but found \"%s\"."),
+ extract_token(lex));
+ case JSON_EXPECTED_STRING:
+ return psprintf(_("Expected string, but found \"%s\"."),
+ extract_token(lex));
+ case JSON_INVALID_TOKEN:
+ return psprintf(_("Token \"%s\" is invalid."),
+ extract_token(lex));
+ case JSON_UNICODE_CODE_POINT_ZERO:
+ return _("\\u0000 cannot be converted to text.");
+ case JSON_UNICODE_ESCAPE_FORMAT:
+ return _("\"\\u\" must be followed by four hexadecimal digits.");
+ case JSON_UNICODE_HIGH_ESCAPE:
+ /* note: this case is only reachable in frontend not backend */
+ return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
+ case JSON_UNICODE_HIGH_SURROGATE:
+ return _("Unicode high surrogate must not follow a high surrogate.");
+ case JSON_UNICODE_LOW_SURROGATE:
+ return _("Unicode low surrogate must follow a high surrogate.");
+ }
+
+ /*
+ * We don't use a default: case, so that the compiler will warn about
+ * unhandled enum values. But this needs to be here anyway to cover the
+ * possibility of an incorrect input.
+ */
+ json_log_and_abort("unexpected json parse error type: %d", (int) error);
+ return NULL; /* silence stupider compilers */
+}
+
+/*
+ * Extract the current token from a lexing context, for error reporting.
+ */
+static char *
+extract_token(JsonLexContext *lex)
+{
+ int toklen = lex->token_terminator - lex->token_start;
+ char *token = palloc(toklen + 1);
+
+ memcpy(token, lex->token_start, toklen);
+ token[toklen] = '\0';
+ return token;
+}
diff --git a/src/common/keywords.c b/src/common/keywords.c
new file mode 100644
index 0000000..54ed977
--- /dev/null
+++ b/src/common/keywords.c
@@ -0,0 +1,33 @@
+/*-------------------------------------------------------------------------
+ *
+ * keywords.c
+ * PostgreSQL's list of SQL keywords
+ *
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/keywords.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "common/keywords.h"
+
+
+/* ScanKeywordList lookup data for SQL keywords */
+
+#include "kwlist_d.h"
+
+/* Keyword categories for SQL keywords */
+
+#define PG_KEYWORD(kwname, value, category) category,
+
+const uint8 ScanKeywordCategories[SCANKEYWORDS_NUM_KEYWORDS] = {
+#include "parser/kwlist.h"
+};
+
+#undef PG_KEYWORD
diff --git a/src/common/kwlist_d.h b/src/common/kwlist_d.h
new file mode 100644
index 0000000..d536e47
--- /dev/null
+++ b/src/common/kwlist_d.h
@@ -0,0 +1,1072 @@
+/*-------------------------------------------------------------------------
+ *
+ * kwlist_d.h
+ * List of keywords represented as a ScanKeywordList.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES
+ * ******************************
+ * *** DO NOT EDIT THIS FILE! ***
+ * ******************************
+ *
+ * It has been GENERATED by src/tools/gen_keywordlist.pl
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef KWLIST_D_H
+#define KWLIST_D_H
+
+#include "common/kwlookup.h"
+
+static const char ScanKeywords_kw_string[] =
+ "abort\0"
+ "absolute\0"
+ "access\0"
+ "action\0"
+ "add\0"
+ "admin\0"
+ "after\0"
+ "aggregate\0"
+ "all\0"
+ "also\0"
+ "alter\0"
+ "always\0"
+ "analyse\0"
+ "analyze\0"
+ "and\0"
+ "any\0"
+ "array\0"
+ "as\0"
+ "asc\0"
+ "assertion\0"
+ "assignment\0"
+ "asymmetric\0"
+ "at\0"
+ "attach\0"
+ "attribute\0"
+ "authorization\0"
+ "backward\0"
+ "before\0"
+ "begin\0"
+ "between\0"
+ "bigint\0"
+ "binary\0"
+ "bit\0"
+ "boolean\0"
+ "both\0"
+ "by\0"
+ "cache\0"
+ "call\0"
+ "called\0"
+ "cascade\0"
+ "cascaded\0"
+ "case\0"
+ "cast\0"
+ "catalog\0"
+ "chain\0"
+ "char\0"
+ "character\0"
+ "characteristics\0"
+ "check\0"
+ "checkpoint\0"
+ "class\0"
+ "close\0"
+ "cluster\0"
+ "coalesce\0"
+ "collate\0"
+ "collation\0"
+ "column\0"
+ "columns\0"
+ "comment\0"
+ "comments\0"
+ "commit\0"
+ "committed\0"
+ "concurrently\0"
+ "configuration\0"
+ "conflict\0"
+ "connection\0"
+ "constraint\0"
+ "constraints\0"
+ "content\0"
+ "continue\0"
+ "conversion\0"
+ "copy\0"
+ "cost\0"
+ "create\0"
+ "cross\0"
+ "csv\0"
+ "cube\0"
+ "current\0"
+ "current_catalog\0"
+ "current_date\0"
+ "current_role\0"
+ "current_schema\0"
+ "current_time\0"
+ "current_timestamp\0"
+ "current_user\0"
+ "cursor\0"
+ "cycle\0"
+ "data\0"
+ "database\0"
+ "day\0"
+ "deallocate\0"
+ "dec\0"
+ "decimal\0"
+ "declare\0"
+ "default\0"
+ "defaults\0"
+ "deferrable\0"
+ "deferred\0"
+ "definer\0"
+ "delete\0"
+ "delimiter\0"
+ "delimiters\0"
+ "depends\0"
+ "desc\0"
+ "detach\0"
+ "dictionary\0"
+ "disable\0"
+ "discard\0"
+ "distinct\0"
+ "do\0"
+ "document\0"
+ "domain\0"
+ "double\0"
+ "drop\0"
+ "each\0"
+ "else\0"
+ "enable\0"
+ "encoding\0"
+ "encrypted\0"
+ "end\0"
+ "enum\0"
+ "escape\0"
+ "event\0"
+ "except\0"
+ "exclude\0"
+ "excluding\0"
+ "exclusive\0"
+ "execute\0"
+ "exists\0"
+ "explain\0"
+ "expression\0"
+ "extension\0"
+ "external\0"
+ "extract\0"
+ "false\0"
+ "family\0"
+ "fetch\0"
+ "filter\0"
+ "first\0"
+ "float\0"
+ "following\0"
+ "for\0"
+ "force\0"
+ "foreign\0"
+ "forward\0"
+ "freeze\0"
+ "from\0"
+ "full\0"
+ "function\0"
+ "functions\0"
+ "generated\0"
+ "global\0"
+ "grant\0"
+ "granted\0"
+ "greatest\0"
+ "group\0"
+ "grouping\0"
+ "groups\0"
+ "handler\0"
+ "having\0"
+ "header\0"
+ "hold\0"
+ "hour\0"
+ "identity\0"
+ "if\0"
+ "ilike\0"
+ "immediate\0"
+ "immutable\0"
+ "implicit\0"
+ "import\0"
+ "in\0"
+ "include\0"
+ "including\0"
+ "increment\0"
+ "index\0"
+ "indexes\0"
+ "inherit\0"
+ "inherits\0"
+ "initially\0"
+ "inline\0"
+ "inner\0"
+ "inout\0"
+ "input\0"
+ "insensitive\0"
+ "insert\0"
+ "instead\0"
+ "int\0"
+ "integer\0"
+ "intersect\0"
+ "interval\0"
+ "into\0"
+ "invoker\0"
+ "is\0"
+ "isnull\0"
+ "isolation\0"
+ "join\0"
+ "key\0"
+ "label\0"
+ "language\0"
+ "large\0"
+ "last\0"
+ "lateral\0"
+ "leading\0"
+ "leakproof\0"
+ "least\0"
+ "left\0"
+ "level\0"
+ "like\0"
+ "limit\0"
+ "listen\0"
+ "load\0"
+ "local\0"
+ "localtime\0"
+ "localtimestamp\0"
+ "location\0"
+ "lock\0"
+ "locked\0"
+ "logged\0"
+ "mapping\0"
+ "match\0"
+ "materialized\0"
+ "maxvalue\0"
+ "method\0"
+ "minute\0"
+ "minvalue\0"
+ "mode\0"
+ "month\0"
+ "move\0"
+ "name\0"
+ "names\0"
+ "national\0"
+ "natural\0"
+ "nchar\0"
+ "new\0"
+ "next\0"
+ "nfc\0"
+ "nfd\0"
+ "nfkc\0"
+ "nfkd\0"
+ "no\0"
+ "none\0"
+ "normalize\0"
+ "normalized\0"
+ "not\0"
+ "nothing\0"
+ "notify\0"
+ "notnull\0"
+ "nowait\0"
+ "null\0"
+ "nullif\0"
+ "nulls\0"
+ "numeric\0"
+ "object\0"
+ "of\0"
+ "off\0"
+ "offset\0"
+ "oids\0"
+ "old\0"
+ "on\0"
+ "only\0"
+ "operator\0"
+ "option\0"
+ "options\0"
+ "or\0"
+ "order\0"
+ "ordinality\0"
+ "others\0"
+ "out\0"
+ "outer\0"
+ "over\0"
+ "overlaps\0"
+ "overlay\0"
+ "overriding\0"
+ "owned\0"
+ "owner\0"
+ "parallel\0"
+ "parser\0"
+ "partial\0"
+ "partition\0"
+ "passing\0"
+ "password\0"
+ "placing\0"
+ "plans\0"
+ "policy\0"
+ "position\0"
+ "preceding\0"
+ "precision\0"
+ "prepare\0"
+ "prepared\0"
+ "preserve\0"
+ "primary\0"
+ "prior\0"
+ "privileges\0"
+ "procedural\0"
+ "procedure\0"
+ "procedures\0"
+ "program\0"
+ "publication\0"
+ "quote\0"
+ "range\0"
+ "read\0"
+ "real\0"
+ "reassign\0"
+ "recheck\0"
+ "recursive\0"
+ "ref\0"
+ "references\0"
+ "referencing\0"
+ "refresh\0"
+ "reindex\0"
+ "relative\0"
+ "release\0"
+ "rename\0"
+ "repeatable\0"
+ "replace\0"
+ "replica\0"
+ "reset\0"
+ "restart\0"
+ "restrict\0"
+ "returning\0"
+ "returns\0"
+ "revoke\0"
+ "right\0"
+ "role\0"
+ "rollback\0"
+ "rollup\0"
+ "routine\0"
+ "routines\0"
+ "row\0"
+ "rows\0"
+ "rule\0"
+ "savepoint\0"
+ "schema\0"
+ "schemas\0"
+ "scroll\0"
+ "search\0"
+ "second\0"
+ "security\0"
+ "select\0"
+ "sequence\0"
+ "sequences\0"
+ "serializable\0"
+ "server\0"
+ "session\0"
+ "session_user\0"
+ "set\0"
+ "setof\0"
+ "sets\0"
+ "share\0"
+ "show\0"
+ "similar\0"
+ "simple\0"
+ "skip\0"
+ "smallint\0"
+ "snapshot\0"
+ "some\0"
+ "sql\0"
+ "stable\0"
+ "standalone\0"
+ "start\0"
+ "statement\0"
+ "statistics\0"
+ "stdin\0"
+ "stdout\0"
+ "storage\0"
+ "stored\0"
+ "strict\0"
+ "strip\0"
+ "subscription\0"
+ "substring\0"
+ "support\0"
+ "symmetric\0"
+ "sysid\0"
+ "system\0"
+ "table\0"
+ "tables\0"
+ "tablesample\0"
+ "tablespace\0"
+ "temp\0"
+ "template\0"
+ "temporary\0"
+ "text\0"
+ "then\0"
+ "ties\0"
+ "time\0"
+ "timestamp\0"
+ "to\0"
+ "trailing\0"
+ "transaction\0"
+ "transform\0"
+ "treat\0"
+ "trigger\0"
+ "trim\0"
+ "true\0"
+ "truncate\0"
+ "trusted\0"
+ "type\0"
+ "types\0"
+ "uescape\0"
+ "unbounded\0"
+ "uncommitted\0"
+ "unencrypted\0"
+ "union\0"
+ "unique\0"
+ "unknown\0"
+ "unlisten\0"
+ "unlogged\0"
+ "until\0"
+ "update\0"
+ "user\0"
+ "using\0"
+ "vacuum\0"
+ "valid\0"
+ "validate\0"
+ "validator\0"
+ "value\0"
+ "values\0"
+ "varchar\0"
+ "variadic\0"
+ "varying\0"
+ "verbose\0"
+ "version\0"
+ "view\0"
+ "views\0"
+ "volatile\0"
+ "when\0"
+ "where\0"
+ "whitespace\0"
+ "window\0"
+ "with\0"
+ "within\0"
+ "without\0"
+ "work\0"
+ "wrapper\0"
+ "write\0"
+ "xml\0"
+ "xmlattributes\0"
+ "xmlconcat\0"
+ "xmlelement\0"
+ "xmlexists\0"
+ "xmlforest\0"
+ "xmlnamespaces\0"
+ "xmlparse\0"
+ "xmlpi\0"
+ "xmlroot\0"
+ "xmlserialize\0"
+ "xmltable\0"
+ "year\0"
+ "yes\0"
+ "zone";
+
+static const uint16 ScanKeywords_kw_offsets[] = {
+ 0,
+ 6,
+ 15,
+ 22,
+ 29,
+ 33,
+ 39,
+ 45,
+ 55,
+ 59,
+ 64,
+ 70,
+ 77,
+ 85,
+ 93,
+ 97,
+ 101,
+ 107,
+ 110,
+ 114,
+ 124,
+ 135,
+ 146,
+ 149,
+ 156,
+ 166,
+ 180,
+ 189,
+ 196,
+ 202,
+ 210,
+ 217,
+ 224,
+ 228,
+ 236,
+ 241,
+ 244,
+ 250,
+ 255,
+ 262,
+ 270,
+ 279,
+ 284,
+ 289,
+ 297,
+ 303,
+ 308,
+ 318,
+ 334,
+ 340,
+ 351,
+ 357,
+ 363,
+ 371,
+ 380,
+ 388,
+ 398,
+ 405,
+ 413,
+ 421,
+ 430,
+ 437,
+ 447,
+ 460,
+ 474,
+ 483,
+ 494,
+ 505,
+ 517,
+ 525,
+ 534,
+ 545,
+ 550,
+ 555,
+ 562,
+ 568,
+ 572,
+ 577,
+ 585,
+ 601,
+ 614,
+ 627,
+ 642,
+ 655,
+ 673,
+ 686,
+ 693,
+ 699,
+ 704,
+ 713,
+ 717,
+ 728,
+ 732,
+ 740,
+ 748,
+ 756,
+ 765,
+ 776,
+ 785,
+ 793,
+ 800,
+ 810,
+ 821,
+ 829,
+ 834,
+ 841,
+ 852,
+ 860,
+ 868,
+ 877,
+ 880,
+ 889,
+ 896,
+ 903,
+ 908,
+ 913,
+ 918,
+ 925,
+ 934,
+ 944,
+ 948,
+ 953,
+ 960,
+ 966,
+ 973,
+ 981,
+ 991,
+ 1001,
+ 1009,
+ 1016,
+ 1024,
+ 1035,
+ 1045,
+ 1054,
+ 1062,
+ 1068,
+ 1075,
+ 1081,
+ 1088,
+ 1094,
+ 1100,
+ 1110,
+ 1114,
+ 1120,
+ 1128,
+ 1136,
+ 1143,
+ 1148,
+ 1153,
+ 1162,
+ 1172,
+ 1182,
+ 1189,
+ 1195,
+ 1203,
+ 1212,
+ 1218,
+ 1227,
+ 1234,
+ 1242,
+ 1249,
+ 1256,
+ 1261,
+ 1266,
+ 1275,
+ 1278,
+ 1284,
+ 1294,
+ 1304,
+ 1313,
+ 1320,
+ 1323,
+ 1331,
+ 1341,
+ 1351,
+ 1357,
+ 1365,
+ 1373,
+ 1382,
+ 1392,
+ 1399,
+ 1405,
+ 1411,
+ 1417,
+ 1429,
+ 1436,
+ 1444,
+ 1448,
+ 1456,
+ 1466,
+ 1475,
+ 1480,
+ 1488,
+ 1491,
+ 1498,
+ 1508,
+ 1513,
+ 1517,
+ 1523,
+ 1532,
+ 1538,
+ 1543,
+ 1551,
+ 1559,
+ 1569,
+ 1575,
+ 1580,
+ 1586,
+ 1591,
+ 1597,
+ 1604,
+ 1609,
+ 1615,
+ 1625,
+ 1640,
+ 1649,
+ 1654,
+ 1661,
+ 1668,
+ 1676,
+ 1682,
+ 1695,
+ 1704,
+ 1711,
+ 1718,
+ 1727,
+ 1732,
+ 1738,
+ 1743,
+ 1748,
+ 1754,
+ 1763,
+ 1771,
+ 1777,
+ 1781,
+ 1786,
+ 1790,
+ 1794,
+ 1799,
+ 1804,
+ 1807,
+ 1812,
+ 1822,
+ 1833,
+ 1837,
+ 1845,
+ 1852,
+ 1860,
+ 1867,
+ 1872,
+ 1879,
+ 1885,
+ 1893,
+ 1900,
+ 1903,
+ 1907,
+ 1914,
+ 1919,
+ 1923,
+ 1926,
+ 1931,
+ 1940,
+ 1947,
+ 1955,
+ 1958,
+ 1964,
+ 1975,
+ 1982,
+ 1986,
+ 1992,
+ 1997,
+ 2006,
+ 2014,
+ 2025,
+ 2031,
+ 2037,
+ 2046,
+ 2053,
+ 2061,
+ 2071,
+ 2079,
+ 2088,
+ 2096,
+ 2102,
+ 2109,
+ 2118,
+ 2128,
+ 2138,
+ 2146,
+ 2155,
+ 2164,
+ 2172,
+ 2178,
+ 2189,
+ 2200,
+ 2210,
+ 2221,
+ 2229,
+ 2241,
+ 2247,
+ 2253,
+ 2258,
+ 2263,
+ 2272,
+ 2280,
+ 2290,
+ 2294,
+ 2305,
+ 2317,
+ 2325,
+ 2333,
+ 2342,
+ 2350,
+ 2357,
+ 2368,
+ 2376,
+ 2384,
+ 2390,
+ 2398,
+ 2407,
+ 2417,
+ 2425,
+ 2432,
+ 2438,
+ 2443,
+ 2452,
+ 2459,
+ 2467,
+ 2476,
+ 2480,
+ 2485,
+ 2490,
+ 2500,
+ 2507,
+ 2515,
+ 2522,
+ 2529,
+ 2536,
+ 2545,
+ 2552,
+ 2561,
+ 2571,
+ 2584,
+ 2591,
+ 2599,
+ 2612,
+ 2616,
+ 2622,
+ 2627,
+ 2633,
+ 2638,
+ 2646,
+ 2653,
+ 2658,
+ 2667,
+ 2676,
+ 2681,
+ 2685,
+ 2692,
+ 2703,
+ 2709,
+ 2719,
+ 2730,
+ 2736,
+ 2743,
+ 2751,
+ 2758,
+ 2765,
+ 2771,
+ 2784,
+ 2794,
+ 2802,
+ 2812,
+ 2818,
+ 2825,
+ 2831,
+ 2838,
+ 2850,
+ 2861,
+ 2866,
+ 2875,
+ 2885,
+ 2890,
+ 2895,
+ 2900,
+ 2905,
+ 2915,
+ 2918,
+ 2927,
+ 2939,
+ 2949,
+ 2955,
+ 2963,
+ 2968,
+ 2973,
+ 2982,
+ 2990,
+ 2995,
+ 3001,
+ 3009,
+ 3019,
+ 3031,
+ 3043,
+ 3049,
+ 3056,
+ 3064,
+ 3073,
+ 3082,
+ 3088,
+ 3095,
+ 3100,
+ 3106,
+ 3113,
+ 3119,
+ 3128,
+ 3138,
+ 3144,
+ 3151,
+ 3159,
+ 3168,
+ 3176,
+ 3184,
+ 3192,
+ 3197,
+ 3203,
+ 3212,
+ 3217,
+ 3223,
+ 3234,
+ 3241,
+ 3246,
+ 3253,
+ 3261,
+ 3266,
+ 3274,
+ 3280,
+ 3284,
+ 3298,
+ 3308,
+ 3319,
+ 3329,
+ 3339,
+ 3353,
+ 3362,
+ 3368,
+ 3376,
+ 3389,
+ 3398,
+ 3403,
+ 3407,
+};
+
+#define SCANKEYWORDS_NUM_KEYWORDS 450
+
+static int
+ScanKeywords_hash_func(const void *key, size_t keylen)
+{
+ static const int16 h[901] = {
+ -172, 32767, 32767, 32767, 0, 56, 362, 32767,
+ 0, 90, -251, 332, 32767, 66, 298, 0,
+ 32767, 88, 27, 22, -125, 317, 0, 32767,
+ -35, 0, 32767, 32767, 32767, 52, 32767, 275,
+ 32767, -277, 32767, 32767, 316, 177, 32767, 0,
+ 398, 58, 272, -88, 0, -147, 242, 143,
+ 401, 0, 32767, 0, 293, 355, 0, 0,
+ 303, 32767, 225, 580, 0, 378, 0, 0,
+ 32767, -25, -312, 32767, -210, 0, 32767, -19,
+ 32767, 91, 307, 354, 32767, 32767, -18, 36,
+ 32767, 32767, 402, 292, 165, 274, 0, 32767,
+ 10, -20, 32767, 294, 32767, -622, 326, 32767,
+ -128, 0, 33, 32767, 32767, 325, 32767, 276,
+ 343, 32767, 96, 173, 32767, 32767, 32767, 0,
+ 32767, 320, -128, 32767, 357, 24, -9, 427,
+ 32767, 32767, 0, 32767, 0, 0, 32767, 32767,
+ 405, 32767, 125, 32767, 0, 32767, -40, 32767,
+ 0, 80, 0, 32767, 0, 95, -146, 0,
+ 32767, 26, 165, 32767, 41, 32767, 418, 32767,
+ 32767, 40, 32767, 147, 32767, 32767, 32767, 91,
+ 110, 9, 305, -340, 32767, 103, 32767, 0,
+ 329, 32767, 32767, 310, 32767, 32767, 147, -262,
+ 422, 393, 382, 136, 32767, 177, 0, 32767,
+ 32767, 102, 0, 32767, 83, 141, 28, 167,
+ -421, 32767, 38, 111, 0, 479, 32767, 32767,
+ 29, 439, 32767, 427, -6, 182, 32767, 74,
+ 32767, 360, 236, 32767, 32767, 327, -63, 162,
+ 154, 184, 90, 263, 115, 127, 539, 347,
+ -72, 32767, -159, 32767, -249, 152, 32767, 357,
+ 404, -110, 32767, 404, 345, 32767, 80, 352,
+ 422, 61, 32767, 32767, 32767, 243, 126, 117,
+ 32767, 0, 32767, 32767, 208, 32767, 32767, -20,
+ 430, -195, 32767, 32767, 32767, 118, -62, 32767,
+ 32767, 98, 136, 0, 0, 32767, 261, 32767,
+ 0, 32767, 230, 37, 750, 154, 32767, -86,
+ 87, 32767, 6, 61, 32767, 205, 32767, 517,
+ 107, 66, 304, 32767, 0, 32767, 32767, 32767,
+ 333, 32767, 357, 239, 32767, 32767, 437, -26,
+ 32767, 210, 0, 20, 369, 10, 32767, -78,
+ 616, 70, 0, 32767, 86, 32767, 0, 32767,
+ 14, 60, 32, 32767, 373, 32767, -218, 320,
+ 32767, 268, 32767, 32767, 425, -67, 0, 32767,
+ 252, 32767, -93, 193, -362, -140, 32767, -328,
+ 32767, 0, 319, 336, 32767, 121, 0, 32767,
+ 207, 32767, 481, -176, 276, 47, 235, 32767,
+ 0, 221, 32767, 228, 32767, 84, 32767, 32767,
+ 32767, 32767, 32767, 32767, 290, 32767, 32767, 84,
+ -170, 82, 246, -43, 201, 155, -7, 299,
+ 32767, 32767, 241, 32767, -418, 32767, 32767, -303,
+ 32767, 32767, -40, 308, 32767, 0, 178, -67,
+ 32767, 273, 32767, 0, 32767, 69, 387, 32767,
+ 341, 0, 0, -19, 324, -88, 166, 32767,
+ 6, 20, 32767, 17, 22, 359, 32767, 32767,
+ 172, 0, 32767, 0, 206, 88, 0, 32767,
+ 35, 0, 275, 114, 339, 32767, 32767, 32767,
+ 32767, 32767, 524, 0, -52, 191, -115, 9,
+ 200, 102, 0, 32767, 3, 0, 0, 0,
+ -288, 0, -132, 32767, 0, 32767, 300, 0,
+ 0, 0, 212, 32767, 260, 32767, -279, 104,
+ 506, 0, 640, 38, 268, 32767, 131, 32767,
+ 32767, 32767, 32767, 0, 0, 32767, 32767, 18,
+ 100, 44, 67, 270, 245, 283, 224, -66,
+ 32767, 32767, 32767, -67, 32767, 389, 32767, 0,
+ 125, 354, 32767, 32767, -21, 124, 0, 204,
+ 32767, 0, 99, 32767, 267, 32767, 0, 32767,
+ 32767, 231, 32767, -129, -113, 108, 23, 0,
+ 0, 32767, 32767, 0, 32767, 269, 32767, 32767,
+ -133, 32767, -384, 32767, 32767, 0, 32767, 32767,
+ 32767, 0, 32767, 32767, 32767, 0, 322, 424,
+ 117, -29, 32767, 32767, 145, 0, 0, 323,
+ 119, 32767, 32767, 32767, 392, -164, -143, -25,
+ -106, 0, 97, 32767, 32767, 235, 32767, 0,
+ 32767, 411, 363, 32767, 31, 32767, 443, 32767,
+ 76, 29, 32767, 323, 32767, 142, 32767, 0,
+ 244, 0, 32767, 28, 32767, 0, 377, -8,
+ -45, 32767, 37, 0, 350, 0, 32767, 32767,
+ 38, 416, 32767, 265, -78, -308, -51, 705,
+ 32767, 32767, 0, 66, 0, 32767, 32767, 32767,
+ 81, -250, 32767, 1, 421, 32767, 32767, 0,
+ 32767, 401, 198, 32767, 256, 290, 210, 32767,
+ 32767, 32767, 32767, 0, 444, 211, -99, 220,
+ 64, 32767, 32767, 32767, 32767, 190, 19, 0,
+ 216, 32767, 262, 32767, 404, 562, -222, 0,
+ 0, -162, 32767, 192, 65, 32767, 150, 191,
+ 0, 218, 267, 32767, 32767, 32767, 399, 189,
+ 356, 532, 0, 32767, 429, 32767, 32767, 54,
+ 279, 151, 0, 353, 394, 176, 215, 32767,
+ 15, 153, 137, 92, 420, 47, 253, 0,
+ 298, 32767, 32767, 32767, 234, 164, 241, 32767,
+ 0, 409, 0, 32767, 0, 32767, 0, 70,
+ 32767, 32767, 32767, 0, 32767, 32767, 32767, -138,
+ 32767, 32767, -46, 32767, 97, 32767, 0, -126,
+ 32767, 169, 322, 392, 32767, 0, 32767, 32767,
+ 0, 282, 296, 32767, 0, 403, 32767, -24,
+ 32767, -34, 32767, 32767, 32767, 418, 32767, 0,
+ 97, 238, 32767, 144, 428, 32767, 2, 32767,
+ 32767, 32767, 194, -182, 12, 32767, 391, 32767,
+ 880, 32767, 32767, 50, 331, 0, 0, 358,
+ 20, 32767, 32767, 222, 317, 32767, 0, 125,
+ 32767, 0, 32767, 284, 32767, 32767, 197, 0,
+ 63, 32767, 0, 188, 32767, 32767, 147, 190,
+ 344, 32767, -42, 32767, 0, 187, 32767, 32767,
+ 350, 32767, 0, 0, 127, 32767, 32767, 161,
+ 32767, 0, 0, 382, 32767, 32767, 316, 130,
+ 32767, 379, 32767, 383, 368, 93, -195, 0,
+ 233, 73, 32767, 100, 32767, 338, 90, 32767,
+ 258, 32767, 32767, 235, 175, 32767, 32767, 0,
+ -9, 0, 0, 32767, 32767, 111, 417, 330,
+ 0, 0, 225, 32767, 0, 0, 32767, 0,
+ 64, 32767, 32767, 16, 113, 29, 0, 32767,
+ 32767, 0, 43, 0, 365, -41, -284, 0,
+ 0, 0, 61, 314, 255, 0, 32767, 145,
+ 32767, -445, 32767, 32767, 154,
+ };
+
+ const unsigned char *k = (const unsigned char *) key;
+ uint32 a = 0;
+ uint32 b = 3;
+
+ while (keylen--)
+ {
+ unsigned char c = *k++ | 0x20;
+
+ a = a * 31 + c;
+ b = b * 127 + c;
+ }
+ return h[a % 901] + h[b % 901];
+}
+
+const ScanKeywordList ScanKeywords = {
+ ScanKeywords_kw_string,
+ ScanKeywords_kw_offsets,
+ ScanKeywords_hash_func,
+ SCANKEYWORDS_NUM_KEYWORDS,
+ 17
+};
+
+#endif /* KWLIST_D_H */
diff --git a/src/common/kwlookup.c b/src/common/kwlookup.c
new file mode 100644
index 0000000..11c49a1
--- /dev/null
+++ b/src/common/kwlookup.c
@@ -0,0 +1,85 @@
+/*-------------------------------------------------------------------------
+ *
+ * kwlookup.c
+ * Key word lookup for PostgreSQL
+ *
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/kwlookup.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "common/kwlookup.h"
+
+
+/*
+ * ScanKeywordLookup - see if a given word is a keyword
+ *
+ * The list of keywords to be matched against is passed as a ScanKeywordList.
+ *
+ * Returns the keyword number (0..N-1) of the keyword, or -1 if no match.
+ * Callers typically use the keyword number to index into information
+ * arrays, but that is no concern of this code.
+ *
+ * The match is done case-insensitively. Note that we deliberately use a
+ * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
+ * even if we are in a locale where tolower() would produce more or different
+ * translations. This is to conform to the SQL99 spec, which says that
+ * keywords are to be matched in this way even though non-keyword identifiers
+ * receive a different case-normalization mapping.
+ */
+int
+ScanKeywordLookup(const char *str,
+ const ScanKeywordList *keywords)
+{
+ size_t len;
+ int h;
+ const char *kw;
+
+ /*
+ * Reject immediately if too long to be any keyword. This saves useless
+ * hashing and downcasing work on long strings.
+ */
+ len = strlen(str);
+ if (len > keywords->max_kw_len)
+ return -1;
+
+ /*
+ * Compute the hash function. We assume it was generated to produce
+ * case-insensitive results. Since it's a perfect hash, we need only
+ * match to the specific keyword it identifies.
+ */
+ h = keywords->hash(str, len);
+
+ /* An out-of-range result implies no match */
+ if (h < 0 || h >= keywords->num_keywords)
+ return -1;
+
+ /*
+ * Compare character-by-character to see if we have a match, applying an
+ * ASCII-only downcasing to the input characters. We must not use
+ * tolower() since it may produce the wrong translation in some locales
+ * (eg, Turkish).
+ */
+ kw = GetScanKeyword(h, keywords);
+ while (*str != '\0')
+ {
+ char ch = *str++;
+
+ if (ch >= 'A' && ch <= 'Z')
+ ch += 'a' - 'A';
+ if (ch != *kw++)
+ return -1;
+ }
+ if (*kw != '\0')
+ return -1;
+
+ /* Success! */
+ return h;
+}
diff --git a/src/common/link-canary.c b/src/common/link-canary.c
new file mode 100644
index 0000000..28c6ae7
--- /dev/null
+++ b/src/common/link-canary.c
@@ -0,0 +1,36 @@
+/*-------------------------------------------------------------------------
+ * link-canary.c
+ * Detect whether src/common functions came from frontend or backend.
+ *
+ * Copyright (c) 2018-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/link-canary.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "common/link-canary.h"
+
+/*
+ * This function just reports whether this file was compiled for frontend
+ * or backend environment. We need this because in some systems, mainly
+ * ELF-based platforms, it is possible for a shlib (such as libpq) loaded
+ * into the backend to call a backend function named XYZ in preference to
+ * the shlib's own function XYZ. That's bad if the two functions don't
+ * act identically. This exact situation comes up for many functions in
+ * src/common and src/port, where the same function names exist in both
+ * libpq and the backend but they don't act quite identically. To verify
+ * that appropriate measures have been taken to prevent incorrect symbol
+ * resolution, libpq should test that this function returns true.
+ */
+bool
+pg_link_canary_is_frontend(void)
+{
+#ifdef FRONTEND
+ return true;
+#else
+ return false;
+#endif
+}
diff --git a/src/common/logging.c b/src/common/logging.c
new file mode 100644
index 0000000..f3fc0b8
--- /dev/null
+++ b/src/common/logging.c
@@ -0,0 +1,285 @@
+/*-------------------------------------------------------------------------
+ * Logging framework for frontend programs
+ *
+ * Copyright (c) 2018-2020, PostgreSQL Global Development Group
+ *
+ * src/common/logging.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres_fe.h"
+
+#include <unistd.h>
+
+#include "common/logging.h"
+
+enum pg_log_level __pg_log_level;
+
+static const char *progname;
+static int log_flags;
+
+static void (*log_pre_callback) (void);
+static void (*log_locus_callback) (const char **, uint64 *);
+
+static const char *sgr_error = NULL;
+static const char *sgr_warning = NULL;
+static const char *sgr_locus = NULL;
+
+#define SGR_ERROR_DEFAULT "01;31"
+#define SGR_WARNING_DEFAULT "01;35"
+#define SGR_LOCUS_DEFAULT "01"
+
+#define ANSI_ESCAPE_FMT "\x1b[%sm"
+#define ANSI_ESCAPE_RESET "\x1b[0m"
+
+#ifdef WIN32
+
+#ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING
+#define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x0004
+#endif
+
+/*
+ * Attempt to enable VT100 sequence processing for colorization on Windows.
+ * If current environment is not VT100-compatible or if this mode could not
+ * be enabled, return false.
+ */
+static bool
+enable_vt_processing(void)
+{
+ /* Check stderr */
+ HANDLE hOut = GetStdHandle(STD_ERROR_HANDLE);
+ DWORD dwMode = 0;
+
+ if (hOut == INVALID_HANDLE_VALUE)
+ return false;
+
+ /*
+ * Look for the current console settings and check if VT100 is already
+ * enabled.
+ */
+ if (!GetConsoleMode(hOut, &dwMode))
+ return false;
+ if ((dwMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING) != 0)
+ return true;
+
+ dwMode |= ENABLE_VIRTUAL_TERMINAL_PROCESSING;
+ if (!SetConsoleMode(hOut, dwMode))
+ return false;
+ return true;
+}
+#endif /* WIN32 */
+
+/*
+ * This should be called before any output happens.
+ */
+void
+pg_logging_init(const char *argv0)
+{
+ const char *pg_color_env = getenv("PG_COLOR");
+ bool log_color = false;
+ bool color_terminal = isatty(fileno(stderr));
+
+#ifdef WIN32
+
+ /*
+ * On Windows, check if environment is VT100-compatible if using a
+ * terminal.
+ */
+ if (color_terminal)
+ color_terminal = enable_vt_processing();
+#endif
+
+ /* usually the default, but not on Windows */
+ setvbuf(stderr, NULL, _IONBF, 0);
+
+ progname = get_progname(argv0);
+ __pg_log_level = PG_LOG_INFO;
+
+ if (pg_color_env)
+ {
+ if (strcmp(pg_color_env, "always") == 0 ||
+ (strcmp(pg_color_env, "auto") == 0 && color_terminal))
+ log_color = true;
+ }
+
+ if (log_color)
+ {
+ const char *pg_colors_env = getenv("PG_COLORS");
+
+ if (pg_colors_env)
+ {
+ char *colors = strdup(pg_colors_env);
+
+ if (colors)
+ {
+ for (char *token = strtok(colors, ":"); token; token = strtok(NULL, ":"))
+ {
+ char *e = strchr(token, '=');
+
+ if (e)
+ {
+ char *name;
+ char *value;
+
+ *e = '\0';
+ name = token;
+ value = e + 1;
+
+ if (strcmp(name, "error") == 0)
+ sgr_error = strdup(value);
+ if (strcmp(name, "warning") == 0)
+ sgr_warning = strdup(value);
+ if (strcmp(name, "locus") == 0)
+ sgr_locus = strdup(value);
+ }
+ }
+
+ free(colors);
+ }
+ }
+ else
+ {
+ sgr_error = SGR_ERROR_DEFAULT;
+ sgr_warning = SGR_WARNING_DEFAULT;
+ sgr_locus = SGR_LOCUS_DEFAULT;
+ }
+ }
+}
+
+void
+pg_logging_config(int new_flags)
+{
+ log_flags = new_flags;
+}
+
+void
+pg_logging_set_level(enum pg_log_level new_level)
+{
+ __pg_log_level = new_level;
+}
+
+void
+pg_logging_set_pre_callback(void (*cb) (void))
+{
+ log_pre_callback = cb;
+}
+
+void
+pg_logging_set_locus_callback(void (*cb) (const char **filename, uint64 *lineno))
+{
+ log_locus_callback = cb;
+}
+
+void
+pg_log_generic(enum pg_log_level level, const char *pg_restrict fmt,...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ pg_log_generic_v(level, fmt, ap);
+ va_end(ap);
+}
+
+void
+pg_log_generic_v(enum pg_log_level level, const char *pg_restrict fmt, va_list ap)
+{
+ int save_errno = errno;
+ const char *filename = NULL;
+ uint64 lineno = 0;
+ va_list ap2;
+ size_t required_len;
+ char *buf;
+
+ Assert(progname);
+ Assert(level);
+ Assert(fmt);
+ Assert(fmt[strlen(fmt) - 1] != '\n');
+
+ /*
+ * Flush stdout before output to stderr, to ensure sync even when stdout
+ * is buffered.
+ */
+ fflush(stdout);
+
+ if (log_pre_callback)
+ log_pre_callback();
+
+ if (log_locus_callback)
+ log_locus_callback(&filename, &lineno);
+
+ fmt = _(fmt);
+
+ if (!(log_flags & PG_LOG_FLAG_TERSE) || filename)
+ {
+ if (sgr_locus)
+ fprintf(stderr, ANSI_ESCAPE_FMT, sgr_locus);
+ if (!(log_flags & PG_LOG_FLAG_TERSE))
+ fprintf(stderr, "%s:", progname);
+ if (filename)
+ {
+ fprintf(stderr, "%s:", filename);
+ if (lineno > 0)
+ fprintf(stderr, UINT64_FORMAT ":", lineno);
+ }
+ fprintf(stderr, " ");
+ if (sgr_locus)
+ fprintf(stderr, ANSI_ESCAPE_RESET);
+ }
+
+ if (!(log_flags & PG_LOG_FLAG_TERSE))
+ {
+ switch (level)
+ {
+ case PG_LOG_FATAL:
+ if (sgr_error)
+ fprintf(stderr, ANSI_ESCAPE_FMT, sgr_error);
+ fprintf(stderr, _("fatal: "));
+ if (sgr_error)
+ fprintf(stderr, ANSI_ESCAPE_RESET);
+ break;
+ case PG_LOG_ERROR:
+ if (sgr_error)
+ fprintf(stderr, ANSI_ESCAPE_FMT, sgr_error);
+ fprintf(stderr, _("error: "));
+ if (sgr_error)
+ fprintf(stderr, ANSI_ESCAPE_RESET);
+ break;
+ case PG_LOG_WARNING:
+ if (sgr_warning)
+ fprintf(stderr, ANSI_ESCAPE_FMT, sgr_warning);
+ fprintf(stderr, _("warning: "));
+ if (sgr_warning)
+ fprintf(stderr, ANSI_ESCAPE_RESET);
+ break;
+ default:
+ break;
+ }
+ }
+
+ errno = save_errno;
+
+ va_copy(ap2, ap);
+ required_len = vsnprintf(NULL, 0, fmt, ap2) + 1;
+ va_end(ap2);
+
+ buf = pg_malloc_extended(required_len, MCXT_ALLOC_NO_OOM);
+
+ errno = save_errno; /* malloc might change errno */
+
+ if (!buf)
+ {
+ /* memory trouble, just print what we can and get out of here */
+ vfprintf(stderr, fmt, ap);
+ return;
+ }
+
+ vsnprintf(buf, required_len, fmt, ap);
+
+ /* strip one newline, for PQerrorMessage() */
+ if (required_len >= 2 && buf[required_len - 2] == '\n')
+ buf[required_len - 2] = '\0';
+
+ fprintf(stderr, "%s\n", buf);
+
+ free(buf);
+}
diff --git a/src/common/md5.c b/src/common/md5.c
new file mode 100644
index 0000000..5f790c6
--- /dev/null
+++ b/src/common/md5.c
@@ -0,0 +1,348 @@
+/*
+ * md5.c
+ *
+ * Implements the MD5 Message-Digest Algorithm as specified in
+ * RFC 1321. This implementation is a simple one, in that it
+ * needs every input byte to be buffered before doing any
+ * calculations. I do not expect this file to be used for
+ * general purpose MD5'ing of large amounts of data, only for
+ * generating hashed passwords from limited input.
+ *
+ * Sverre H. Huseby <sverrehu@online.no>
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/md5.c
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/md5.h"
+
+
+/*
+ * PRIVATE FUNCTIONS
+ */
+
+
+/*
+ * The returned array is allocated using malloc. the caller should free it
+ * when it is no longer needed.
+ */
+static uint8 *
+createPaddedCopyWithLength(const uint8 *b, uint32 *l)
+{
+ uint8 *ret;
+ uint32 q;
+ uint32 len,
+ newLen448;
+ uint32 len_high,
+ len_low; /* 64-bit value split into 32-bit sections */
+
+ len = ((b == NULL) ? 0 : *l);
+ newLen448 = len + 64 - (len % 64) - 8;
+ if (newLen448 <= len)
+ newLen448 += 64;
+
+ *l = newLen448 + 8;
+ if ((ret = (uint8 *) malloc(sizeof(uint8) * *l)) == NULL)
+ return NULL;
+
+ if (b != NULL)
+ memcpy(ret, b, sizeof(uint8) * len);
+
+ /* pad */
+ ret[len] = 0x80;
+ for (q = len + 1; q < newLen448; q++)
+ ret[q] = 0x00;
+
+ /* append length as a 64 bit bitcount */
+ len_low = len;
+ /* split into two 32-bit values */
+ /* we only look at the bottom 32-bits */
+ len_high = len >> 29;
+ len_low <<= 3;
+ q = newLen448;
+ ret[q++] = (len_low & 0xff);
+ len_low >>= 8;
+ ret[q++] = (len_low & 0xff);
+ len_low >>= 8;
+ ret[q++] = (len_low & 0xff);
+ len_low >>= 8;
+ ret[q++] = (len_low & 0xff);
+ ret[q++] = (len_high & 0xff);
+ len_high >>= 8;
+ ret[q++] = (len_high & 0xff);
+ len_high >>= 8;
+ ret[q++] = (len_high & 0xff);
+ len_high >>= 8;
+ ret[q] = (len_high & 0xff);
+
+ return ret;
+}
+
+#define F(x, y, z) (((x) & (y)) | (~(x) & (z)))
+#define G(x, y, z) (((x) & (z)) | ((y) & ~(z)))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define I(x, y, z) ((y) ^ ((x) | ~(z)))
+#define ROT_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+
+static void
+doTheRounds(uint32 X[16], uint32 state[4])
+{
+ uint32 a,
+ b,
+ c,
+ d;
+
+ a = state[0];
+ b = state[1];
+ c = state[2];
+ d = state[3];
+
+ /* round 1 */
+ a = b + ROT_LEFT((a + F(b, c, d) + X[0] + 0xd76aa478), 7); /* 1 */
+ d = a + ROT_LEFT((d + F(a, b, c) + X[1] + 0xe8c7b756), 12); /* 2 */
+ c = d + ROT_LEFT((c + F(d, a, b) + X[2] + 0x242070db), 17); /* 3 */
+ b = c + ROT_LEFT((b + F(c, d, a) + X[3] + 0xc1bdceee), 22); /* 4 */
+ a = b + ROT_LEFT((a + F(b, c, d) + X[4] + 0xf57c0faf), 7); /* 5 */
+ d = a + ROT_LEFT((d + F(a, b, c) + X[5] + 0x4787c62a), 12); /* 6 */
+ c = d + ROT_LEFT((c + F(d, a, b) + X[6] + 0xa8304613), 17); /* 7 */
+ b = c + ROT_LEFT((b + F(c, d, a) + X[7] + 0xfd469501), 22); /* 8 */
+ a = b + ROT_LEFT((a + F(b, c, d) + X[8] + 0x698098d8), 7); /* 9 */
+ d = a + ROT_LEFT((d + F(a, b, c) + X[9] + 0x8b44f7af), 12); /* 10 */
+ c = d + ROT_LEFT((c + F(d, a, b) + X[10] + 0xffff5bb1), 17); /* 11 */
+ b = c + ROT_LEFT((b + F(c, d, a) + X[11] + 0x895cd7be), 22); /* 12 */
+ a = b + ROT_LEFT((a + F(b, c, d) + X[12] + 0x6b901122), 7); /* 13 */
+ d = a + ROT_LEFT((d + F(a, b, c) + X[13] + 0xfd987193), 12); /* 14 */
+ c = d + ROT_LEFT((c + F(d, a, b) + X[14] + 0xa679438e), 17); /* 15 */
+ b = c + ROT_LEFT((b + F(c, d, a) + X[15] + 0x49b40821), 22); /* 16 */
+
+ /* round 2 */
+ a = b + ROT_LEFT((a + G(b, c, d) + X[1] + 0xf61e2562), 5); /* 17 */
+ d = a + ROT_LEFT((d + G(a, b, c) + X[6] + 0xc040b340), 9); /* 18 */
+ c = d + ROT_LEFT((c + G(d, a, b) + X[11] + 0x265e5a51), 14); /* 19 */
+ b = c + ROT_LEFT((b + G(c, d, a) + X[0] + 0xe9b6c7aa), 20); /* 20 */
+ a = b + ROT_LEFT((a + G(b, c, d) + X[5] + 0xd62f105d), 5); /* 21 */
+ d = a + ROT_LEFT((d + G(a, b, c) + X[10] + 0x02441453), 9); /* 22 */
+ c = d + ROT_LEFT((c + G(d, a, b) + X[15] + 0xd8a1e681), 14); /* 23 */
+ b = c + ROT_LEFT((b + G(c, d, a) + X[4] + 0xe7d3fbc8), 20); /* 24 */
+ a = b + ROT_LEFT((a + G(b, c, d) + X[9] + 0x21e1cde6), 5); /* 25 */
+ d = a + ROT_LEFT((d + G(a, b, c) + X[14] + 0xc33707d6), 9); /* 26 */
+ c = d + ROT_LEFT((c + G(d, a, b) + X[3] + 0xf4d50d87), 14); /* 27 */
+ b = c + ROT_LEFT((b + G(c, d, a) + X[8] + 0x455a14ed), 20); /* 28 */
+ a = b + ROT_LEFT((a + G(b, c, d) + X[13] + 0xa9e3e905), 5); /* 29 */
+ d = a + ROT_LEFT((d + G(a, b, c) + X[2] + 0xfcefa3f8), 9); /* 30 */
+ c = d + ROT_LEFT((c + G(d, a, b) + X[7] + 0x676f02d9), 14); /* 31 */
+ b = c + ROT_LEFT((b + G(c, d, a) + X[12] + 0x8d2a4c8a), 20); /* 32 */
+
+ /* round 3 */
+ a = b + ROT_LEFT((a + H(b, c, d) + X[5] + 0xfffa3942), 4); /* 33 */
+ d = a + ROT_LEFT((d + H(a, b, c) + X[8] + 0x8771f681), 11); /* 34 */
+ c = d + ROT_LEFT((c + H(d, a, b) + X[11] + 0x6d9d6122), 16); /* 35 */
+ b = c + ROT_LEFT((b + H(c, d, a) + X[14] + 0xfde5380c), 23); /* 36 */
+ a = b + ROT_LEFT((a + H(b, c, d) + X[1] + 0xa4beea44), 4); /* 37 */
+ d = a + ROT_LEFT((d + H(a, b, c) + X[4] + 0x4bdecfa9), 11); /* 38 */
+ c = d + ROT_LEFT((c + H(d, a, b) + X[7] + 0xf6bb4b60), 16); /* 39 */
+ b = c + ROT_LEFT((b + H(c, d, a) + X[10] + 0xbebfbc70), 23); /* 40 */
+ a = b + ROT_LEFT((a + H(b, c, d) + X[13] + 0x289b7ec6), 4); /* 41 */
+ d = a + ROT_LEFT((d + H(a, b, c) + X[0] + 0xeaa127fa), 11); /* 42 */
+ c = d + ROT_LEFT((c + H(d, a, b) + X[3] + 0xd4ef3085), 16); /* 43 */
+ b = c + ROT_LEFT((b + H(c, d, a) + X[6] + 0x04881d05), 23); /* 44 */
+ a = b + ROT_LEFT((a + H(b, c, d) + X[9] + 0xd9d4d039), 4); /* 45 */
+ d = a + ROT_LEFT((d + H(a, b, c) + X[12] + 0xe6db99e5), 11); /* 46 */
+ c = d + ROT_LEFT((c + H(d, a, b) + X[15] + 0x1fa27cf8), 16); /* 47 */
+ b = c + ROT_LEFT((b + H(c, d, a) + X[2] + 0xc4ac5665), 23); /* 48 */
+
+ /* round 4 */
+ a = b + ROT_LEFT((a + I(b, c, d) + X[0] + 0xf4292244), 6); /* 49 */
+ d = a + ROT_LEFT((d + I(a, b, c) + X[7] + 0x432aff97), 10); /* 50 */
+ c = d + ROT_LEFT((c + I(d, a, b) + X[14] + 0xab9423a7), 15); /* 51 */
+ b = c + ROT_LEFT((b + I(c, d, a) + X[5] + 0xfc93a039), 21); /* 52 */
+ a = b + ROT_LEFT((a + I(b, c, d) + X[12] + 0x655b59c3), 6); /* 53 */
+ d = a + ROT_LEFT((d + I(a, b, c) + X[3] + 0x8f0ccc92), 10); /* 54 */
+ c = d + ROT_LEFT((c + I(d, a, b) + X[10] + 0xffeff47d), 15); /* 55 */
+ b = c + ROT_LEFT((b + I(c, d, a) + X[1] + 0x85845dd1), 21); /* 56 */
+ a = b + ROT_LEFT((a + I(b, c, d) + X[8] + 0x6fa87e4f), 6); /* 57 */
+ d = a + ROT_LEFT((d + I(a, b, c) + X[15] + 0xfe2ce6e0), 10); /* 58 */
+ c = d + ROT_LEFT((c + I(d, a, b) + X[6] + 0xa3014314), 15); /* 59 */
+ b = c + ROT_LEFT((b + I(c, d, a) + X[13] + 0x4e0811a1), 21); /* 60 */
+ a = b + ROT_LEFT((a + I(b, c, d) + X[4] + 0xf7537e82), 6); /* 61 */
+ d = a + ROT_LEFT((d + I(a, b, c) + X[11] + 0xbd3af235), 10); /* 62 */
+ c = d + ROT_LEFT((c + I(d, a, b) + X[2] + 0x2ad7d2bb), 15); /* 63 */
+ b = c + ROT_LEFT((b + I(c, d, a) + X[9] + 0xeb86d391), 21); /* 64 */
+
+ state[0] += a;
+ state[1] += b;
+ state[2] += c;
+ state[3] += d;
+}
+
+static int
+calculateDigestFromBuffer(const uint8 *b, uint32 len, uint8 sum[16])
+{
+ register uint32 i,
+ j,
+ k,
+ newI;
+ uint32 l;
+ uint8 *input;
+ register uint32 *wbp;
+ uint32 workBuff[16],
+ state[4];
+
+ l = len;
+
+ state[0] = 0x67452301;
+ state[1] = 0xEFCDAB89;
+ state[2] = 0x98BADCFE;
+ state[3] = 0x10325476;
+
+ if ((input = createPaddedCopyWithLength(b, &l)) == NULL)
+ return 0;
+
+ for (i = 0;;)
+ {
+ if ((newI = i + 16 * 4) > l)
+ break;
+ k = i + 3;
+ for (j = 0; j < 16; j++)
+ {
+ wbp = (workBuff + j);
+ *wbp = input[k--];
+ *wbp <<= 8;
+ *wbp |= input[k--];
+ *wbp <<= 8;
+ *wbp |= input[k--];
+ *wbp <<= 8;
+ *wbp |= input[k];
+ k += 7;
+ }
+ doTheRounds(workBuff, state);
+ i = newI;
+ }
+ free(input);
+
+ j = 0;
+ for (i = 0; i < 4; i++)
+ {
+ k = state[i];
+ sum[j++] = (k & 0xff);
+ k >>= 8;
+ sum[j++] = (k & 0xff);
+ k >>= 8;
+ sum[j++] = (k & 0xff);
+ k >>= 8;
+ sum[j++] = (k & 0xff);
+ }
+ return 1;
+}
+
+static void
+bytesToHex(uint8 b[16], char *s)
+{
+ static const char *hex = "0123456789abcdef";
+ int q,
+ w;
+
+ for (q = 0, w = 0; q < 16; q++)
+ {
+ s[w++] = hex[(b[q] >> 4) & 0x0F];
+ s[w++] = hex[b[q] & 0x0F];
+ }
+ s[w] = '\0';
+}
+
+/*
+ * PUBLIC FUNCTIONS
+ */
+
+/*
+ * pg_md5_hash
+ *
+ * Calculates the MD5 sum of the bytes in a buffer.
+ *
+ * SYNOPSIS #include "md5.h"
+ * int pg_md5_hash(const void *buff, size_t len, char *hexsum)
+ *
+ * INPUT buff the buffer containing the bytes that you want
+ * the MD5 sum of.
+ * len number of bytes in the buffer.
+ *
+ * OUTPUT hexsum the MD5 sum as a '\0'-terminated string of
+ * hexadecimal digits. an MD5 sum is 16 bytes long.
+ * each byte is represented by two hexadecimal
+ * characters. you thus need to provide an array
+ * of 33 characters, including the trailing '\0'.
+ *
+ * RETURNS false on failure (out of memory for internal buffers) or
+ * true on success.
+ *
+ * STANDARDS MD5 is described in RFC 1321.
+ *
+ * AUTHOR Sverre H. Huseby <sverrehu@online.no>
+ *
+ */
+bool
+pg_md5_hash(const void *buff, size_t len, char *hexsum)
+{
+ uint8 sum[16];
+
+ if (!calculateDigestFromBuffer(buff, len, sum))
+ return false;
+
+ bytesToHex(sum, hexsum);
+ return true;
+}
+
+bool
+pg_md5_binary(const void *buff, size_t len, void *outbuf)
+{
+ if (!calculateDigestFromBuffer(buff, len, outbuf))
+ return false;
+ return true;
+}
+
+
+/*
+ * Computes MD5 checksum of "passwd" (a null-terminated string) followed
+ * by "salt" (which need not be null-terminated).
+ *
+ * Output format is "md5" followed by a 32-hex-digit MD5 checksum.
+ * Hence, the output buffer "buf" must be at least 36 bytes long.
+ *
+ * Returns true if okay, false on error (out of memory).
+ */
+bool
+pg_md5_encrypt(const char *passwd, const char *salt, size_t salt_len,
+ char *buf)
+{
+ size_t passwd_len = strlen(passwd);
+
+ /* +1 here is just to avoid risk of unportable malloc(0) */
+ char *crypt_buf = malloc(passwd_len + salt_len + 1);
+ bool ret;
+
+ if (!crypt_buf)
+ return false;
+
+ /*
+ * Place salt at the end because it may be known by users trying to crack
+ * the MD5 output.
+ */
+ memcpy(crypt_buf, passwd, passwd_len);
+ memcpy(crypt_buf + passwd_len, salt, salt_len);
+
+ strcpy(buf, "md5");
+ ret = pg_md5_hash(crypt_buf, passwd_len + salt_len, buf + 3);
+
+ free(crypt_buf);
+
+ return ret;
+}
diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c
new file mode 100644
index 0000000..f9c2982
--- /dev/null
+++ b/src/common/pg_lzcompress.c
@@ -0,0 +1,872 @@
+/* ----------
+ * pg_lzcompress.c -
+ *
+ * This is an implementation of LZ compression for PostgreSQL.
+ * It uses a simple history table and generates 2-3 byte tags
+ * capable of backward copy information for 3-273 bytes with
+ * a max offset of 4095.
+ *
+ * Entry routines:
+ *
+ * int32
+ * pglz_compress(const char *source, int32 slen, char *dest,
+ * const PGLZ_Strategy *strategy);
+ *
+ * source is the input data to be compressed.
+ *
+ * slen is the length of the input data.
+ *
+ * dest is the output area for the compressed result.
+ * It must be at least as big as PGLZ_MAX_OUTPUT(slen).
+ *
+ * strategy is a pointer to some information controlling
+ * the compression algorithm. If NULL, the compiled
+ * in default strategy is used.
+ *
+ * The return value is the number of bytes written in the
+ * buffer dest, or -1 if compression fails; in the latter
+ * case the contents of dest are undefined.
+ *
+ * int32
+ * pglz_decompress(const char *source, int32 slen, char *dest,
+ * int32 rawsize, bool check_complete)
+ *
+ * source is the compressed input.
+ *
+ * slen is the length of the compressed input.
+ *
+ * dest is the area where the uncompressed data will be
+ * written to. It is the callers responsibility to
+ * provide enough space.
+ *
+ * The data is written to buff exactly as it was handed
+ * to pglz_compress(). No terminating zero byte is added.
+ *
+ * rawsize is the length of the uncompressed data.
+ *
+ * check_complete is a flag to let us know if -1 should be
+ * returned in cases where we don't reach the end of the
+ * source or dest buffers, or not. This should be false
+ * if the caller is asking for only a partial result and
+ * true otherwise.
+ *
+ * The return value is the number of bytes written in the
+ * buffer dest, or -1 if decompression fails.
+ *
+ * The decompression algorithm and internal data format:
+ *
+ * It is made with the compressed data itself.
+ *
+ * The data representation is easiest explained by describing
+ * the process of decompression.
+ *
+ * If compressed_size == rawsize, then the data
+ * is stored uncompressed as plain bytes. Thus, the decompressor
+ * simply copies rawsize bytes to the destination.
+ *
+ * Otherwise the first byte tells what to do the next 8 times.
+ * We call this the control byte.
+ *
+ * An unset bit in the control byte means, that one uncompressed
+ * byte follows, which is copied from input to output.
+ *
+ * A set bit in the control byte means, that a tag of 2-3 bytes
+ * follows. A tag contains information to copy some bytes, that
+ * are already in the output buffer, to the current location in
+ * the output. Let's call the three tag bytes T1, T2 and T3. The
+ * position of the data to copy is coded as an offset from the
+ * actual output position.
+ *
+ * The offset is in the upper nibble of T1 and in T2.
+ * The length is in the lower nibble of T1.
+ *
+ * So the 16 bits of a 2 byte tag are coded as
+ *
+ * 7---T1--0 7---T2--0
+ * OOOO LLLL OOOO OOOO
+ *
+ * This limits the offset to 1-4095 (12 bits) and the length
+ * to 3-18 (4 bits) because 3 is always added to it. To emit
+ * a tag of 2 bytes with a length of 2 only saves one control
+ * bit. But we lose one byte in the possible length of a tag.
+ *
+ * In the actual implementation, the 2 byte tag's length is
+ * limited to 3-17, because the value 0xF in the length nibble
+ * has special meaning. It means, that the next following
+ * byte (T3) has to be added to the length value of 18. That
+ * makes total limits of 1-4095 for offset and 3-273 for length.
+ *
+ * Now that we have successfully decoded a tag. We simply copy
+ * the output that occurred <offset> bytes back to the current
+ * output location in the specified <length>. Thus, a
+ * sequence of 200 spaces (think about bpchar fields) could be
+ * coded in 4 bytes. One literal space and a three byte tag to
+ * copy 199 bytes with a -1 offset. Whow - that's a compression
+ * rate of 98%! Well, the implementation needs to save the
+ * original data size too, so we need another 4 bytes for it
+ * and end up with a total compression rate of 96%, what's still
+ * worth a Whow.
+ *
+ * The compression algorithm
+ *
+ * The following uses numbers used in the default strategy.
+ *
+ * The compressor works best for attributes of a size between
+ * 1K and 1M. For smaller items there's not that much chance of
+ * redundancy in the character sequence (except for large areas
+ * of identical bytes like trailing spaces) and for bigger ones
+ * our 4K maximum look-back distance is too small.
+ *
+ * The compressor creates a table for lists of positions.
+ * For each input position (except the last 3), a hash key is
+ * built from the 4 next input bytes and the position remembered
+ * in the appropriate list. Thus, the table points to linked
+ * lists of likely to be at least in the first 4 characters
+ * matching strings. This is done on the fly while the input
+ * is compressed into the output area. Table entries are only
+ * kept for the last 4096 input positions, since we cannot use
+ * back-pointers larger than that anyway. The size of the hash
+ * table is chosen based on the size of the input - a larger table
+ * has a larger startup cost, as it needs to be initialized to
+ * zero, but reduces the number of hash collisions on long inputs.
+ *
+ * For each byte in the input, its hash key (built from this
+ * byte and the next 3) is used to find the appropriate list
+ * in the table. The lists remember the positions of all bytes
+ * that had the same hash key in the past in increasing backward
+ * offset order. Now for all entries in the used lists, the
+ * match length is computed by comparing the characters from the
+ * entries position with the characters from the actual input
+ * position.
+ *
+ * The compressor starts with a so called "good_match" of 128.
+ * It is a "prefer speed against compression ratio" optimizer.
+ * So if the first entry looked at already has 128 or more
+ * matching characters, the lookup stops and that position is
+ * used for the next tag in the output.
+ *
+ * For each subsequent entry in the history list, the "good_match"
+ * is lowered by 10%. So the compressor will be more happy with
+ * short matches the farer it has to go back in the history.
+ * Another "speed against ratio" preference characteristic of
+ * the algorithm.
+ *
+ * Thus there are 3 stop conditions for the lookup of matches:
+ *
+ * - a match >= good_match is found
+ * - there are no more history entries to look at
+ * - the next history entry is already too far back
+ * to be coded into a tag.
+ *
+ * Finally the match algorithm checks that at least a match
+ * of 3 or more bytes has been found, because that is the smallest
+ * amount of copy information to code into a tag. If so, a tag
+ * is omitted and all the input bytes covered by that are just
+ * scanned for the history add's, otherwise a literal character
+ * is omitted and only his history entry added.
+ *
+ * Acknowledgments:
+ *
+ * Many thanks to Adisak Pochanayon, who's article about SLZ
+ * inspired me to write the PostgreSQL compression this way.
+ *
+ * Jan Wieck
+ *
+ * Copyright (c) 1999-2020, PostgreSQL Global Development Group
+ *
+ * src/common/pg_lzcompress.c
+ * ----------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <limits.h>
+
+#include "common/pg_lzcompress.h"
+
+
+/* ----------
+ * Local definitions
+ * ----------
+ */
+#define PGLZ_MAX_HISTORY_LISTS 8192 /* must be power of 2 */
+#define PGLZ_HISTORY_SIZE 4096
+#define PGLZ_MAX_MATCH 273
+
+
+/* ----------
+ * PGLZ_HistEntry -
+ *
+ * Linked list for the backward history lookup
+ *
+ * All the entries sharing a hash key are linked in a doubly linked list.
+ * This makes it easy to remove an entry when it's time to recycle it
+ * (because it's more than 4K positions old).
+ * ----------
+ */
+typedef struct PGLZ_HistEntry
+{
+ struct PGLZ_HistEntry *next; /* links for my hash key's list */
+ struct PGLZ_HistEntry *prev;
+ int hindex; /* my current hash key */
+ const char *pos; /* my input position */
+} PGLZ_HistEntry;
+
+
+/* ----------
+ * The provided standard strategies
+ * ----------
+ */
+static const PGLZ_Strategy strategy_default_data = {
+ 32, /* Data chunks less than 32 bytes are not
+ * compressed */
+ INT_MAX, /* No upper limit on what we'll try to
+ * compress */
+ 25, /* Require 25% compression rate, or not worth
+ * it */
+ 1024, /* Give up if no compression in the first 1KB */
+ 128, /* Stop history lookup if a match of 128 bytes
+ * is found */
+ 10 /* Lower good match size by 10% at every loop
+ * iteration */
+};
+const PGLZ_Strategy *const PGLZ_strategy_default = &strategy_default_data;
+
+
+static const PGLZ_Strategy strategy_always_data = {
+ 0, /* Chunks of any size are compressed */
+ INT_MAX,
+ 0, /* It's enough to save one single byte */
+ INT_MAX, /* Never give up early */
+ 128, /* Stop history lookup if a match of 128 bytes
+ * is found */
+ 6 /* Look harder for a good match */
+};
+const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data;
+
+
+/* ----------
+ * Statically allocated work arrays for history
+ * ----------
+ */
+static int16 hist_start[PGLZ_MAX_HISTORY_LISTS];
+static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE + 1];
+
+/*
+ * Element 0 in hist_entries is unused, and means 'invalid'. Likewise,
+ * INVALID_ENTRY_PTR in next/prev pointers mean 'invalid'.
+ */
+#define INVALID_ENTRY 0
+#define INVALID_ENTRY_PTR (&hist_entries[INVALID_ENTRY])
+
+/* ----------
+ * pglz_hist_idx -
+ *
+ * Computes the history table slot for the lookup by the next 4
+ * characters in the input.
+ *
+ * NB: because we use the next 4 characters, we are not guaranteed to
+ * find 3-character matches; they very possibly will be in the wrong
+ * hash list. This seems an acceptable tradeoff for spreading out the
+ * hash keys more.
+ * ----------
+ */
+#define pglz_hist_idx(_s,_e, _mask) ( \
+ ((((_e) - (_s)) < 4) ? (int) (_s)[0] : \
+ (((_s)[0] << 6) ^ ((_s)[1] << 4) ^ \
+ ((_s)[2] << 2) ^ (_s)[3])) & (_mask) \
+ )
+
+
+/* ----------
+ * pglz_hist_add -
+ *
+ * Adds a new entry to the history table.
+ *
+ * If _recycle is true, then we are recycling a previously used entry,
+ * and must first delink it from its old hashcode's linked list.
+ *
+ * NOTE: beware of multiple evaluations of macro's arguments, and note that
+ * _hn and _recycle are modified in the macro.
+ * ----------
+ */
+#define pglz_hist_add(_hs,_he,_hn,_recycle,_s,_e, _mask) \
+do { \
+ int __hindex = pglz_hist_idx((_s),(_e), (_mask)); \
+ int16 *__myhsp = &(_hs)[__hindex]; \
+ PGLZ_HistEntry *__myhe = &(_he)[_hn]; \
+ if (_recycle) { \
+ if (__myhe->prev == NULL) \
+ (_hs)[__myhe->hindex] = __myhe->next - (_he); \
+ else \
+ __myhe->prev->next = __myhe->next; \
+ if (__myhe->next != NULL) \
+ __myhe->next->prev = __myhe->prev; \
+ } \
+ __myhe->next = &(_he)[*__myhsp]; \
+ __myhe->prev = NULL; \
+ __myhe->hindex = __hindex; \
+ __myhe->pos = (_s); \
+ /* If there was an existing entry in this hash slot, link */ \
+ /* this new entry to it. However, the 0th entry in the */ \
+ /* entries table is unused, so we can freely scribble on it. */ \
+ /* So don't bother checking if the slot was used - we'll */ \
+ /* scribble on the unused entry if it was not, but that's */ \
+ /* harmless. Avoiding the branch in this critical path */ \
+ /* speeds this up a little bit. */ \
+ /* if (*__myhsp != INVALID_ENTRY) */ \
+ (_he)[(*__myhsp)].prev = __myhe; \
+ *__myhsp = _hn; \
+ if (++(_hn) >= PGLZ_HISTORY_SIZE + 1) { \
+ (_hn) = 1; \
+ (_recycle) = true; \
+ } \
+} while (0)
+
+
+/* ----------
+ * pglz_out_ctrl -
+ *
+ * Outputs the last and allocates a new control byte if needed.
+ * ----------
+ */
+#define pglz_out_ctrl(__ctrlp,__ctrlb,__ctrl,__buf) \
+do { \
+ if ((__ctrl & 0xff) == 0) \
+ { \
+ *(__ctrlp) = __ctrlb; \
+ __ctrlp = (__buf)++; \
+ __ctrlb = 0; \
+ __ctrl = 1; \
+ } \
+} while (0)
+
+
+/* ----------
+ * pglz_out_literal -
+ *
+ * Outputs a literal byte to the destination buffer including the
+ * appropriate control bit.
+ * ----------
+ */
+#define pglz_out_literal(_ctrlp,_ctrlb,_ctrl,_buf,_byte) \
+do { \
+ pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf); \
+ *(_buf)++ = (unsigned char)(_byte); \
+ _ctrl <<= 1; \
+} while (0)
+
+
+/* ----------
+ * pglz_out_tag -
+ *
+ * Outputs a backward reference tag of 2-4 bytes (depending on
+ * offset and length) to the destination buffer including the
+ * appropriate control bit.
+ * ----------
+ */
+#define pglz_out_tag(_ctrlp,_ctrlb,_ctrl,_buf,_len,_off) \
+do { \
+ pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf); \
+ _ctrlb |= _ctrl; \
+ _ctrl <<= 1; \
+ if (_len > 17) \
+ { \
+ (_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | 0x0f); \
+ (_buf)[1] = (unsigned char)(((_off) & 0xff)); \
+ (_buf)[2] = (unsigned char)((_len) - 18); \
+ (_buf) += 3; \
+ } else { \
+ (_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | ((_len) - 3)); \
+ (_buf)[1] = (unsigned char)((_off) & 0xff); \
+ (_buf) += 2; \
+ } \
+} while (0)
+
+
+/* ----------
+ * pglz_find_match -
+ *
+ * Lookup the history table if the actual input stream matches
+ * another sequence of characters, starting somewhere earlier
+ * in the input buffer.
+ * ----------
+ */
+static inline int
+pglz_find_match(int16 *hstart, const char *input, const char *end,
+ int *lenp, int *offp, int good_match, int good_drop, int mask)
+{
+ PGLZ_HistEntry *hent;
+ int16 hentno;
+ int32 len = 0;
+ int32 off = 0;
+
+ /*
+ * Traverse the linked history list until a good enough match is found.
+ */
+ hentno = hstart[pglz_hist_idx(input, end, mask)];
+ hent = &hist_entries[hentno];
+ while (hent != INVALID_ENTRY_PTR)
+ {
+ const char *ip = input;
+ const char *hp = hent->pos;
+ int32 thisoff;
+ int32 thislen;
+
+ /*
+ * Stop if the offset does not fit into our tag anymore.
+ */
+ thisoff = ip - hp;
+ if (thisoff >= 0x0fff)
+ break;
+
+ /*
+ * Determine length of match. A better match must be larger than the
+ * best so far. And if we already have a match of 16 or more bytes,
+ * it's worth the call overhead to use memcmp() to check if this match
+ * is equal for the same size. After that we must fallback to
+ * character by character comparison to know the exact position where
+ * the diff occurred.
+ */
+ thislen = 0;
+ if (len >= 16)
+ {
+ if (memcmp(ip, hp, len) == 0)
+ {
+ thislen = len;
+ ip += len;
+ hp += len;
+ while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH)
+ {
+ thislen++;
+ ip++;
+ hp++;
+ }
+ }
+ }
+ else
+ {
+ while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH)
+ {
+ thislen++;
+ ip++;
+ hp++;
+ }
+ }
+
+ /*
+ * Remember this match as the best (if it is)
+ */
+ if (thislen > len)
+ {
+ len = thislen;
+ off = thisoff;
+ }
+
+ /*
+ * Advance to the next history entry
+ */
+ hent = hent->next;
+
+ /*
+ * Be happy with lesser good matches the more entries we visited. But
+ * no point in doing calculation if we're at end of list.
+ */
+ if (hent != INVALID_ENTRY_PTR)
+ {
+ if (len >= good_match)
+ break;
+ good_match -= (good_match * good_drop) / 100;
+ }
+ }
+
+ /*
+ * Return match information only if it results at least in one byte
+ * reduction.
+ */
+ if (len > 2)
+ {
+ *lenp = len;
+ *offp = off;
+ return 1;
+ }
+
+ return 0;
+}
+
+
+/* ----------
+ * pglz_compress -
+ *
+ * Compresses source into dest using strategy. Returns the number of
+ * bytes written in buffer dest, or -1 if compression fails.
+ * ----------
+ */
+int32
+pglz_compress(const char *source, int32 slen, char *dest,
+ const PGLZ_Strategy *strategy)
+{
+ unsigned char *bp = (unsigned char *) dest;
+ unsigned char *bstart = bp;
+ int hist_next = 1;
+ bool hist_recycle = false;
+ const char *dp = source;
+ const char *dend = source + slen;
+ unsigned char ctrl_dummy = 0;
+ unsigned char *ctrlp = &ctrl_dummy;
+ unsigned char ctrlb = 0;
+ unsigned char ctrl = 0;
+ bool found_match = false;
+ int32 match_len;
+ int32 match_off;
+ int32 good_match;
+ int32 good_drop;
+ int32 result_size;
+ int32 result_max;
+ int32 need_rate;
+ int hashsz;
+ int mask;
+
+ /*
+ * Our fallback strategy is the default.
+ */
+ if (strategy == NULL)
+ strategy = PGLZ_strategy_default;
+
+ /*
+ * If the strategy forbids compression (at all or if source chunk size out
+ * of range), fail.
+ */
+ if (strategy->match_size_good <= 0 ||
+ slen < strategy->min_input_size ||
+ slen > strategy->max_input_size)
+ return -1;
+
+ /*
+ * Limit the match parameters to the supported range.
+ */
+ good_match = strategy->match_size_good;
+ if (good_match > PGLZ_MAX_MATCH)
+ good_match = PGLZ_MAX_MATCH;
+ else if (good_match < 17)
+ good_match = 17;
+
+ good_drop = strategy->match_size_drop;
+ if (good_drop < 0)
+ good_drop = 0;
+ else if (good_drop > 100)
+ good_drop = 100;
+
+ need_rate = strategy->min_comp_rate;
+ if (need_rate < 0)
+ need_rate = 0;
+ else if (need_rate > 99)
+ need_rate = 99;
+
+ /*
+ * Compute the maximum result size allowed by the strategy, namely the
+ * input size minus the minimum wanted compression rate. This had better
+ * be <= slen, else we might overrun the provided output buffer.
+ */
+ if (slen > (INT_MAX / 100))
+ {
+ /* Approximate to avoid overflow */
+ result_max = (slen / 100) * (100 - need_rate);
+ }
+ else
+ result_max = (slen * (100 - need_rate)) / 100;
+
+ /*
+ * Experiments suggest that these hash sizes work pretty well. A large
+ * hash table minimizes collision, but has a higher startup cost. For a
+ * small input, the startup cost dominates. The table size must be a power
+ * of two.
+ */
+ if (slen < 128)
+ hashsz = 512;
+ else if (slen < 256)
+ hashsz = 1024;
+ else if (slen < 512)
+ hashsz = 2048;
+ else if (slen < 1024)
+ hashsz = 4096;
+ else
+ hashsz = 8192;
+ mask = hashsz - 1;
+
+ /*
+ * Initialize the history lists to empty. We do not need to zero the
+ * hist_entries[] array; its entries are initialized as they are used.
+ */
+ memset(hist_start, 0, hashsz * sizeof(int16));
+
+ /*
+ * Compress the source directly into the output buffer.
+ */
+ while (dp < dend)
+ {
+ /*
+ * If we already exceeded the maximum result size, fail.
+ *
+ * We check once per loop; since the loop body could emit as many as 4
+ * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better
+ * allow 4 slop bytes.
+ */
+ if (bp - bstart >= result_max)
+ return -1;
+
+ /*
+ * If we've emitted more than first_success_by bytes without finding
+ * anything compressible at all, fail. This lets us fall out
+ * reasonably quickly when looking at incompressible input (such as
+ * pre-compressed data).
+ */
+ if (!found_match && bp - bstart >= strategy->first_success_by)
+ return -1;
+
+ /*
+ * Try to find a match in the history
+ */
+ if (pglz_find_match(hist_start, dp, dend, &match_len,
+ &match_off, good_match, good_drop, mask))
+ {
+ /*
+ * Create the tag and add history entries for all matched
+ * characters.
+ */
+ pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
+ while (match_len--)
+ {
+ pglz_hist_add(hist_start, hist_entries,
+ hist_next, hist_recycle,
+ dp, dend, mask);
+ dp++; /* Do not do this ++ in the line above! */
+ /* The macro would do it four times - Jan. */
+ }
+ found_match = true;
+ }
+ else
+ {
+ /*
+ * No match found. Copy one literal byte.
+ */
+ pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
+ pglz_hist_add(hist_start, hist_entries,
+ hist_next, hist_recycle,
+ dp, dend, mask);
+ dp++; /* Do not do this ++ in the line above! */
+ /* The macro would do it four times - Jan. */
+ }
+ }
+
+ /*
+ * Write out the last control byte and check that we haven't overrun the
+ * output size allowed by the strategy.
+ */
+ *ctrlp = ctrlb;
+ result_size = bp - bstart;
+ if (result_size >= result_max)
+ return -1;
+
+ /* success */
+ return result_size;
+}
+
+
+/* ----------
+ * pglz_decompress -
+ *
+ * Decompresses source into dest. Returns the number of bytes
+ * decompressed into the destination buffer, or -1 if the
+ * compressed data is corrupted.
+ *
+ * If check_complete is true, the data is considered corrupted
+ * if we don't exactly fill the destination buffer. Callers that
+ * are extracting a slice typically can't apply this check.
+ * ----------
+ */
+int32
+pglz_decompress(const char *source, int32 slen, char *dest,
+ int32 rawsize, bool check_complete)
+{
+ const unsigned char *sp;
+ const unsigned char *srcend;
+ unsigned char *dp;
+ unsigned char *destend;
+
+ sp = (const unsigned char *) source;
+ srcend = ((const unsigned char *) source) + slen;
+ dp = (unsigned char *) dest;
+ destend = dp + rawsize;
+
+ while (sp < srcend && dp < destend)
+ {
+ /*
+ * Read one control byte and process the next 8 items (or as many as
+ * remain in the compressed input).
+ */
+ unsigned char ctrl = *sp++;
+ int ctrlc;
+
+ for (ctrlc = 0; ctrlc < 8 && sp < srcend && dp < destend; ctrlc++)
+ {
+ if (ctrl & 1)
+ {
+ /*
+ * Set control bit means we must read a match tag. The match
+ * is coded with two bytes. First byte uses lower nibble to
+ * code length - 3. Higher nibble contains upper 4 bits of the
+ * offset. The next following byte contains the lower 8 bits
+ * of the offset. If the length is coded as 18, another
+ * extension tag byte tells how much longer the match really
+ * was (0-255).
+ */
+ int32 len;
+ int32 off;
+
+ len = (sp[0] & 0x0f) + 3;
+ off = ((sp[0] & 0xf0) << 4) | sp[1];
+ sp += 2;
+ if (len == 18)
+ len += *sp++;
+
+ /*
+ * Check for corrupt data: if we fell off the end of the
+ * source, or if we obtained off = 0, we have problems. (We
+ * must check this, else we risk an infinite loop below in the
+ * face of corrupt data.)
+ */
+ if (unlikely(sp > srcend || off == 0))
+ return -1;
+
+ /*
+ * Don't emit more data than requested.
+ */
+ len = Min(len, destend - dp);
+
+ /*
+ * Now we copy the bytes specified by the tag from OUTPUT to
+ * OUTPUT (copy len bytes from dp - off to dp). The copied
+ * areas could overlap, so to avoid undefined behavior in
+ * memcpy(), be careful to copy only non-overlapping regions.
+ *
+ * Note that we cannot use memmove() instead, since while its
+ * behavior is well-defined, it's also not what we want.
+ */
+ while (off < len)
+ {
+ /*
+ * We can safely copy "off" bytes since that clearly
+ * results in non-overlapping source and destination.
+ */
+ memcpy(dp, dp - off, off);
+ len -= off;
+ dp += off;
+
+ /*----------
+ * This bit is less obvious: we can double "off" after
+ * each such step. Consider this raw input:
+ * 112341234123412341234
+ * This will be encoded as 5 literal bytes "11234" and
+ * then a match tag with length 16 and offset 4. After
+ * memcpy'ing the first 4 bytes, we will have emitted
+ * 112341234
+ * so we can double "off" to 8, then after the next step
+ * we have emitted
+ * 11234123412341234
+ * Then we can double "off" again, after which it is more
+ * than the remaining "len" so we fall out of this loop
+ * and finish with a non-overlapping copy of the
+ * remainder. In general, a match tag with off < len
+ * implies that the decoded data has a repeat length of
+ * "off". We can handle 1, 2, 4, etc repetitions of the
+ * repeated string per memcpy until we get to a situation
+ * where the final copy step is non-overlapping.
+ *
+ * (Another way to understand this is that we are keeping
+ * the copy source point dp - off the same throughout.)
+ *----------
+ */
+ off += off;
+ }
+ memcpy(dp, dp - off, len);
+ dp += len;
+ }
+ else
+ {
+ /*
+ * An unset control bit means LITERAL BYTE. So we just copy
+ * one from INPUT to OUTPUT.
+ */
+ *dp++ = *sp++;
+ }
+
+ /*
+ * Advance the control bit
+ */
+ ctrl >>= 1;
+ }
+ }
+
+ /*
+ * If requested, check we decompressed the right amount.
+ */
+ if (check_complete && (dp != destend || sp != srcend))
+ return -1;
+
+ /*
+ * That's it.
+ */
+ return (char *) dp - dest;
+}
+
+
+/* ----------
+ * pglz_max_compressed_size -
+ *
+ * Calculate the maximum compressed size for a given amount of raw data.
+ * Return the maximum size, or total compressed size if maximum size is
+ * larger than total compressed size.
+ *
+ * We can't use PGLZ_MAX_OUTPUT for this purpose, because that's used to size
+ * the compression buffer (and abort the compression). It does not really say
+ * what's the maximum compressed size for an input of a given length, and it
+ * may happen that while the whole value is compressible (and thus fits into
+ * PGLZ_MAX_OUTPUT nicely), the prefix is not compressible at all.
+ * ----------
+ */
+int32
+pglz_maximum_compressed_size(int32 rawsize, int32 total_compressed_size)
+{
+ int64 compressed_size;
+
+ /*
+ * pglz uses one control bit per byte, so if the entire desired prefix is
+ * represented as literal bytes, we'll need (rawsize * 9) bits. We care
+ * about bytes though, so be sure to round up not down.
+ *
+ * Use int64 here to prevent overflow during calculation.
+ */
+ compressed_size = ((int64) rawsize * 9 + 7) / 8;
+
+ /*
+ * The above fails to account for a corner case: we could have compressed
+ * data that starts with N-1 or N-2 literal bytes and then has a match tag
+ * of 2 or 3 bytes. It's therefore possible that we need to fetch 1 or 2
+ * more bytes in order to have the whole match tag. (Match tags earlier
+ * in the compressed data don't cause a problem, since they should
+ * represent more decompressed bytes than they occupy themselves.)
+ */
+ compressed_size += 2;
+
+ /*
+ * Maximum compressed size can't be larger than total compressed size.
+ * (This also ensures that our result fits in int32.)
+ */
+ compressed_size = Min(compressed_size, total_compressed_size);
+
+ return (int32) compressed_size;
+}
diff --git a/src/common/pgfnames.c b/src/common/pgfnames.c
new file mode 100644
index 0000000..c12f256
--- /dev/null
+++ b/src/common/pgfnames.c
@@ -0,0 +1,94 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgfnames.c
+ * directory handling functions
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/pgfnames.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <dirent.h>
+
+#ifndef FRONTEND
+#define pg_log_warning(...) elog(WARNING, __VA_ARGS__)
+#else
+#include "common/logging.h"
+#endif
+
+/*
+ * pgfnames
+ *
+ * return a list of the names of objects in the argument directory. Caller
+ * must call pgfnames_cleanup later to free the memory allocated by this
+ * function.
+ */
+char **
+pgfnames(const char *path)
+{
+ DIR *dir;
+ struct dirent *file;
+ char **filenames;
+ int numnames = 0;
+ int fnsize = 200; /* enough for many small dbs */
+
+ dir = opendir(path);
+ if (dir == NULL)
+ {
+ pg_log_warning("could not open directory \"%s\": %m", path);
+ return NULL;
+ }
+
+ filenames = (char **) palloc(fnsize * sizeof(char *));
+
+ while (errno = 0, (file = readdir(dir)) != NULL)
+ {
+ if (strcmp(file->d_name, ".") != 0 && strcmp(file->d_name, "..") != 0)
+ {
+ if (numnames + 1 >= fnsize)
+ {
+ fnsize *= 2;
+ filenames = (char **) repalloc(filenames,
+ fnsize * sizeof(char *));
+ }
+ filenames[numnames++] = pstrdup(file->d_name);
+ }
+ }
+
+ if (errno)
+ pg_log_warning("could not read directory \"%s\": %m", path);
+
+ filenames[numnames] = NULL;
+
+ if (closedir(dir))
+ pg_log_warning("could not close directory \"%s\": %m", path);
+
+ return filenames;
+}
+
+
+/*
+ * pgfnames_cleanup
+ *
+ * deallocate memory used for filenames
+ */
+void
+pgfnames_cleanup(char **filenames)
+{
+ char **fn;
+
+ for (fn = filenames; *fn; fn++)
+ pfree(*fn);
+
+ pfree(filenames);
+}
diff --git a/src/common/protocol_openssl.c b/src/common/protocol_openssl.c
new file mode 100644
index 0000000..bf6c624
--- /dev/null
+++ b/src/common/protocol_openssl.c
@@ -0,0 +1,117 @@
+/*-------------------------------------------------------------------------
+ *
+ * protocol_openssl.c
+ * OpenSSL functionality shared between frontend and backend
+ *
+ * This should only be used if code is compiled with OpenSSL support.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/protocol_openssl.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/openssl.h"
+
+/*
+ * Replacements for APIs introduced in OpenSSL 1.1.0.
+ */
+#ifndef SSL_CTX_set_min_proto_version
+
+/*
+ * OpenSSL versions that support TLS 1.3 shouldn't get here because they
+ * already have these functions. So we don't have to keep updating the below
+ * code for every new TLS version, and eventually it can go away. But let's
+ * just check this to make sure ...
+ */
+#ifdef TLS1_3_VERSION
+#error OpenSSL version mismatch
+#endif
+
+int
+SSL_CTX_set_min_proto_version(SSL_CTX *ctx, int version)
+{
+ int ssl_options = SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3;
+
+ if (version > TLS1_VERSION)
+ ssl_options |= SSL_OP_NO_TLSv1;
+
+ /*
+ * Some OpenSSL versions define TLS*_VERSION macros but not the
+ * corresponding SSL_OP_NO_* macro, so in those cases we have to return
+ * unsuccessfully here.
+ */
+#ifdef TLS1_1_VERSION
+ if (version > TLS1_1_VERSION)
+ {
+#ifdef SSL_OP_NO_TLSv1_1
+ ssl_options |= SSL_OP_NO_TLSv1_1;
+#else
+ return 0;
+#endif
+ }
+#endif
+#ifdef TLS1_2_VERSION
+ if (version > TLS1_2_VERSION)
+ {
+#ifdef SSL_OP_NO_TLSv1_2
+ ssl_options |= SSL_OP_NO_TLSv1_2;
+#else
+ return 0;
+#endif
+ }
+#endif
+
+ SSL_CTX_set_options(ctx, ssl_options);
+
+ return 1; /* success */
+}
+
+int
+SSL_CTX_set_max_proto_version(SSL_CTX *ctx, int version)
+{
+ int ssl_options = 0;
+
+ AssertArg(version != 0);
+
+ /*
+ * Some OpenSSL versions define TLS*_VERSION macros but not the
+ * corresponding SSL_OP_NO_* macro, so in those cases we have to return
+ * unsuccessfully here.
+ */
+#ifdef TLS1_1_VERSION
+ if (version < TLS1_1_VERSION)
+ {
+#ifdef SSL_OP_NO_TLSv1_1
+ ssl_options |= SSL_OP_NO_TLSv1_1;
+#else
+ return 0;
+#endif
+ }
+#endif
+#ifdef TLS1_2_VERSION
+ if (version < TLS1_2_VERSION)
+ {
+#ifdef SSL_OP_NO_TLSv1_2
+ ssl_options |= SSL_OP_NO_TLSv1_2;
+#else
+ return 0;
+#endif
+ }
+#endif
+
+ SSL_CTX_set_options(ctx, ssl_options);
+
+ return 1; /* success */
+}
+
+#endif /* !SSL_CTX_set_min_proto_version */
diff --git a/src/common/psprintf.c b/src/common/psprintf.c
new file mode 100644
index 0000000..4d9d15e
--- /dev/null
+++ b/src/common/psprintf.c
@@ -0,0 +1,151 @@
+/*-------------------------------------------------------------------------
+ *
+ * psprintf.c
+ * sprintf into an allocated-on-demand buffer
+ *
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/psprintf.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+
+#include "postgres.h"
+
+#include "utils/memutils.h"
+
+#else
+
+#include "postgres_fe.h"
+
+/* It's possible we could use a different value for this in frontend code */
+#define MaxAllocSize ((Size) 0x3fffffff) /* 1 gigabyte - 1 */
+
+#endif
+
+
+/*
+ * psprintf
+ *
+ * Format text data under the control of fmt (an sprintf-style format string)
+ * and return it in an allocated-on-demand buffer. The buffer is allocated
+ * with palloc in the backend, or malloc in frontend builds. Caller is
+ * responsible to free the buffer when no longer needed, if appropriate.
+ *
+ * Errors are not returned to the caller, but are reported via elog(ERROR)
+ * in the backend, or printf-to-stderr-and-exit() in frontend builds.
+ * One should therefore think twice about using this in libpq.
+ */
+char *
+psprintf(const char *fmt,...)
+{
+ int save_errno = errno;
+ size_t len = 128; /* initial assumption about buffer size */
+
+ for (;;)
+ {
+ char *result;
+ va_list args;
+ size_t newlen;
+
+ /*
+ * Allocate result buffer. Note that in frontend this maps to malloc
+ * with exit-on-error.
+ */
+ result = (char *) palloc(len);
+
+ /* Try to format the data. */
+ errno = save_errno;
+ va_start(args, fmt);
+ newlen = pvsnprintf(result, len, fmt, args);
+ va_end(args);
+
+ if (newlen < len)
+ return result; /* success */
+
+ /* Release buffer and loop around to try again with larger len. */
+ pfree(result);
+ len = newlen;
+ }
+}
+
+/*
+ * pvsnprintf
+ *
+ * Attempt to format text data under the control of fmt (an sprintf-style
+ * format string) and insert it into buf (which has length len).
+ *
+ * If successful, return the number of bytes emitted, not counting the
+ * trailing zero byte. This will always be strictly less than len.
+ *
+ * If there's not enough space in buf, return an estimate of the buffer size
+ * needed to succeed (this *must* be more than the given len, else callers
+ * might loop infinitely).
+ *
+ * Other error cases do not return, but exit via elog(ERROR) or exit().
+ * Hence, this shouldn't be used inside libpq.
+ *
+ * Caution: callers must be sure to preserve their entry-time errno
+ * when looping, in case the fmt contains "%m".
+ *
+ * Note that the semantics of the return value are not exactly C99's.
+ * First, we don't promise that the estimated buffer size is exactly right;
+ * callers must be prepared to loop multiple times to get the right size.
+ * (Given a C99-compliant vsnprintf, that won't happen, but it is rumored
+ * that some implementations don't always return the same value ...)
+ * Second, we return the recommended buffer size, not one less than that;
+ * this lets overflow concerns be handled here rather than in the callers.
+ */
+size_t
+pvsnprintf(char *buf, size_t len, const char *fmt, va_list args)
+{
+ int nprinted;
+
+ nprinted = vsnprintf(buf, len, fmt, args);
+
+ /* We assume failure means the fmt is bogus, hence hard failure is OK */
+ if (unlikely(nprinted < 0))
+ {
+#ifndef FRONTEND
+ elog(ERROR, "vsnprintf failed: %m with format string \"%s\"", fmt);
+#else
+ fprintf(stderr, "vsnprintf failed: %s with format string \"%s\"\n",
+ strerror(errno), fmt);
+ exit(EXIT_FAILURE);
+#endif
+ }
+
+ if ((size_t) nprinted < len)
+ {
+ /* Success. Note nprinted does not include trailing null. */
+ return (size_t) nprinted;
+ }
+
+ /*
+ * We assume a C99-compliant vsnprintf, so believe its estimate of the
+ * required space, and add one for the trailing null. (If it's wrong, the
+ * logic will still work, but we may loop multiple times.)
+ *
+ * Choke if the required space would exceed MaxAllocSize. Note we use
+ * this palloc-oriented overflow limit even when in frontend.
+ */
+ if (unlikely((size_t) nprinted > MaxAllocSize - 1))
+ {
+#ifndef FRONTEND
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("out of memory")));
+#else
+ fprintf(stderr, _("out of memory\n"));
+ exit(EXIT_FAILURE);
+#endif
+ }
+
+ return nprinted + 1;
+}
diff --git a/src/common/relpath.c b/src/common/relpath.c
new file mode 100644
index 0000000..ad733d1
--- /dev/null
+++ b/src/common/relpath.c
@@ -0,0 +1,210 @@
+/*-------------------------------------------------------------------------
+ * relpath.c
+ * Shared frontend/backend code to compute pathnames of relation files
+ *
+ * This module also contains some logic associated with fork names.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/relpath.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "catalog/pg_tablespace_d.h"
+#include "common/relpath.h"
+#include "storage/backendid.h"
+
+
+/*
+ * Lookup table of fork name by fork number.
+ *
+ * If you add a new entry, remember to update the errhint in
+ * forkname_to_number() below, and update the SGML documentation for
+ * pg_relation_size().
+ */
+const char *const forkNames[] = {
+ "main", /* MAIN_FORKNUM */
+ "fsm", /* FSM_FORKNUM */
+ "vm", /* VISIBILITYMAP_FORKNUM */
+ "init" /* INIT_FORKNUM */
+};
+
+StaticAssertDecl(lengthof(forkNames) == (MAX_FORKNUM + 1),
+ "array length mismatch");
+
+/*
+ * forkname_to_number - look up fork number by name
+ *
+ * In backend, we throw an error for no match; in frontend, we just
+ * return InvalidForkNumber.
+ */
+ForkNumber
+forkname_to_number(const char *forkName)
+{
+ ForkNumber forkNum;
+
+ for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++)
+ if (strcmp(forkNames[forkNum], forkName) == 0)
+ return forkNum;
+
+#ifndef FRONTEND
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid fork name"),
+ errhint("Valid fork names are \"main\", \"fsm\", "
+ "\"vm\", and \"init\".")));
+#endif
+
+ return InvalidForkNumber;
+}
+
+/*
+ * forkname_chars
+ * We use this to figure out whether a filename could be a relation
+ * fork (as opposed to an oddly named stray file that somehow ended
+ * up in the database directory). If the passed string begins with
+ * a fork name (other than the main fork name), we return its length,
+ * and set *fork (if not NULL) to the fork number. If not, we return 0.
+ *
+ * Note that the present coding assumes that there are no fork names which
+ * are prefixes of other fork names.
+ */
+int
+forkname_chars(const char *str, ForkNumber *fork)
+{
+ ForkNumber forkNum;
+
+ for (forkNum = 1; forkNum <= MAX_FORKNUM; forkNum++)
+ {
+ int len = strlen(forkNames[forkNum]);
+
+ if (strncmp(forkNames[forkNum], str, len) == 0)
+ {
+ if (fork)
+ *fork = forkNum;
+ return len;
+ }
+ }
+ if (fork)
+ *fork = InvalidForkNumber;
+ return 0;
+}
+
+
+/*
+ * GetDatabasePath - construct path to a database directory
+ *
+ * Result is a palloc'd string.
+ *
+ * XXX this must agree with GetRelationPath()!
+ */
+char *
+GetDatabasePath(Oid dbNode, Oid spcNode)
+{
+ if (spcNode == GLOBALTABLESPACE_OID)
+ {
+ /* Shared system relations live in {datadir}/global */
+ Assert(dbNode == 0);
+ return pstrdup("global");
+ }
+ else if (spcNode == DEFAULTTABLESPACE_OID)
+ {
+ /* The default tablespace is {datadir}/base */
+ return psprintf("base/%u", dbNode);
+ }
+ else
+ {
+ /* All other tablespaces are accessed via symlinks */
+ return psprintf("pg_tblspc/%u/%s/%u",
+ spcNode, TABLESPACE_VERSION_DIRECTORY, dbNode);
+ }
+}
+
+/*
+ * GetRelationPath - construct path to a relation's file
+ *
+ * Result is a palloc'd string.
+ *
+ * Note: ideally, backendId would be declared as type BackendId, but relpath.h
+ * would have to include a backend-only header to do that; doesn't seem worth
+ * the trouble considering BackendId is just int anyway.
+ */
+char *
+GetRelationPath(Oid dbNode, Oid spcNode, Oid relNode,
+ int backendId, ForkNumber forkNumber)
+{
+ char *path;
+
+ if (spcNode == GLOBALTABLESPACE_OID)
+ {
+ /* Shared system relations live in {datadir}/global */
+ Assert(dbNode == 0);
+ Assert(backendId == InvalidBackendId);
+ if (forkNumber != MAIN_FORKNUM)
+ path = psprintf("global/%u_%s",
+ relNode, forkNames[forkNumber]);
+ else
+ path = psprintf("global/%u", relNode);
+ }
+ else if (spcNode == DEFAULTTABLESPACE_OID)
+ {
+ /* The default tablespace is {datadir}/base */
+ if (backendId == InvalidBackendId)
+ {
+ if (forkNumber != MAIN_FORKNUM)
+ path = psprintf("base/%u/%u_%s",
+ dbNode, relNode,
+ forkNames[forkNumber]);
+ else
+ path = psprintf("base/%u/%u",
+ dbNode, relNode);
+ }
+ else
+ {
+ if (forkNumber != MAIN_FORKNUM)
+ path = psprintf("base/%u/t%d_%u_%s",
+ dbNode, backendId, relNode,
+ forkNames[forkNumber]);
+ else
+ path = psprintf("base/%u/t%d_%u",
+ dbNode, backendId, relNode);
+ }
+ }
+ else
+ {
+ /* All other tablespaces are accessed via symlinks */
+ if (backendId == InvalidBackendId)
+ {
+ if (forkNumber != MAIN_FORKNUM)
+ path = psprintf("pg_tblspc/%u/%s/%u/%u_%s",
+ spcNode, TABLESPACE_VERSION_DIRECTORY,
+ dbNode, relNode,
+ forkNames[forkNumber]);
+ else
+ path = psprintf("pg_tblspc/%u/%s/%u/%u",
+ spcNode, TABLESPACE_VERSION_DIRECTORY,
+ dbNode, relNode);
+ }
+ else
+ {
+ if (forkNumber != MAIN_FORKNUM)
+ path = psprintf("pg_tblspc/%u/%s/%u/t%d_%u_%s",
+ spcNode, TABLESPACE_VERSION_DIRECTORY,
+ dbNode, backendId, relNode,
+ forkNames[forkNumber]);
+ else
+ path = psprintf("pg_tblspc/%u/%s/%u/t%d_%u",
+ spcNode, TABLESPACE_VERSION_DIRECTORY,
+ dbNode, backendId, relNode);
+ }
+ }
+ return path;
+}
diff --git a/src/common/restricted_token.c b/src/common/restricted_token.c
new file mode 100644
index 0000000..d8d3aef
--- /dev/null
+++ b/src/common/restricted_token.c
@@ -0,0 +1,202 @@
+/*-------------------------------------------------------------------------
+ *
+ * restricted_token.c
+ * helper routine to ensure restricted token on Windows
+ *
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/restricted_token.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#error "This file is not expected to be compiled for backend code"
+#endif
+
+#include "postgres_fe.h"
+
+#include "common/logging.h"
+#include "common/restricted_token.h"
+
+#ifdef WIN32
+
+/* internal vars */
+char *restrict_env;
+
+typedef BOOL (WINAPI * __CreateRestrictedToken) (HANDLE, DWORD, DWORD, PSID_AND_ATTRIBUTES, DWORD, PLUID_AND_ATTRIBUTES, DWORD, PSID_AND_ATTRIBUTES, PHANDLE);
+
+/* Windows API define missing from some versions of MingW headers */
+#ifndef DISABLE_MAX_PRIVILEGE
+#define DISABLE_MAX_PRIVILEGE 0x1
+#endif
+
+/*
+ * Create a restricted token and execute the specified process with it.
+ *
+ * Returns restricted token on success and 0 on failure.
+ *
+ * On any system not containing the required functions, do nothing
+ * but still report an error.
+ */
+HANDLE
+CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION *processInfo)
+{
+ BOOL b;
+ STARTUPINFO si;
+ HANDLE origToken;
+ HANDLE restrictedToken;
+ SID_IDENTIFIER_AUTHORITY NtAuthority = {SECURITY_NT_AUTHORITY};
+ SID_AND_ATTRIBUTES dropSids[2];
+ __CreateRestrictedToken _CreateRestrictedToken;
+ HANDLE Advapi32Handle;
+
+ ZeroMemory(&si, sizeof(si));
+ si.cb = sizeof(si);
+
+ Advapi32Handle = LoadLibrary("ADVAPI32.DLL");
+ if (Advapi32Handle == NULL)
+ {
+ pg_log_error("could not load library \"%s\": error code %lu",
+ "ADVAPI32.DLL", GetLastError());
+ return 0;
+ }
+
+ _CreateRestrictedToken = (__CreateRestrictedToken) GetProcAddress(Advapi32Handle, "CreateRestrictedToken");
+
+ if (_CreateRestrictedToken == NULL)
+ {
+ pg_log_error("cannot create restricted tokens on this platform: error code %lu",
+ GetLastError());
+ FreeLibrary(Advapi32Handle);
+ return 0;
+ }
+
+ /* Open the current token to use as a base for the restricted one */
+ if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ALL_ACCESS, &origToken))
+ {
+ pg_log_error("could not open process token: error code %lu",
+ GetLastError());
+ FreeLibrary(Advapi32Handle);
+ return 0;
+ }
+
+ /* Allocate list of SIDs to remove */
+ ZeroMemory(&dropSids, sizeof(dropSids));
+ if (!AllocateAndInitializeSid(&NtAuthority, 2,
+ SECURITY_BUILTIN_DOMAIN_RID, DOMAIN_ALIAS_RID_ADMINS, 0, 0, 0, 0, 0,
+ 0, &dropSids[0].Sid) ||
+ !AllocateAndInitializeSid(&NtAuthority, 2,
+ SECURITY_BUILTIN_DOMAIN_RID, DOMAIN_ALIAS_RID_POWER_USERS, 0, 0, 0, 0, 0,
+ 0, &dropSids[1].Sid))
+ {
+ pg_log_error("could not allocate SIDs: error code %lu",
+ GetLastError());
+ CloseHandle(origToken);
+ FreeLibrary(Advapi32Handle);
+ return 0;
+ }
+
+ b = _CreateRestrictedToken(origToken,
+ DISABLE_MAX_PRIVILEGE,
+ sizeof(dropSids) / sizeof(dropSids[0]),
+ dropSids,
+ 0, NULL,
+ 0, NULL,
+ &restrictedToken);
+
+ FreeSid(dropSids[1].Sid);
+ FreeSid(dropSids[0].Sid);
+ CloseHandle(origToken);
+ FreeLibrary(Advapi32Handle);
+
+ if (!b)
+ {
+ pg_log_error("could not create restricted token: error code %lu", GetLastError());
+ return 0;
+ }
+
+#ifndef __CYGWIN__
+ AddUserToTokenDacl(restrictedToken);
+#endif
+
+ if (!CreateProcessAsUser(restrictedToken,
+ NULL,
+ cmd,
+ NULL,
+ NULL,
+ TRUE,
+ CREATE_SUSPENDED,
+ NULL,
+ NULL,
+ &si,
+ processInfo))
+
+ {
+ pg_log_error("could not start process for command \"%s\": error code %lu", cmd, GetLastError());
+ return 0;
+ }
+
+ ResumeThread(processInfo->hThread);
+ return restrictedToken;
+}
+#endif
+
+/*
+ * On Windows make sure that we are running with a restricted token,
+ * On other platforms do nothing.
+ */
+void
+get_restricted_token(void)
+{
+#ifdef WIN32
+ HANDLE restrictedToken;
+
+ /*
+ * Before we execute another program, make sure that we are running with a
+ * restricted token. If not, re-execute ourselves with one.
+ */
+
+ if ((restrict_env = getenv("PG_RESTRICT_EXEC")) == NULL
+ || strcmp(restrict_env, "1") != 0)
+ {
+ PROCESS_INFORMATION pi;
+ char *cmdline;
+
+ ZeroMemory(&pi, sizeof(pi));
+
+ cmdline = pg_strdup(GetCommandLine());
+
+ putenv("PG_RESTRICT_EXEC=1");
+
+ if ((restrictedToken = CreateRestrictedProcess(cmdline, &pi)) == 0)
+ {
+ pg_log_error("could not re-execute with restricted token: error code %lu", GetLastError());
+ }
+ else
+ {
+ /*
+ * Successfully re-executed. Now wait for child process to capture
+ * the exit code.
+ */
+ DWORD x;
+
+ CloseHandle(restrictedToken);
+ CloseHandle(pi.hThread);
+ WaitForSingleObject(pi.hProcess, INFINITE);
+
+ if (!GetExitCodeProcess(pi.hProcess, &x))
+ {
+ pg_log_error("could not get exit code from subprocess: error code %lu", GetLastError());
+ exit(1);
+ }
+ exit(x);
+ }
+ pg_free(cmdline);
+ }
+#endif
+}
diff --git a/src/common/rmtree.c b/src/common/rmtree.c
new file mode 100644
index 0000000..9e8eb9e
--- /dev/null
+++ b/src/common/rmtree.c
@@ -0,0 +1,122 @@
+/*-------------------------------------------------------------------------
+ *
+ * rmtree.c
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/rmtree.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <unistd.h>
+#include <sys/stat.h>
+
+#ifndef FRONTEND
+#define pg_log_warning(...) elog(WARNING, __VA_ARGS__)
+#else
+#include "common/logging.h"
+#endif
+
+
+/*
+ * rmtree
+ *
+ * Delete a directory tree recursively.
+ * Assumes path points to a valid directory.
+ * Deletes everything under path.
+ * If rmtopdir is true deletes the directory too.
+ * Returns true if successful, false if there was any problem.
+ * (The details of the problem are reported already, so caller
+ * doesn't really have to say anything more, but most do.)
+ */
+bool
+rmtree(const char *path, bool rmtopdir)
+{
+ bool result = true;
+ char pathbuf[MAXPGPATH];
+ char **filenames;
+ char **filename;
+ struct stat statbuf;
+
+ /*
+ * we copy all the names out of the directory before we start modifying
+ * it.
+ */
+ filenames = pgfnames(path);
+
+ if (filenames == NULL)
+ return false;
+
+ /* now we have the names we can start removing things */
+ for (filename = filenames; *filename; filename++)
+ {
+ snprintf(pathbuf, MAXPGPATH, "%s/%s", path, *filename);
+
+ /*
+ * It's ok if the file is not there anymore; we were just about to
+ * delete it anyway.
+ *
+ * This is not an academic possibility. One scenario where this
+ * happens is when bgwriter has a pending unlink request for a file in
+ * a database that's being dropped. In dropdb(), we call
+ * ForgetDatabaseSyncRequests() to flush out any such pending unlink
+ * requests, but because that's asynchronous, it's not guaranteed that
+ * the bgwriter receives the message in time.
+ */
+ if (lstat(pathbuf, &statbuf) != 0)
+ {
+ if (errno != ENOENT)
+ {
+ pg_log_warning("could not stat file or directory \"%s\": %m",
+ pathbuf);
+ result = false;
+ }
+ continue;
+ }
+
+ if (S_ISDIR(statbuf.st_mode))
+ {
+ /* call ourselves recursively for a directory */
+ if (!rmtree(pathbuf, true))
+ {
+ /* we already reported the error */
+ result = false;
+ }
+ }
+ else
+ {
+ if (unlink(pathbuf) != 0)
+ {
+ if (errno != ENOENT)
+ {
+ pg_log_warning("could not remove file or directory \"%s\": %m",
+ pathbuf);
+ result = false;
+ }
+ }
+ }
+ }
+
+ if (rmtopdir)
+ {
+ if (rmdir(path) != 0)
+ {
+ pg_log_warning("could not remove file or directory \"%s\": %m",
+ path);
+ result = false;
+ }
+ }
+
+ pgfnames_cleanup(filenames);
+
+ return result;
+}
diff --git a/src/common/ryu_common.h b/src/common/ryu_common.h
new file mode 100644
index 0000000..2e3213d
--- /dev/null
+++ b/src/common/ryu_common.h
@@ -0,0 +1,133 @@
+/*---------------------------------------------------------------------------
+ *
+ * Common routines for Ryu floating-point output.
+ *
+ * Portions Copyright (c) 2018-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/ryu_common.h
+ *
+ * This is a modification of code taken from github.com/ulfjack/ryu under the
+ * terms of the Boost license (not the Apache license). The original copyright
+ * notice follows:
+ *
+ * Copyright 2018 Ulf Adams
+ *
+ * The contents of this file may be used under the terms of the Apache
+ * License, Version 2.0.
+ *
+ * (See accompanying file LICENSE-Apache or copy at
+ * http://www.apache.org/licenses/LICENSE-2.0)
+ *
+ * Alternatively, the contents of this file may be used under the terms of the
+ * Boost Software License, Version 1.0.
+ *
+ * (See accompanying file LICENSE-Boost or copy at
+ * https://www.boost.org/LICENSE_1_0.txt)
+ *
+ * Unless required by applicable law or agreed to in writing, this software is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
+ *
+ *---------------------------------------------------------------------------
+ */
+#ifndef RYU_COMMON_H
+#define RYU_COMMON_H
+
+/*
+ * Upstream Ryu's output is always the shortest possible. But we adjust that
+ * slightly to improve portability: we avoid outputting the exact midpoint
+ * value between two representable floats, since that relies on the reader
+ * getting the round-to-even rule correct, which seems to be the common
+ * failure mode.
+ *
+ * Defining this to 1 would restore the upstream behavior.
+ */
+#define STRICTLY_SHORTEST 0
+
+#if SIZEOF_SIZE_T < 8
+#define RYU_32_BIT_PLATFORM
+#endif
+
+/* Returns e == 0 ? 1 : ceil(log_2(5^e)). */
+static inline uint32
+pow5bits(const int32 e)
+{
+ /*
+ * This approximation works up to the point that the multiplication
+ * overflows at e = 3529.
+ *
+ * If the multiplication were done in 64 bits, it would fail at 5^4004
+ * which is just greater than 2^9297.
+ */
+ Assert(e >= 0);
+ Assert(e <= 3528);
+ return ((((uint32) e) * 1217359) >> 19) + 1;
+}
+
+/* Returns floor(log_10(2^e)). */
+static inline int32
+log10Pow2(const int32 e)
+{
+ /*
+ * The first value this approximation fails for is 2^1651 which is just
+ * greater than 10^297.
+ */
+ Assert(e >= 0);
+ Assert(e <= 1650);
+ return (int32) ((((uint32) e) * 78913) >> 18);
+}
+
+/* Returns floor(log_10(5^e)). */
+static inline int32
+log10Pow5(const int32 e)
+{
+ /*
+ * The first value this approximation fails for is 5^2621 which is just
+ * greater than 10^1832.
+ */
+ Assert(e >= 0);
+ Assert(e <= 2620);
+ return (int32) ((((uint32) e) * 732923) >> 20);
+}
+
+static inline int
+copy_special_str(char *const result, const bool sign, const bool exponent, const bool mantissa)
+{
+ if (mantissa)
+ {
+ memcpy(result, "NaN", 3);
+ return 3;
+ }
+ if (sign)
+ {
+ result[0] = '-';
+ }
+ if (exponent)
+ {
+ memcpy(result + sign, "Infinity", 8);
+ return sign + 8;
+ }
+ result[sign] = '0';
+ return sign + 1;
+}
+
+static inline uint32
+float_to_bits(const float f)
+{
+ uint32 bits = 0;
+
+ memcpy(&bits, &f, sizeof(float));
+ return bits;
+}
+
+static inline uint64
+double_to_bits(const double d)
+{
+ uint64 bits = 0;
+
+ memcpy(&bits, &d, sizeof(double));
+ return bits;
+}
+
+#endif /* RYU_COMMON_H */
diff --git a/src/common/saslprep.c b/src/common/saslprep.c
new file mode 100644
index 0000000..2dedf6b
--- /dev/null
+++ b/src/common/saslprep.c
@@ -0,0 +1,1278 @@
+/*-------------------------------------------------------------------------
+ * saslprep.c
+ * SASLprep normalization, for SCRAM authentication
+ *
+ * The SASLprep algorithm is used to process a user-supplied password into
+ * canonical form. For more details, see:
+ *
+ * [RFC3454] Preparation of Internationalized Strings ("stringprep"),
+ * http://www.ietf.org/rfc/rfc3454.txt
+ *
+ * [RFC4013] SASLprep: Stringprep Profile for User Names and Passwords
+ * http://www.ietf.org/rfc/rfc4013.txt
+ *
+ *
+ * Portions Copyright (c) 2017-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/saslprep.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/saslprep.h"
+#include "common/unicode_norm.h"
+#include "mb/pg_wchar.h"
+
+/*
+ * Limit on how large password's we will try to process. A password
+ * larger than this will be treated the same as out-of-memory.
+ */
+#define MAX_PASSWORD_LENGTH 1024
+
+/*
+ * In backend, we will use palloc/pfree. In frontend, use malloc, and
+ * return SASLPREP_OOM on out-of-memory.
+ */
+#ifndef FRONTEND
+#define STRDUP(s) pstrdup(s)
+#define ALLOC(size) palloc(size)
+#define FREE(size) pfree(size)
+#else
+#define STRDUP(s) strdup(s)
+#define ALLOC(size) malloc(size)
+#define FREE(size) free(size)
+#endif
+
+/* Prototypes for local functions */
+static int codepoint_range_cmp(const void *a, const void *b);
+static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize);
+static int pg_utf8_string_len(const char *source);
+static bool pg_is_ascii_string(const char *p);
+
+/*
+ * Stringprep Mapping Tables.
+ *
+ * The stringprep specification includes a number of tables of Unicode
+ * codepoints, used in different parts of the algorithm. They are below,
+ * as arrays of codepoint ranges. Each range is a pair of codepoints,
+ * for the first and last codepoint included the range (inclusive!).
+ */
+
+/*
+ * C.1.2 Non-ASCII space characters
+ *
+ * These are all mapped to the ASCII space character (U+00A0).
+ */
+static const pg_wchar non_ascii_space_ranges[] =
+{
+ 0x00A0, 0x00A0,
+ 0x1680, 0x1680,
+ 0x2000, 0x200B,
+ 0x202F, 0x202F,
+ 0x205F, 0x205F,
+ 0x3000, 0x3000
+};
+
+/*
+ * B.1 Commonly mapped to nothing
+ *
+ * If any of these appear in the input, they are removed.
+ */
+static const pg_wchar commonly_mapped_to_nothing_ranges[] =
+{
+ 0x00AD, 0x00AD,
+ 0x034F, 0x034F,
+ 0x1806, 0x1806,
+ 0x180B, 0x180D,
+ 0x200B, 0x200D,
+ 0x2060, 0x2060,
+ 0xFE00, 0xFE0F,
+ 0xFEFF, 0xFEFF
+};
+
+/*
+ * prohibited_output_ranges is a union of all the characters from
+ * the following tables:
+ *
+ * C.1.2 Non-ASCII space characters
+ * C.2.1 ASCII control characters
+ * C.2.2 Non-ASCII control characters
+ * C.3 Private Use characters
+ * C.4 Non-character code points
+ * C.5 Surrogate code points
+ * C.6 Inappropriate for plain text characters
+ * C.7 Inappropriate for canonical representation characters
+ * C.7 Change display properties or deprecated characters
+ * C.8 Tagging characters
+ *
+ * These are the tables that are listed as "prohibited output"
+ * characters in the SASLprep profile.
+ *
+ * The comment after each code range indicates which source table
+ * the code came from. Note that there is some overlap in the source
+ * tables, so one code might originate from multiple source tables.
+ * Adjacent ranges have also been merged together, to save space.
+ */
+static const pg_wchar prohibited_output_ranges[] =
+{
+ 0x0000, 0x001F, /* C.2.1 */
+ 0x007F, 0x00A0, /* C.1.2, C.2.1, C.2.2 */
+ 0x0340, 0x0341, /* C.8 */
+ 0x06DD, 0x06DD, /* C.2.2 */
+ 0x070F, 0x070F, /* C.2.2 */
+ 0x1680, 0x1680, /* C.1.2 */
+ 0x180E, 0x180E, /* C.2.2 */
+ 0x2000, 0x200F, /* C.1.2, C.2.2, C.8 */
+ 0x2028, 0x202F, /* C.1.2, C.2.2, C.8 */
+ 0x205F, 0x2063, /* C.1.2, C.2.2 */
+ 0x206A, 0x206F, /* C.2.2, C.8 */
+ 0x2FF0, 0x2FFB, /* C.7 */
+ 0x3000, 0x3000, /* C.1.2 */
+ 0xD800, 0xF8FF, /* C.3, C.5 */
+ 0xFDD0, 0xFDEF, /* C.4 */
+ 0xFEFF, 0xFEFF, /* C.2.2 */
+ 0xFFF9, 0xFFFF, /* C.2.2, C.4, C.6 */
+ 0x1D173, 0x1D17A, /* C.2.2 */
+ 0x1FFFE, 0x1FFFF, /* C.4 */
+ 0x2FFFE, 0x2FFFF, /* C.4 */
+ 0x3FFFE, 0x3FFFF, /* C.4 */
+ 0x4FFFE, 0x4FFFF, /* C.4 */
+ 0x5FFFE, 0x5FFFF, /* C.4 */
+ 0x6FFFE, 0x6FFFF, /* C.4 */
+ 0x7FFFE, 0x7FFFF, /* C.4 */
+ 0x8FFFE, 0x8FFFF, /* C.4 */
+ 0x9FFFE, 0x9FFFF, /* C.4 */
+ 0xAFFFE, 0xAFFFF, /* C.4 */
+ 0xBFFFE, 0xBFFFF, /* C.4 */
+ 0xCFFFE, 0xCFFFF, /* C.4 */
+ 0xDFFFE, 0xDFFFF, /* C.4 */
+ 0xE0001, 0xE0001, /* C.9 */
+ 0xE0020, 0xE007F, /* C.9 */
+ 0xEFFFE, 0xEFFFF, /* C.4 */
+ 0xF0000, 0xFFFFF, /* C.3, C.4 */
+ 0x100000, 0x10FFFF /* C.3, C.4 */
+};
+
+/* A.1 Unassigned code points in Unicode 3.2 */
+static const pg_wchar unassigned_codepoint_ranges[] =
+{
+ 0x0221, 0x0221,
+ 0x0234, 0x024F,
+ 0x02AE, 0x02AF,
+ 0x02EF, 0x02FF,
+ 0x0350, 0x035F,
+ 0x0370, 0x0373,
+ 0x0376, 0x0379,
+ 0x037B, 0x037D,
+ 0x037F, 0x0383,
+ 0x038B, 0x038B,
+ 0x038D, 0x038D,
+ 0x03A2, 0x03A2,
+ 0x03CF, 0x03CF,
+ 0x03F7, 0x03FF,
+ 0x0487, 0x0487,
+ 0x04CF, 0x04CF,
+ 0x04F6, 0x04F7,
+ 0x04FA, 0x04FF,
+ 0x0510, 0x0530,
+ 0x0557, 0x0558,
+ 0x0560, 0x0560,
+ 0x0588, 0x0588,
+ 0x058B, 0x0590,
+ 0x05A2, 0x05A2,
+ 0x05BA, 0x05BA,
+ 0x05C5, 0x05CF,
+ 0x05EB, 0x05EF,
+ 0x05F5, 0x060B,
+ 0x060D, 0x061A,
+ 0x061C, 0x061E,
+ 0x0620, 0x0620,
+ 0x063B, 0x063F,
+ 0x0656, 0x065F,
+ 0x06EE, 0x06EF,
+ 0x06FF, 0x06FF,
+ 0x070E, 0x070E,
+ 0x072D, 0x072F,
+ 0x074B, 0x077F,
+ 0x07B2, 0x0900,
+ 0x0904, 0x0904,
+ 0x093A, 0x093B,
+ 0x094E, 0x094F,
+ 0x0955, 0x0957,
+ 0x0971, 0x0980,
+ 0x0984, 0x0984,
+ 0x098D, 0x098E,
+ 0x0991, 0x0992,
+ 0x09A9, 0x09A9,
+ 0x09B1, 0x09B1,
+ 0x09B3, 0x09B5,
+ 0x09BA, 0x09BB,
+ 0x09BD, 0x09BD,
+ 0x09C5, 0x09C6,
+ 0x09C9, 0x09CA,
+ 0x09CE, 0x09D6,
+ 0x09D8, 0x09DB,
+ 0x09DE, 0x09DE,
+ 0x09E4, 0x09E5,
+ 0x09FB, 0x0A01,
+ 0x0A03, 0x0A04,
+ 0x0A0B, 0x0A0E,
+ 0x0A11, 0x0A12,
+ 0x0A29, 0x0A29,
+ 0x0A31, 0x0A31,
+ 0x0A34, 0x0A34,
+ 0x0A37, 0x0A37,
+ 0x0A3A, 0x0A3B,
+ 0x0A3D, 0x0A3D,
+ 0x0A43, 0x0A46,
+ 0x0A49, 0x0A4A,
+ 0x0A4E, 0x0A58,
+ 0x0A5D, 0x0A5D,
+ 0x0A5F, 0x0A65,
+ 0x0A75, 0x0A80,
+ 0x0A84, 0x0A84,
+ 0x0A8C, 0x0A8C,
+ 0x0A8E, 0x0A8E,
+ 0x0A92, 0x0A92,
+ 0x0AA9, 0x0AA9,
+ 0x0AB1, 0x0AB1,
+ 0x0AB4, 0x0AB4,
+ 0x0ABA, 0x0ABB,
+ 0x0AC6, 0x0AC6,
+ 0x0ACA, 0x0ACA,
+ 0x0ACE, 0x0ACF,
+ 0x0AD1, 0x0ADF,
+ 0x0AE1, 0x0AE5,
+ 0x0AF0, 0x0B00,
+ 0x0B04, 0x0B04,
+ 0x0B0D, 0x0B0E,
+ 0x0B11, 0x0B12,
+ 0x0B29, 0x0B29,
+ 0x0B31, 0x0B31,
+ 0x0B34, 0x0B35,
+ 0x0B3A, 0x0B3B,
+ 0x0B44, 0x0B46,
+ 0x0B49, 0x0B4A,
+ 0x0B4E, 0x0B55,
+ 0x0B58, 0x0B5B,
+ 0x0B5E, 0x0B5E,
+ 0x0B62, 0x0B65,
+ 0x0B71, 0x0B81,
+ 0x0B84, 0x0B84,
+ 0x0B8B, 0x0B8D,
+ 0x0B91, 0x0B91,
+ 0x0B96, 0x0B98,
+ 0x0B9B, 0x0B9B,
+ 0x0B9D, 0x0B9D,
+ 0x0BA0, 0x0BA2,
+ 0x0BA5, 0x0BA7,
+ 0x0BAB, 0x0BAD,
+ 0x0BB6, 0x0BB6,
+ 0x0BBA, 0x0BBD,
+ 0x0BC3, 0x0BC5,
+ 0x0BC9, 0x0BC9,
+ 0x0BCE, 0x0BD6,
+ 0x0BD8, 0x0BE6,
+ 0x0BF3, 0x0C00,
+ 0x0C04, 0x0C04,
+ 0x0C0D, 0x0C0D,
+ 0x0C11, 0x0C11,
+ 0x0C29, 0x0C29,
+ 0x0C34, 0x0C34,
+ 0x0C3A, 0x0C3D,
+ 0x0C45, 0x0C45,
+ 0x0C49, 0x0C49,
+ 0x0C4E, 0x0C54,
+ 0x0C57, 0x0C5F,
+ 0x0C62, 0x0C65,
+ 0x0C70, 0x0C81,
+ 0x0C84, 0x0C84,
+ 0x0C8D, 0x0C8D,
+ 0x0C91, 0x0C91,
+ 0x0CA9, 0x0CA9,
+ 0x0CB4, 0x0CB4,
+ 0x0CBA, 0x0CBD,
+ 0x0CC5, 0x0CC5,
+ 0x0CC9, 0x0CC9,
+ 0x0CCE, 0x0CD4,
+ 0x0CD7, 0x0CDD,
+ 0x0CDF, 0x0CDF,
+ 0x0CE2, 0x0CE5,
+ 0x0CF0, 0x0D01,
+ 0x0D04, 0x0D04,
+ 0x0D0D, 0x0D0D,
+ 0x0D11, 0x0D11,
+ 0x0D29, 0x0D29,
+ 0x0D3A, 0x0D3D,
+ 0x0D44, 0x0D45,
+ 0x0D49, 0x0D49,
+ 0x0D4E, 0x0D56,
+ 0x0D58, 0x0D5F,
+ 0x0D62, 0x0D65,
+ 0x0D70, 0x0D81,
+ 0x0D84, 0x0D84,
+ 0x0D97, 0x0D99,
+ 0x0DB2, 0x0DB2,
+ 0x0DBC, 0x0DBC,
+ 0x0DBE, 0x0DBF,
+ 0x0DC7, 0x0DC9,
+ 0x0DCB, 0x0DCE,
+ 0x0DD5, 0x0DD5,
+ 0x0DD7, 0x0DD7,
+ 0x0DE0, 0x0DF1,
+ 0x0DF5, 0x0E00,
+ 0x0E3B, 0x0E3E,
+ 0x0E5C, 0x0E80,
+ 0x0E83, 0x0E83,
+ 0x0E85, 0x0E86,
+ 0x0E89, 0x0E89,
+ 0x0E8B, 0x0E8C,
+ 0x0E8E, 0x0E93,
+ 0x0E98, 0x0E98,
+ 0x0EA0, 0x0EA0,
+ 0x0EA4, 0x0EA4,
+ 0x0EA6, 0x0EA6,
+ 0x0EA8, 0x0EA9,
+ 0x0EAC, 0x0EAC,
+ 0x0EBA, 0x0EBA,
+ 0x0EBE, 0x0EBF,
+ 0x0EC5, 0x0EC5,
+ 0x0EC7, 0x0EC7,
+ 0x0ECE, 0x0ECF,
+ 0x0EDA, 0x0EDB,
+ 0x0EDE, 0x0EFF,
+ 0x0F48, 0x0F48,
+ 0x0F6B, 0x0F70,
+ 0x0F8C, 0x0F8F,
+ 0x0F98, 0x0F98,
+ 0x0FBD, 0x0FBD,
+ 0x0FCD, 0x0FCE,
+ 0x0FD0, 0x0FFF,
+ 0x1022, 0x1022,
+ 0x1028, 0x1028,
+ 0x102B, 0x102B,
+ 0x1033, 0x1035,
+ 0x103A, 0x103F,
+ 0x105A, 0x109F,
+ 0x10C6, 0x10CF,
+ 0x10F9, 0x10FA,
+ 0x10FC, 0x10FF,
+ 0x115A, 0x115E,
+ 0x11A3, 0x11A7,
+ 0x11FA, 0x11FF,
+ 0x1207, 0x1207,
+ 0x1247, 0x1247,
+ 0x1249, 0x1249,
+ 0x124E, 0x124F,
+ 0x1257, 0x1257,
+ 0x1259, 0x1259,
+ 0x125E, 0x125F,
+ 0x1287, 0x1287,
+ 0x1289, 0x1289,
+ 0x128E, 0x128F,
+ 0x12AF, 0x12AF,
+ 0x12B1, 0x12B1,
+ 0x12B6, 0x12B7,
+ 0x12BF, 0x12BF,
+ 0x12C1, 0x12C1,
+ 0x12C6, 0x12C7,
+ 0x12CF, 0x12CF,
+ 0x12D7, 0x12D7,
+ 0x12EF, 0x12EF,
+ 0x130F, 0x130F,
+ 0x1311, 0x1311,
+ 0x1316, 0x1317,
+ 0x131F, 0x131F,
+ 0x1347, 0x1347,
+ 0x135B, 0x1360,
+ 0x137D, 0x139F,
+ 0x13F5, 0x1400,
+ 0x1677, 0x167F,
+ 0x169D, 0x169F,
+ 0x16F1, 0x16FF,
+ 0x170D, 0x170D,
+ 0x1715, 0x171F,
+ 0x1737, 0x173F,
+ 0x1754, 0x175F,
+ 0x176D, 0x176D,
+ 0x1771, 0x1771,
+ 0x1774, 0x177F,
+ 0x17DD, 0x17DF,
+ 0x17EA, 0x17FF,
+ 0x180F, 0x180F,
+ 0x181A, 0x181F,
+ 0x1878, 0x187F,
+ 0x18AA, 0x1DFF,
+ 0x1E9C, 0x1E9F,
+ 0x1EFA, 0x1EFF,
+ 0x1F16, 0x1F17,
+ 0x1F1E, 0x1F1F,
+ 0x1F46, 0x1F47,
+ 0x1F4E, 0x1F4F,
+ 0x1F58, 0x1F58,
+ 0x1F5A, 0x1F5A,
+ 0x1F5C, 0x1F5C,
+ 0x1F5E, 0x1F5E,
+ 0x1F7E, 0x1F7F,
+ 0x1FB5, 0x1FB5,
+ 0x1FC5, 0x1FC5,
+ 0x1FD4, 0x1FD5,
+ 0x1FDC, 0x1FDC,
+ 0x1FF0, 0x1FF1,
+ 0x1FF5, 0x1FF5,
+ 0x1FFF, 0x1FFF,
+ 0x2053, 0x2056,
+ 0x2058, 0x205E,
+ 0x2064, 0x2069,
+ 0x2072, 0x2073,
+ 0x208F, 0x209F,
+ 0x20B2, 0x20CF,
+ 0x20EB, 0x20FF,
+ 0x213B, 0x213C,
+ 0x214C, 0x2152,
+ 0x2184, 0x218F,
+ 0x23CF, 0x23FF,
+ 0x2427, 0x243F,
+ 0x244B, 0x245F,
+ 0x24FF, 0x24FF,
+ 0x2614, 0x2615,
+ 0x2618, 0x2618,
+ 0x267E, 0x267F,
+ 0x268A, 0x2700,
+ 0x2705, 0x2705,
+ 0x270A, 0x270B,
+ 0x2728, 0x2728,
+ 0x274C, 0x274C,
+ 0x274E, 0x274E,
+ 0x2753, 0x2755,
+ 0x2757, 0x2757,
+ 0x275F, 0x2760,
+ 0x2795, 0x2797,
+ 0x27B0, 0x27B0,
+ 0x27BF, 0x27CF,
+ 0x27EC, 0x27EF,
+ 0x2B00, 0x2E7F,
+ 0x2E9A, 0x2E9A,
+ 0x2EF4, 0x2EFF,
+ 0x2FD6, 0x2FEF,
+ 0x2FFC, 0x2FFF,
+ 0x3040, 0x3040,
+ 0x3097, 0x3098,
+ 0x3100, 0x3104,
+ 0x312D, 0x3130,
+ 0x318F, 0x318F,
+ 0x31B8, 0x31EF,
+ 0x321D, 0x321F,
+ 0x3244, 0x3250,
+ 0x327C, 0x327E,
+ 0x32CC, 0x32CF,
+ 0x32FF, 0x32FF,
+ 0x3377, 0x337A,
+ 0x33DE, 0x33DF,
+ 0x33FF, 0x33FF,
+ 0x4DB6, 0x4DFF,
+ 0x9FA6, 0x9FFF,
+ 0xA48D, 0xA48F,
+ 0xA4C7, 0xABFF,
+ 0xD7A4, 0xD7FF,
+ 0xFA2E, 0xFA2F,
+ 0xFA6B, 0xFAFF,
+ 0xFB07, 0xFB12,
+ 0xFB18, 0xFB1C,
+ 0xFB37, 0xFB37,
+ 0xFB3D, 0xFB3D,
+ 0xFB3F, 0xFB3F,
+ 0xFB42, 0xFB42,
+ 0xFB45, 0xFB45,
+ 0xFBB2, 0xFBD2,
+ 0xFD40, 0xFD4F,
+ 0xFD90, 0xFD91,
+ 0xFDC8, 0xFDCF,
+ 0xFDFD, 0xFDFF,
+ 0xFE10, 0xFE1F,
+ 0xFE24, 0xFE2F,
+ 0xFE47, 0xFE48,
+ 0xFE53, 0xFE53,
+ 0xFE67, 0xFE67,
+ 0xFE6C, 0xFE6F,
+ 0xFE75, 0xFE75,
+ 0xFEFD, 0xFEFE,
+ 0xFF00, 0xFF00,
+ 0xFFBF, 0xFFC1,
+ 0xFFC8, 0xFFC9,
+ 0xFFD0, 0xFFD1,
+ 0xFFD8, 0xFFD9,
+ 0xFFDD, 0xFFDF,
+ 0xFFE7, 0xFFE7,
+ 0xFFEF, 0xFFF8,
+ 0x10000, 0x102FF,
+ 0x1031F, 0x1031F,
+ 0x10324, 0x1032F,
+ 0x1034B, 0x103FF,
+ 0x10426, 0x10427,
+ 0x1044E, 0x1CFFF,
+ 0x1D0F6, 0x1D0FF,
+ 0x1D127, 0x1D129,
+ 0x1D1DE, 0x1D3FF,
+ 0x1D455, 0x1D455,
+ 0x1D49D, 0x1D49D,
+ 0x1D4A0, 0x1D4A1,
+ 0x1D4A3, 0x1D4A4,
+ 0x1D4A7, 0x1D4A8,
+ 0x1D4AD, 0x1D4AD,
+ 0x1D4BA, 0x1D4BA,
+ 0x1D4BC, 0x1D4BC,
+ 0x1D4C1, 0x1D4C1,
+ 0x1D4C4, 0x1D4C4,
+ 0x1D506, 0x1D506,
+ 0x1D50B, 0x1D50C,
+ 0x1D515, 0x1D515,
+ 0x1D51D, 0x1D51D,
+ 0x1D53A, 0x1D53A,
+ 0x1D53F, 0x1D53F,
+ 0x1D545, 0x1D545,
+ 0x1D547, 0x1D549,
+ 0x1D551, 0x1D551,
+ 0x1D6A4, 0x1D6A7,
+ 0x1D7CA, 0x1D7CD,
+ 0x1D800, 0x1FFFD,
+ 0x2A6D7, 0x2F7FF,
+ 0x2FA1E, 0x2FFFD,
+ 0x30000, 0x3FFFD,
+ 0x40000, 0x4FFFD,
+ 0x50000, 0x5FFFD,
+ 0x60000, 0x6FFFD,
+ 0x70000, 0x7FFFD,
+ 0x80000, 0x8FFFD,
+ 0x90000, 0x9FFFD,
+ 0xA0000, 0xAFFFD,
+ 0xB0000, 0xBFFFD,
+ 0xC0000, 0xCFFFD,
+ 0xD0000, 0xDFFFD,
+ 0xE0000, 0xE0000,
+ 0xE0002, 0xE001F,
+ 0xE0080, 0xEFFFD
+};
+
+/* D.1 Characters with bidirectional property "R" or "AL" */
+static const pg_wchar RandALCat_codepoint_ranges[] =
+{
+ 0x05BE, 0x05BE,
+ 0x05C0, 0x05C0,
+ 0x05C3, 0x05C3,
+ 0x05D0, 0x05EA,
+ 0x05F0, 0x05F4,
+ 0x061B, 0x061B,
+ 0x061F, 0x061F,
+ 0x0621, 0x063A,
+ 0x0640, 0x064A,
+ 0x066D, 0x066F,
+ 0x0671, 0x06D5,
+ 0x06DD, 0x06DD,
+ 0x06E5, 0x06E6,
+ 0x06FA, 0x06FE,
+ 0x0700, 0x070D,
+ 0x0710, 0x0710,
+ 0x0712, 0x072C,
+ 0x0780, 0x07A5,
+ 0x07B1, 0x07B1,
+ 0x200F, 0x200F,
+ 0xFB1D, 0xFB1D,
+ 0xFB1F, 0xFB28,
+ 0xFB2A, 0xFB36,
+ 0xFB38, 0xFB3C,
+ 0xFB3E, 0xFB3E,
+ 0xFB40, 0xFB41,
+ 0xFB43, 0xFB44,
+ 0xFB46, 0xFBB1,
+ 0xFBD3, 0xFD3D,
+ 0xFD50, 0xFD8F,
+ 0xFD92, 0xFDC7,
+ 0xFDF0, 0xFDFC,
+ 0xFE70, 0xFE74,
+ 0xFE76, 0xFEFC
+};
+
+/* D.2 Characters with bidirectional property "L" */
+static const pg_wchar LCat_codepoint_ranges[] =
+{
+ 0x0041, 0x005A,
+ 0x0061, 0x007A,
+ 0x00AA, 0x00AA,
+ 0x00B5, 0x00B5,
+ 0x00BA, 0x00BA,
+ 0x00C0, 0x00D6,
+ 0x00D8, 0x00F6,
+ 0x00F8, 0x0220,
+ 0x0222, 0x0233,
+ 0x0250, 0x02AD,
+ 0x02B0, 0x02B8,
+ 0x02BB, 0x02C1,
+ 0x02D0, 0x02D1,
+ 0x02E0, 0x02E4,
+ 0x02EE, 0x02EE,
+ 0x037A, 0x037A,
+ 0x0386, 0x0386,
+ 0x0388, 0x038A,
+ 0x038C, 0x038C,
+ 0x038E, 0x03A1,
+ 0x03A3, 0x03CE,
+ 0x03D0, 0x03F5,
+ 0x0400, 0x0482,
+ 0x048A, 0x04CE,
+ 0x04D0, 0x04F5,
+ 0x04F8, 0x04F9,
+ 0x0500, 0x050F,
+ 0x0531, 0x0556,
+ 0x0559, 0x055F,
+ 0x0561, 0x0587,
+ 0x0589, 0x0589,
+ 0x0903, 0x0903,
+ 0x0905, 0x0939,
+ 0x093D, 0x0940,
+ 0x0949, 0x094C,
+ 0x0950, 0x0950,
+ 0x0958, 0x0961,
+ 0x0964, 0x0970,
+ 0x0982, 0x0983,
+ 0x0985, 0x098C,
+ 0x098F, 0x0990,
+ 0x0993, 0x09A8,
+ 0x09AA, 0x09B0,
+ 0x09B2, 0x09B2,
+ 0x09B6, 0x09B9,
+ 0x09BE, 0x09C0,
+ 0x09C7, 0x09C8,
+ 0x09CB, 0x09CC,
+ 0x09D7, 0x09D7,
+ 0x09DC, 0x09DD,
+ 0x09DF, 0x09E1,
+ 0x09E6, 0x09F1,
+ 0x09F4, 0x09FA,
+ 0x0A05, 0x0A0A,
+ 0x0A0F, 0x0A10,
+ 0x0A13, 0x0A28,
+ 0x0A2A, 0x0A30,
+ 0x0A32, 0x0A33,
+ 0x0A35, 0x0A36,
+ 0x0A38, 0x0A39,
+ 0x0A3E, 0x0A40,
+ 0x0A59, 0x0A5C,
+ 0x0A5E, 0x0A5E,
+ 0x0A66, 0x0A6F,
+ 0x0A72, 0x0A74,
+ 0x0A83, 0x0A83,
+ 0x0A85, 0x0A8B,
+ 0x0A8D, 0x0A8D,
+ 0x0A8F, 0x0A91,
+ 0x0A93, 0x0AA8,
+ 0x0AAA, 0x0AB0,
+ 0x0AB2, 0x0AB3,
+ 0x0AB5, 0x0AB9,
+ 0x0ABD, 0x0AC0,
+ 0x0AC9, 0x0AC9,
+ 0x0ACB, 0x0ACC,
+ 0x0AD0, 0x0AD0,
+ 0x0AE0, 0x0AE0,
+ 0x0AE6, 0x0AEF,
+ 0x0B02, 0x0B03,
+ 0x0B05, 0x0B0C,
+ 0x0B0F, 0x0B10,
+ 0x0B13, 0x0B28,
+ 0x0B2A, 0x0B30,
+ 0x0B32, 0x0B33,
+ 0x0B36, 0x0B39,
+ 0x0B3D, 0x0B3E,
+ 0x0B40, 0x0B40,
+ 0x0B47, 0x0B48,
+ 0x0B4B, 0x0B4C,
+ 0x0B57, 0x0B57,
+ 0x0B5C, 0x0B5D,
+ 0x0B5F, 0x0B61,
+ 0x0B66, 0x0B70,
+ 0x0B83, 0x0B83,
+ 0x0B85, 0x0B8A,
+ 0x0B8E, 0x0B90,
+ 0x0B92, 0x0B95,
+ 0x0B99, 0x0B9A,
+ 0x0B9C, 0x0B9C,
+ 0x0B9E, 0x0B9F,
+ 0x0BA3, 0x0BA4,
+ 0x0BA8, 0x0BAA,
+ 0x0BAE, 0x0BB5,
+ 0x0BB7, 0x0BB9,
+ 0x0BBE, 0x0BBF,
+ 0x0BC1, 0x0BC2,
+ 0x0BC6, 0x0BC8,
+ 0x0BCA, 0x0BCC,
+ 0x0BD7, 0x0BD7,
+ 0x0BE7, 0x0BF2,
+ 0x0C01, 0x0C03,
+ 0x0C05, 0x0C0C,
+ 0x0C0E, 0x0C10,
+ 0x0C12, 0x0C28,
+ 0x0C2A, 0x0C33,
+ 0x0C35, 0x0C39,
+ 0x0C41, 0x0C44,
+ 0x0C60, 0x0C61,
+ 0x0C66, 0x0C6F,
+ 0x0C82, 0x0C83,
+ 0x0C85, 0x0C8C,
+ 0x0C8E, 0x0C90,
+ 0x0C92, 0x0CA8,
+ 0x0CAA, 0x0CB3,
+ 0x0CB5, 0x0CB9,
+ 0x0CBE, 0x0CBE,
+ 0x0CC0, 0x0CC4,
+ 0x0CC7, 0x0CC8,
+ 0x0CCA, 0x0CCB,
+ 0x0CD5, 0x0CD6,
+ 0x0CDE, 0x0CDE,
+ 0x0CE0, 0x0CE1,
+ 0x0CE6, 0x0CEF,
+ 0x0D02, 0x0D03,
+ 0x0D05, 0x0D0C,
+ 0x0D0E, 0x0D10,
+ 0x0D12, 0x0D28,
+ 0x0D2A, 0x0D39,
+ 0x0D3E, 0x0D40,
+ 0x0D46, 0x0D48,
+ 0x0D4A, 0x0D4C,
+ 0x0D57, 0x0D57,
+ 0x0D60, 0x0D61,
+ 0x0D66, 0x0D6F,
+ 0x0D82, 0x0D83,
+ 0x0D85, 0x0D96,
+ 0x0D9A, 0x0DB1,
+ 0x0DB3, 0x0DBB,
+ 0x0DBD, 0x0DBD,
+ 0x0DC0, 0x0DC6,
+ 0x0DCF, 0x0DD1,
+ 0x0DD8, 0x0DDF,
+ 0x0DF2, 0x0DF4,
+ 0x0E01, 0x0E30,
+ 0x0E32, 0x0E33,
+ 0x0E40, 0x0E46,
+ 0x0E4F, 0x0E5B,
+ 0x0E81, 0x0E82,
+ 0x0E84, 0x0E84,
+ 0x0E87, 0x0E88,
+ 0x0E8A, 0x0E8A,
+ 0x0E8D, 0x0E8D,
+ 0x0E94, 0x0E97,
+ 0x0E99, 0x0E9F,
+ 0x0EA1, 0x0EA3,
+ 0x0EA5, 0x0EA5,
+ 0x0EA7, 0x0EA7,
+ 0x0EAA, 0x0EAB,
+ 0x0EAD, 0x0EB0,
+ 0x0EB2, 0x0EB3,
+ 0x0EBD, 0x0EBD,
+ 0x0EC0, 0x0EC4,
+ 0x0EC6, 0x0EC6,
+ 0x0ED0, 0x0ED9,
+ 0x0EDC, 0x0EDD,
+ 0x0F00, 0x0F17,
+ 0x0F1A, 0x0F34,
+ 0x0F36, 0x0F36,
+ 0x0F38, 0x0F38,
+ 0x0F3E, 0x0F47,
+ 0x0F49, 0x0F6A,
+ 0x0F7F, 0x0F7F,
+ 0x0F85, 0x0F85,
+ 0x0F88, 0x0F8B,
+ 0x0FBE, 0x0FC5,
+ 0x0FC7, 0x0FCC,
+ 0x0FCF, 0x0FCF,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102A,
+ 0x102C, 0x102C,
+ 0x1031, 0x1031,
+ 0x1038, 0x1038,
+ 0x1040, 0x1057,
+ 0x10A0, 0x10C5,
+ 0x10D0, 0x10F8,
+ 0x10FB, 0x10FB,
+ 0x1100, 0x1159,
+ 0x115F, 0x11A2,
+ 0x11A8, 0x11F9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124A, 0x124D,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125A, 0x125D,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128A, 0x128D,
+ 0x1290, 0x12AE,
+ 0x12B0, 0x12B0,
+ 0x12B2, 0x12B5,
+ 0x12B8, 0x12BE,
+ 0x12C0, 0x12C0,
+ 0x12C2, 0x12C5,
+ 0x12C8, 0x12CE,
+ 0x12D0, 0x12D6,
+ 0x12D8, 0x12EE,
+ 0x12F0, 0x130E,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131E,
+ 0x1320, 0x1346,
+ 0x1348, 0x135A,
+ 0x1361, 0x137C,
+ 0x13A0, 0x13F4,
+ 0x1401, 0x1676,
+ 0x1681, 0x169A,
+ 0x16A0, 0x16F0,
+ 0x1700, 0x170C,
+ 0x170E, 0x1711,
+ 0x1720, 0x1731,
+ 0x1735, 0x1736,
+ 0x1740, 0x1751,
+ 0x1760, 0x176C,
+ 0x176E, 0x1770,
+ 0x1780, 0x17B6,
+ 0x17BE, 0x17C5,
+ 0x17C7, 0x17C8,
+ 0x17D4, 0x17DA,
+ 0x17DC, 0x17DC,
+ 0x17E0, 0x17E9,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18A8,
+ 0x1E00, 0x1E9B,
+ 0x1EA0, 0x1EF9,
+ 0x1F00, 0x1F15,
+ 0x1F18, 0x1F1D,
+ 0x1F20, 0x1F45,
+ 0x1F48, 0x1F4D,
+ 0x1F50, 0x1F57,
+ 0x1F59, 0x1F59,
+ 0x1F5B, 0x1F5B,
+ 0x1F5D, 0x1F5D,
+ 0x1F5F, 0x1F7D,
+ 0x1F80, 0x1FB4,
+ 0x1FB6, 0x1FBC,
+ 0x1FBE, 0x1FBE,
+ 0x1FC2, 0x1FC4,
+ 0x1FC6, 0x1FCC,
+ 0x1FD0, 0x1FD3,
+ 0x1FD6, 0x1FDB,
+ 0x1FE0, 0x1FEC,
+ 0x1FF2, 0x1FF4,
+ 0x1FF6, 0x1FFC,
+ 0x200E, 0x200E,
+ 0x2071, 0x2071,
+ 0x207F, 0x207F,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210A, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211D,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212A, 0x212D,
+ 0x212F, 0x2131,
+ 0x2133, 0x2139,
+ 0x213D, 0x213F,
+ 0x2145, 0x2149,
+ 0x2160, 0x2183,
+ 0x2336, 0x237A,
+ 0x2395, 0x2395,
+ 0x249C, 0x24E9,
+ 0x3005, 0x3007,
+ 0x3021, 0x3029,
+ 0x3031, 0x3035,
+ 0x3038, 0x303C,
+ 0x3041, 0x3096,
+ 0x309D, 0x309F,
+ 0x30A1, 0x30FA,
+ 0x30FC, 0x30FF,
+ 0x3105, 0x312C,
+ 0x3131, 0x318E,
+ 0x3190, 0x31B7,
+ 0x31F0, 0x321C,
+ 0x3220, 0x3243,
+ 0x3260, 0x327B,
+ 0x327F, 0x32B0,
+ 0x32C0, 0x32CB,
+ 0x32D0, 0x32FE,
+ 0x3300, 0x3376,
+ 0x337B, 0x33DD,
+ 0x33E0, 0x33FE,
+ 0x3400, 0x4DB5,
+ 0x4E00, 0x9FA5,
+ 0xA000, 0xA48C,
+ 0xAC00, 0xD7A3,
+ 0xD800, 0xFA2D,
+ 0xFA30, 0xFA6A,
+ 0xFB00, 0xFB06,
+ 0xFB13, 0xFB17,
+ 0xFF21, 0xFF3A,
+ 0xFF41, 0xFF5A,
+ 0xFF66, 0xFFBE,
+ 0xFFC2, 0xFFC7,
+ 0xFFCA, 0xFFCF,
+ 0xFFD2, 0xFFD7,
+ 0xFFDA, 0xFFDC,
+ 0x10300, 0x1031E,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034A,
+ 0x10400, 0x10425,
+ 0x10428, 0x1044D,
+ 0x1D000, 0x1D0F5,
+ 0x1D100, 0x1D126,
+ 0x1D12A, 0x1D166,
+ 0x1D16A, 0x1D172,
+ 0x1D183, 0x1D184,
+ 0x1D18C, 0x1D1A9,
+ 0x1D1AE, 0x1D1DD,
+ 0x1D400, 0x1D454,
+ 0x1D456, 0x1D49C,
+ 0x1D49E, 0x1D49F,
+ 0x1D4A2, 0x1D4A2,
+ 0x1D4A5, 0x1D4A6,
+ 0x1D4A9, 0x1D4AC,
+ 0x1D4AE, 0x1D4B9,
+ 0x1D4BB, 0x1D4BB,
+ 0x1D4BD, 0x1D4C0,
+ 0x1D4C2, 0x1D4C3,
+ 0x1D4C5, 0x1D505,
+ 0x1D507, 0x1D50A,
+ 0x1D50D, 0x1D514,
+ 0x1D516, 0x1D51C,
+ 0x1D51E, 0x1D539,
+ 0x1D53B, 0x1D53E,
+ 0x1D540, 0x1D544,
+ 0x1D546, 0x1D546,
+ 0x1D54A, 0x1D550,
+ 0x1D552, 0x1D6A3,
+ 0x1D6A8, 0x1D7C9,
+ 0x20000, 0x2A6D6,
+ 0x2F800, 0x2FA1D,
+ 0xF0000, 0xFFFFD,
+ 0x100000, 0x10FFFD
+};
+
+/* End of stringprep tables */
+
+
+/* Is the given Unicode codepoint in the given table of ranges? */
+#define IS_CODE_IN_TABLE(code, map) is_code_in_table(code, map, lengthof(map))
+
+static int
+codepoint_range_cmp(const void *a, const void *b)
+{
+ const pg_wchar *key = (const pg_wchar *) a;
+ const pg_wchar *range = (const pg_wchar *) b;
+
+ if (*key < range[0])
+ return -1; /* less than lower bound */
+ if (*key > range[1])
+ return 1; /* greater than upper bound */
+
+ return 0; /* within range */
+}
+
+static bool
+is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize)
+{
+ Assert(mapsize % 2 == 0);
+
+ if (code < map[0] || code > map[mapsize - 1])
+ return false;
+
+ if (bsearch(&code, map, mapsize / 2, sizeof(pg_wchar) * 2,
+ codepoint_range_cmp))
+ return true;
+ else
+ return false;
+}
+
+/*
+ * Calculate the length in characters of a null-terminated UTF-8 string.
+ *
+ * Returns -1 if the input is not valid UTF-8.
+ */
+static int
+pg_utf8_string_len(const char *source)
+{
+ const unsigned char *p = (const unsigned char *) source;
+ int l;
+ int num_chars = 0;
+
+ while (*p)
+ {
+ l = pg_utf_mblen(p);
+
+ if (!pg_utf8_islegal(p, l))
+ return -1;
+
+ p += l;
+ num_chars++;
+ }
+
+ return num_chars;
+}
+
+/*
+ * Returns true if the input string is pure ASCII.
+ */
+static bool
+pg_is_ascii_string(const char *p)
+{
+ while (*p)
+ {
+ if (IS_HIGHBIT_SET(*p))
+ return false;
+ p++;
+ }
+ return true;
+}
+
+
+/*
+ * pg_saslprep - Normalize a password with SASLprep.
+ *
+ * SASLprep requires the input to be in UTF-8 encoding, but PostgreSQL
+ * supports many encodings, so we don't blindly assume that. pg_saslprep
+ * will check if the input looks like valid UTF-8, and returns
+ * SASLPREP_INVALID_UTF8 if not.
+ *
+ * If the string contains prohibited characters (or more precisely, if the
+ * output string would contain prohibited characters after normalization),
+ * returns SASLPREP_PROHIBITED.
+ *
+ * On success, returns SASLPREP_SUCCESS, and the normalized string in
+ * *output.
+ *
+ * In frontend, the normalized string is malloc'd, and the caller is
+ * responsible for freeing it. If an allocation fails, returns
+ * SASLPREP_OOM. In backend, the normalized string is palloc'd instead,
+ * and a failed allocation leads to ereport(ERROR).
+ */
+pg_saslprep_rc
+pg_saslprep(const char *input, char **output)
+{
+ pg_wchar *input_chars = NULL;
+ pg_wchar *output_chars = NULL;
+ int input_size;
+ char *result;
+ int result_size;
+ int count;
+ int i;
+ bool contains_RandALCat;
+ unsigned char *p;
+ pg_wchar *wp;
+
+ /* Ensure we return *output as NULL on failure */
+ *output = NULL;
+
+ /* Check that the password isn't stupendously long */
+ if (strlen(input) > MAX_PASSWORD_LENGTH)
+ {
+#ifndef FRONTEND
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("password too long")));
+#else
+ return SASLPREP_OOM;
+#endif
+ }
+
+ /*
+ * Quick check if the input is pure ASCII. An ASCII string requires no
+ * further processing.
+ */
+ if (pg_is_ascii_string(input))
+ {
+ *output = STRDUP(input);
+ if (!(*output))
+ goto oom;
+ return SASLPREP_SUCCESS;
+ }
+
+ /*
+ * Convert the input from UTF-8 to an array of Unicode codepoints.
+ *
+ * This also checks that the input is a legal UTF-8 string.
+ */
+ input_size = pg_utf8_string_len(input);
+ if (input_size < 0)
+ return SASLPREP_INVALID_UTF8;
+
+ input_chars = ALLOC((input_size + 1) * sizeof(pg_wchar));
+ if (!input_chars)
+ goto oom;
+
+ p = (unsigned char *) input;
+ for (i = 0; i < input_size; i++)
+ {
+ input_chars[i] = utf8_to_unicode(p);
+ p += pg_utf_mblen(p);
+ }
+ input_chars[i] = (pg_wchar) '\0';
+
+ /*
+ * The steps below correspond to the steps listed in [RFC3454], Section
+ * "2. Preparation Overview"
+ */
+
+ /*
+ * 1) Map -- For each character in the input, check if it has a mapping
+ * and, if so, replace it with its mapping.
+ */
+ count = 0;
+ for (i = 0; i < input_size; i++)
+ {
+ pg_wchar code = input_chars[i];
+
+ if (IS_CODE_IN_TABLE(code, non_ascii_space_ranges))
+ input_chars[count++] = 0x0020;
+ else if (IS_CODE_IN_TABLE(code, commonly_mapped_to_nothing_ranges))
+ {
+ /* map to nothing */
+ }
+ else
+ input_chars[count++] = code;
+ }
+ input_chars[count] = (pg_wchar) '\0';
+ input_size = count;
+
+ if (input_size == 0)
+ goto prohibited; /* don't allow empty password */
+
+ /*
+ * 2) Normalize -- Normalize the result of step 1 using Unicode
+ * normalization.
+ */
+ output_chars = unicode_normalize(UNICODE_NFKC, input_chars);
+ if (!output_chars)
+ goto oom;
+
+ /*
+ * 3) Prohibit -- Check for any characters that are not allowed in the
+ * output. If any are found, return an error.
+ */
+ for (i = 0; i < input_size; i++)
+ {
+ pg_wchar code = input_chars[i];
+
+ if (IS_CODE_IN_TABLE(code, prohibited_output_ranges))
+ goto prohibited;
+ if (IS_CODE_IN_TABLE(code, unassigned_codepoint_ranges))
+ goto prohibited;
+ }
+
+ /*
+ * 4) Check bidi -- Possibly check for right-to-left characters, and if
+ * any are found, make sure that the whole string satisfies the
+ * requirements for bidirectional strings. If the string does not satisfy
+ * the requirements for bidirectional strings, return an error.
+ *
+ * [RFC3454], Section "6. Bidirectional Characters" explains in more
+ * detail what that means:
+ *
+ * "In any profile that specifies bidirectional character handling, all
+ * three of the following requirements MUST be met:
+ *
+ * 1) The characters in section 5.8 MUST be prohibited.
+ *
+ * 2) If a string contains any RandALCat character, the string MUST NOT
+ * contain any LCat character.
+ *
+ * 3) If a string contains any RandALCat character, a RandALCat character
+ * MUST be the first character of the string, and a RandALCat character
+ * MUST be the last character of the string."
+ */
+ contains_RandALCat = false;
+ for (i = 0; i < input_size; i++)
+ {
+ pg_wchar code = input_chars[i];
+
+ if (IS_CODE_IN_TABLE(code, RandALCat_codepoint_ranges))
+ {
+ contains_RandALCat = true;
+ break;
+ }
+ }
+
+ if (contains_RandALCat)
+ {
+ pg_wchar first = input_chars[0];
+ pg_wchar last = input_chars[input_size - 1];
+
+ for (i = 0; i < input_size; i++)
+ {
+ pg_wchar code = input_chars[i];
+
+ if (IS_CODE_IN_TABLE(code, LCat_codepoint_ranges))
+ goto prohibited;
+ }
+
+ if (!IS_CODE_IN_TABLE(first, RandALCat_codepoint_ranges) ||
+ !IS_CODE_IN_TABLE(last, RandALCat_codepoint_ranges))
+ goto prohibited;
+ }
+
+ /*
+ * Finally, convert the result back to UTF-8.
+ */
+ result_size = 0;
+ for (wp = output_chars; *wp; wp++)
+ {
+ unsigned char buf[4];
+
+ unicode_to_utf8(*wp, buf);
+ result_size += pg_utf_mblen(buf);
+ }
+
+ result = ALLOC(result_size + 1);
+ if (!result)
+ goto oom;
+
+ /*
+ * There are no error exits below here, so the error exit paths don't need
+ * to worry about possibly freeing "result".
+ */
+ p = (unsigned char *) result;
+ for (wp = output_chars; *wp; wp++)
+ {
+ unicode_to_utf8(*wp, p);
+ p += pg_utf_mblen(p);
+ }
+ Assert((char *) p == result + result_size);
+ *p = '\0';
+
+ FREE(input_chars);
+ FREE(output_chars);
+
+ *output = result;
+ return SASLPREP_SUCCESS;
+
+prohibited:
+ if (input_chars)
+ FREE(input_chars);
+ if (output_chars)
+ FREE(output_chars);
+
+ return SASLPREP_PROHIBITED;
+
+oom:
+ if (input_chars)
+ FREE(input_chars);
+ if (output_chars)
+ FREE(output_chars);
+
+ return SASLPREP_OOM;
+}
diff --git a/src/common/scram-common.c b/src/common/scram-common.c
new file mode 100644
index 0000000..4971134
--- /dev/null
+++ b/src/common/scram-common.c
@@ -0,0 +1,291 @@
+/*-------------------------------------------------------------------------
+ * scram-common.c
+ * Shared frontend/backend code for SCRAM authentication
+ *
+ * This contains the common low-level functions needed in both frontend and
+ * backend, for implement the Salted Challenge Response Authentication
+ * Mechanism (SCRAM), per IETF's RFC 5802.
+ *
+ * Portions Copyright (c) 2017-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/scram-common.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/base64.h"
+#include "common/scram-common.h"
+#include "port/pg_bswap.h"
+
+#define HMAC_IPAD 0x36
+#define HMAC_OPAD 0x5C
+
+/*
+ * Calculate HMAC per RFC2104.
+ *
+ * The hash function used is SHA-256.
+ */
+void
+scram_HMAC_init(scram_HMAC_ctx *ctx, const uint8 *key, int keylen)
+{
+ uint8 k_ipad[SHA256_HMAC_B];
+ int i;
+ uint8 keybuf[SCRAM_KEY_LEN];
+
+ /*
+ * If the key is longer than the block size (64 bytes for SHA-256), pass
+ * it through SHA-256 once to shrink it down.
+ */
+ if (keylen > SHA256_HMAC_B)
+ {
+ pg_sha256_ctx sha256_ctx;
+
+ pg_sha256_init(&sha256_ctx);
+ pg_sha256_update(&sha256_ctx, key, keylen);
+ pg_sha256_final(&sha256_ctx, keybuf);
+ key = keybuf;
+ keylen = SCRAM_KEY_LEN;
+ }
+
+ memset(k_ipad, HMAC_IPAD, SHA256_HMAC_B);
+ memset(ctx->k_opad, HMAC_OPAD, SHA256_HMAC_B);
+
+ for (i = 0; i < keylen; i++)
+ {
+ k_ipad[i] ^= key[i];
+ ctx->k_opad[i] ^= key[i];
+ }
+
+ /* tmp = H(K XOR ipad, text) */
+ pg_sha256_init(&ctx->sha256ctx);
+ pg_sha256_update(&ctx->sha256ctx, k_ipad, SHA256_HMAC_B);
+}
+
+/*
+ * Update HMAC calculation
+ * The hash function used is SHA-256.
+ */
+void
+scram_HMAC_update(scram_HMAC_ctx *ctx, const char *str, int slen)
+{
+ pg_sha256_update(&ctx->sha256ctx, (const uint8 *) str, slen);
+}
+
+/*
+ * Finalize HMAC calculation.
+ * The hash function used is SHA-256.
+ */
+void
+scram_HMAC_final(uint8 *result, scram_HMAC_ctx *ctx)
+{
+ uint8 h[SCRAM_KEY_LEN];
+
+ pg_sha256_final(&ctx->sha256ctx, h);
+
+ /* H(K XOR opad, tmp) */
+ pg_sha256_init(&ctx->sha256ctx);
+ pg_sha256_update(&ctx->sha256ctx, ctx->k_opad, SHA256_HMAC_B);
+ pg_sha256_update(&ctx->sha256ctx, h, SCRAM_KEY_LEN);
+ pg_sha256_final(&ctx->sha256ctx, result);
+}
+
+/*
+ * Calculate SaltedPassword.
+ *
+ * The password should already be normalized by SASLprep.
+ */
+void
+scram_SaltedPassword(const char *password,
+ const char *salt, int saltlen, int iterations,
+ uint8 *result)
+{
+ int password_len = strlen(password);
+ uint32 one = pg_hton32(1);
+ int i,
+ j;
+ uint8 Ui[SCRAM_KEY_LEN];
+ uint8 Ui_prev[SCRAM_KEY_LEN];
+ scram_HMAC_ctx hmac_ctx;
+
+ /*
+ * Iterate hash calculation of HMAC entry using given salt. This is
+ * essentially PBKDF2 (see RFC2898) with HMAC() as the pseudorandom
+ * function.
+ */
+
+ /* First iteration */
+ scram_HMAC_init(&hmac_ctx, (uint8 *) password, password_len);
+ scram_HMAC_update(&hmac_ctx, salt, saltlen);
+ scram_HMAC_update(&hmac_ctx, (char *) &one, sizeof(uint32));
+ scram_HMAC_final(Ui_prev, &hmac_ctx);
+ memcpy(result, Ui_prev, SCRAM_KEY_LEN);
+
+ /* Subsequent iterations */
+ for (i = 2; i <= iterations; i++)
+ {
+ scram_HMAC_init(&hmac_ctx, (uint8 *) password, password_len);
+ scram_HMAC_update(&hmac_ctx, (const char *) Ui_prev, SCRAM_KEY_LEN);
+ scram_HMAC_final(Ui, &hmac_ctx);
+ for (j = 0; j < SCRAM_KEY_LEN; j++)
+ result[j] ^= Ui[j];
+ memcpy(Ui_prev, Ui, SCRAM_KEY_LEN);
+ }
+}
+
+
+/*
+ * Calculate SHA-256 hash for a NULL-terminated string. (The NULL terminator is
+ * not included in the hash).
+ */
+void
+scram_H(const uint8 *input, int len, uint8 *result)
+{
+ pg_sha256_ctx ctx;
+
+ pg_sha256_init(&ctx);
+ pg_sha256_update(&ctx, input, len);
+ pg_sha256_final(&ctx, result);
+}
+
+/*
+ * Calculate ClientKey.
+ */
+void
+scram_ClientKey(const uint8 *salted_password, uint8 *result)
+{
+ scram_HMAC_ctx ctx;
+
+ scram_HMAC_init(&ctx, salted_password, SCRAM_KEY_LEN);
+ scram_HMAC_update(&ctx, "Client Key", strlen("Client Key"));
+ scram_HMAC_final(result, &ctx);
+}
+
+/*
+ * Calculate ServerKey.
+ */
+void
+scram_ServerKey(const uint8 *salted_password, uint8 *result)
+{
+ scram_HMAC_ctx ctx;
+
+ scram_HMAC_init(&ctx, salted_password, SCRAM_KEY_LEN);
+ scram_HMAC_update(&ctx, "Server Key", strlen("Server Key"));
+ scram_HMAC_final(result, &ctx);
+}
+
+
+/*
+ * Construct a SCRAM secret, for storing in pg_authid.rolpassword.
+ *
+ * The password should already have been processed with SASLprep, if necessary!
+ *
+ * If iterations is 0, default number of iterations is used. The result is
+ * palloc'd or malloc'd, so caller is responsible for freeing it.
+ */
+char *
+scram_build_secret(const char *salt, int saltlen, int iterations,
+ const char *password)
+{
+ uint8 salted_password[SCRAM_KEY_LEN];
+ uint8 stored_key[SCRAM_KEY_LEN];
+ uint8 server_key[SCRAM_KEY_LEN];
+ char *result;
+ char *p;
+ int maxlen;
+ int encoded_salt_len;
+ int encoded_stored_len;
+ int encoded_server_len;
+ int encoded_result;
+
+ if (iterations <= 0)
+ iterations = SCRAM_DEFAULT_ITERATIONS;
+
+ /* Calculate StoredKey and ServerKey */
+ scram_SaltedPassword(password, salt, saltlen, iterations,
+ salted_password);
+ scram_ClientKey(salted_password, stored_key);
+ scram_H(stored_key, SCRAM_KEY_LEN, stored_key);
+
+ scram_ServerKey(salted_password, server_key);
+
+ /*----------
+ * The format is:
+ * SCRAM-SHA-256$<iteration count>:<salt>$<StoredKey>:<ServerKey>
+ *----------
+ */
+ encoded_salt_len = pg_b64_enc_len(saltlen);
+ encoded_stored_len = pg_b64_enc_len(SCRAM_KEY_LEN);
+ encoded_server_len = pg_b64_enc_len(SCRAM_KEY_LEN);
+
+ maxlen = strlen("SCRAM-SHA-256") + 1
+ + 10 + 1 /* iteration count */
+ + encoded_salt_len + 1 /* Base64-encoded salt */
+ + encoded_stored_len + 1 /* Base64-encoded StoredKey */
+ + encoded_server_len + 1; /* Base64-encoded ServerKey */
+
+#ifdef FRONTEND
+ result = malloc(maxlen);
+ if (!result)
+ return NULL;
+#else
+ result = palloc(maxlen);
+#endif
+
+ p = result + sprintf(result, "SCRAM-SHA-256$%d:", iterations);
+
+ /* salt */
+ encoded_result = pg_b64_encode(salt, saltlen, p, encoded_salt_len);
+ if (encoded_result < 0)
+ {
+#ifdef FRONTEND
+ free(result);
+ return NULL;
+#else
+ elog(ERROR, "could not encode salt");
+#endif
+ }
+ p += encoded_result;
+ *(p++) = '$';
+
+ /* stored key */
+ encoded_result = pg_b64_encode((char *) stored_key, SCRAM_KEY_LEN, p,
+ encoded_stored_len);
+ if (encoded_result < 0)
+ {
+#ifdef FRONTEND
+ free(result);
+ return NULL;
+#else
+ elog(ERROR, "could not encode stored key");
+#endif
+ }
+
+ p += encoded_result;
+ *(p++) = ':';
+
+ /* server key */
+ encoded_result = pg_b64_encode((char *) server_key, SCRAM_KEY_LEN, p,
+ encoded_server_len);
+ if (encoded_result < 0)
+ {
+#ifdef FRONTEND
+ free(result);
+ return NULL;
+#else
+ elog(ERROR, "could not encode server key");
+#endif
+ }
+
+ p += encoded_result;
+ *(p++) = '\0';
+
+ Assert(p - result <= maxlen);
+
+ return result;
+}
diff --git a/src/common/sha2.c b/src/common/sha2.c
new file mode 100644
index 0000000..0d329bb
--- /dev/null
+++ b/src/common/sha2.c
@@ -0,0 +1,1006 @@
+/*-------------------------------------------------------------------------
+ *
+ * sha2.c
+ * Set of SHA functions for SHA-224, SHA-256, SHA-384 and SHA-512.
+ *
+ * This is the set of in-core functions used when there are no other
+ * alternative options like OpenSSL.
+ *
+ * Portions Copyright (c) 2016-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/sha2.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* $OpenBSD: sha2.c,v 1.6 2004/05/03 02:57:36 millert Exp $ */
+/*
+ * FILE: sha2.c
+ * AUTHOR: Aaron D. Gifford <me@aarongifford.com>
+ *
+ * Copyright (c) 2000-2001, Aaron D. Gifford
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the copyright holder nor the names of contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTOR(S) ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTOR(S) BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $From: sha2.c,v 1.1 2001/11/08 00:01:51 adg Exp adg $
+ */
+
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <sys/param.h>
+
+#include "common/sha2.h"
+
+/*
+ * UNROLLED TRANSFORM LOOP NOTE:
+ * You can define SHA2_UNROLL_TRANSFORM to use the unrolled transform
+ * loop version for the hash transform rounds (defined using macros
+ * later in this file). Either define on the command line, for example:
+ *
+ * cc -DSHA2_UNROLL_TRANSFORM -o sha2 sha2.c sha2prog.c
+ *
+ * or define below:
+ *
+ * #define SHA2_UNROLL_TRANSFORM
+ *
+ */
+
+/*** SHA-256/384/512 Various Length Definitions ***********************/
+#define PG_SHA256_SHORT_BLOCK_LENGTH (PG_SHA256_BLOCK_LENGTH - 8)
+#define PG_SHA384_SHORT_BLOCK_LENGTH (PG_SHA384_BLOCK_LENGTH - 16)
+#define PG_SHA512_SHORT_BLOCK_LENGTH (PG_SHA512_BLOCK_LENGTH - 16)
+
+/*** ENDIAN REVERSAL MACROS *******************************************/
+#ifndef WORDS_BIGENDIAN
+#define REVERSE32(w,x) { \
+ uint32 tmp = (w); \
+ tmp = (tmp >> 16) | (tmp << 16); \
+ (x) = ((tmp & 0xff00ff00UL) >> 8) | ((tmp & 0x00ff00ffUL) << 8); \
+}
+#define REVERSE64(w,x) { \
+ uint64 tmp = (w); \
+ tmp = (tmp >> 32) | (tmp << 32); \
+ tmp = ((tmp & 0xff00ff00ff00ff00ULL) >> 8) | \
+ ((tmp & 0x00ff00ff00ff00ffULL) << 8); \
+ (x) = ((tmp & 0xffff0000ffff0000ULL) >> 16) | \
+ ((tmp & 0x0000ffff0000ffffULL) << 16); \
+}
+#endif /* not bigendian */
+
+/*
+ * Macro for incrementally adding the unsigned 64-bit integer n to the
+ * unsigned 128-bit integer (represented using a two-element array of
+ * 64-bit words):
+ */
+#define ADDINC128(w,n) { \
+ (w)[0] += (uint64)(n); \
+ if ((w)[0] < (n)) { \
+ (w)[1]++; \
+ } \
+}
+
+/*** THE SIX LOGICAL FUNCTIONS ****************************************/
+/*
+ * Bit shifting and rotation (used by the six SHA-XYZ logical functions:
+ *
+ * NOTE: The naming of R and S appears backwards here (R is a SHIFT and
+ * S is a ROTATION) because the SHA-256/384/512 description document
+ * (see http://www.iwar.org.uk/comsec/resources/cipher/sha256-384-512.pdf)
+ * uses this same "backwards" definition.
+ */
+/* Shift-right (used in SHA-256, SHA-384, and SHA-512): */
+#define R(b,x) ((x) >> (b))
+/* 32-bit Rotate-right (used in SHA-256): */
+#define S32(b,x) (((x) >> (b)) | ((x) << (32 - (b))))
+/* 64-bit Rotate-right (used in SHA-384 and SHA-512): */
+#define S64(b,x) (((x) >> (b)) | ((x) << (64 - (b))))
+
+/* Two of six logical functions used in SHA-256, SHA-384, and SHA-512: */
+#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
+#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+
+/* Four of six logical functions used in SHA-256: */
+#define Sigma0_256(x) (S32(2, (x)) ^ S32(13, (x)) ^ S32(22, (x)))
+#define Sigma1_256(x) (S32(6, (x)) ^ S32(11, (x)) ^ S32(25, (x)))
+#define sigma0_256(x) (S32(7, (x)) ^ S32(18, (x)) ^ R(3 , (x)))
+#define sigma1_256(x) (S32(17, (x)) ^ S32(19, (x)) ^ R(10, (x)))
+
+/* Four of six logical functions used in SHA-384 and SHA-512: */
+#define Sigma0_512(x) (S64(28, (x)) ^ S64(34, (x)) ^ S64(39, (x)))
+#define Sigma1_512(x) (S64(14, (x)) ^ S64(18, (x)) ^ S64(41, (x)))
+#define sigma0_512(x) (S64( 1, (x)) ^ S64( 8, (x)) ^ R( 7, (x)))
+#define sigma1_512(x) (S64(19, (x)) ^ S64(61, (x)) ^ R( 6, (x)))
+
+/*** INTERNAL FUNCTION PROTOTYPES *************************************/
+/* NOTE: These should not be accessed directly from outside this
+ * library -- they are intended for private internal visibility/use
+ * only.
+ */
+static void SHA512_Last(pg_sha512_ctx *context);
+static void SHA256_Transform(pg_sha256_ctx *context, const uint8 *data);
+static void SHA512_Transform(pg_sha512_ctx *context, const uint8 *data);
+
+/*** SHA-XYZ INITIAL HASH VALUES AND CONSTANTS ************************/
+/* Hash constant words K for SHA-256: */
+static const uint32 K256[64] = {
+ 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
+ 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL,
+ 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL,
+ 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL,
+ 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
+ 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL,
+ 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL,
+ 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL,
+ 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL,
+ 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
+ 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL,
+ 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL,
+ 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL,
+ 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL,
+ 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
+ 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
+};
+
+/* Initial hash value H for SHA-224: */
+static const uint32 sha224_initial_hash_value[8] = {
+ 0xc1059ed8UL,
+ 0x367cd507UL,
+ 0x3070dd17UL,
+ 0xf70e5939UL,
+ 0xffc00b31UL,
+ 0x68581511UL,
+ 0x64f98fa7UL,
+ 0xbefa4fa4UL
+};
+
+/* Initial hash value H for SHA-256: */
+static const uint32 sha256_initial_hash_value[8] = {
+ 0x6a09e667UL,
+ 0xbb67ae85UL,
+ 0x3c6ef372UL,
+ 0xa54ff53aUL,
+ 0x510e527fUL,
+ 0x9b05688cUL,
+ 0x1f83d9abUL,
+ 0x5be0cd19UL
+};
+
+/* Hash constant words K for SHA-384 and SHA-512: */
+static const uint64 K512[80] = {
+ 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
+ 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
+ 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
+ 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
+ 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
+ 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
+ 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
+ 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
+ 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
+ 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
+ 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
+ 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
+ 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
+ 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
+ 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
+ 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
+ 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
+ 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
+ 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
+ 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
+ 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
+ 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
+ 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
+ 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
+ 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
+ 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
+ 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
+ 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
+ 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
+ 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
+ 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
+ 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
+ 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
+ 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
+ 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
+ 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
+ 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
+ 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
+ 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
+ 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
+};
+
+/* Initial hash value H for SHA-384 */
+static const uint64 sha384_initial_hash_value[8] = {
+ 0xcbbb9d5dc1059ed8ULL,
+ 0x629a292a367cd507ULL,
+ 0x9159015a3070dd17ULL,
+ 0x152fecd8f70e5939ULL,
+ 0x67332667ffc00b31ULL,
+ 0x8eb44a8768581511ULL,
+ 0xdb0c2e0d64f98fa7ULL,
+ 0x47b5481dbefa4fa4ULL
+};
+
+/* Initial hash value H for SHA-512 */
+static const uint64 sha512_initial_hash_value[8] = {
+ 0x6a09e667f3bcc908ULL,
+ 0xbb67ae8584caa73bULL,
+ 0x3c6ef372fe94f82bULL,
+ 0xa54ff53a5f1d36f1ULL,
+ 0x510e527fade682d1ULL,
+ 0x9b05688c2b3e6c1fULL,
+ 0x1f83d9abfb41bd6bULL,
+ 0x5be0cd19137e2179ULL
+};
+
+
+/*** SHA-256: *********************************************************/
+void
+pg_sha256_init(pg_sha256_ctx *context)
+{
+ if (context == NULL)
+ return;
+ memcpy(context->state, sha256_initial_hash_value, PG_SHA256_DIGEST_LENGTH);
+ memset(context->buffer, 0, PG_SHA256_BLOCK_LENGTH);
+ context->bitcount = 0;
+}
+
+#ifdef SHA2_UNROLL_TRANSFORM
+
+/* Unrolled SHA-256 round macros: */
+
+#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h) do { \
+ W256[j] = (uint32)data[3] | ((uint32)data[2] << 8) | \
+ ((uint32)data[1] << 16) | ((uint32)data[0] << 24); \
+ data += 4; \
+ T1 = (h) + Sigma1_256((e)) + Ch((e), (f), (g)) + K256[j] + W256[j]; \
+ (d) += T1; \
+ (h) = T1 + Sigma0_256((a)) + Maj((a), (b), (c)); \
+ j++; \
+} while(0)
+
+#define ROUND256(a,b,c,d,e,f,g,h) do { \
+ s0 = W256[(j+1)&0x0f]; \
+ s0 = sigma0_256(s0); \
+ s1 = W256[(j+14)&0x0f]; \
+ s1 = sigma1_256(s1); \
+ T1 = (h) + Sigma1_256((e)) + Ch((e), (f), (g)) + K256[j] + \
+ (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); \
+ (d) += T1; \
+ (h) = T1 + Sigma0_256((a)) + Maj((a), (b), (c)); \
+ j++; \
+} while(0)
+
+static void
+SHA256_Transform(pg_sha256_ctx *context, const uint8 *data)
+{
+ uint32 a,
+ b,
+ c,
+ d,
+ e,
+ f,
+ g,
+ h,
+ s0,
+ s1;
+ uint32 T1,
+ *W256;
+ int j;
+
+ W256 = (uint32 *) context->buffer;
+
+ /* Initialize registers with the prev. intermediate value */
+ a = context->state[0];
+ b = context->state[1];
+ c = context->state[2];
+ d = context->state[3];
+ e = context->state[4];
+ f = context->state[5];
+ g = context->state[6];
+ h = context->state[7];
+
+ j = 0;
+ do
+ {
+ /* Rounds 0 to 15 (unrolled): */
+ ROUND256_0_TO_15(a, b, c, d, e, f, g, h);
+ ROUND256_0_TO_15(h, a, b, c, d, e, f, g);
+ ROUND256_0_TO_15(g, h, a, b, c, d, e, f);
+ ROUND256_0_TO_15(f, g, h, a, b, c, d, e);
+ ROUND256_0_TO_15(e, f, g, h, a, b, c, d);
+ ROUND256_0_TO_15(d, e, f, g, h, a, b, c);
+ ROUND256_0_TO_15(c, d, e, f, g, h, a, b);
+ ROUND256_0_TO_15(b, c, d, e, f, g, h, a);
+ } while (j < 16);
+
+ /* Now for the remaining rounds to 64: */
+ do
+ {
+ ROUND256(a, b, c, d, e, f, g, h);
+ ROUND256(h, a, b, c, d, e, f, g);
+ ROUND256(g, h, a, b, c, d, e, f);
+ ROUND256(f, g, h, a, b, c, d, e);
+ ROUND256(e, f, g, h, a, b, c, d);
+ ROUND256(d, e, f, g, h, a, b, c);
+ ROUND256(c, d, e, f, g, h, a, b);
+ ROUND256(b, c, d, e, f, g, h, a);
+ } while (j < 64);
+
+ /* Compute the current intermediate hash value */
+ context->state[0] += a;
+ context->state[1] += b;
+ context->state[2] += c;
+ context->state[3] += d;
+ context->state[4] += e;
+ context->state[5] += f;
+ context->state[6] += g;
+ context->state[7] += h;
+
+ /* Clean up */
+ a = b = c = d = e = f = g = h = T1 = 0;
+}
+#else /* SHA2_UNROLL_TRANSFORM */
+
+static void
+SHA256_Transform(pg_sha256_ctx *context, const uint8 *data)
+{
+ uint32 a,
+ b,
+ c,
+ d,
+ e,
+ f,
+ g,
+ h,
+ s0,
+ s1;
+ uint32 T1,
+ T2,
+ *W256;
+ int j;
+
+ W256 = (uint32 *) context->buffer;
+
+ /* Initialize registers with the prev. intermediate value */
+ a = context->state[0];
+ b = context->state[1];
+ c = context->state[2];
+ d = context->state[3];
+ e = context->state[4];
+ f = context->state[5];
+ g = context->state[6];
+ h = context->state[7];
+
+ j = 0;
+ do
+ {
+ W256[j] = (uint32) data[3] | ((uint32) data[2] << 8) |
+ ((uint32) data[1] << 16) | ((uint32) data[0] << 24);
+ data += 4;
+ /* Apply the SHA-256 compression function to update a..h */
+ T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + W256[j];
+ T2 = Sigma0_256(a) + Maj(a, b, c);
+ h = g;
+ g = f;
+ f = e;
+ e = d + T1;
+ d = c;
+ c = b;
+ b = a;
+ a = T1 + T2;
+
+ j++;
+ } while (j < 16);
+
+ do
+ {
+ /* Part of the message block expansion: */
+ s0 = W256[(j + 1) & 0x0f];
+ s0 = sigma0_256(s0);
+ s1 = W256[(j + 14) & 0x0f];
+ s1 = sigma1_256(s1);
+
+ /* Apply the SHA-256 compression function to update a..h */
+ T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] +
+ (W256[j & 0x0f] += s1 + W256[(j + 9) & 0x0f] + s0);
+ T2 = Sigma0_256(a) + Maj(a, b, c);
+ h = g;
+ g = f;
+ f = e;
+ e = d + T1;
+ d = c;
+ c = b;
+ b = a;
+ a = T1 + T2;
+
+ j++;
+ } while (j < 64);
+
+ /* Compute the current intermediate hash value */
+ context->state[0] += a;
+ context->state[1] += b;
+ context->state[2] += c;
+ context->state[3] += d;
+ context->state[4] += e;
+ context->state[5] += f;
+ context->state[6] += g;
+ context->state[7] += h;
+
+ /* Clean up */
+ a = b = c = d = e = f = g = h = T1 = T2 = 0;
+}
+#endif /* SHA2_UNROLL_TRANSFORM */
+
+void
+pg_sha256_update(pg_sha256_ctx *context, const uint8 *data, size_t len)
+{
+ size_t freespace,
+ usedspace;
+
+ /* Calling with no data is valid (we do nothing) */
+ if (len == 0)
+ return;
+
+ usedspace = (context->bitcount >> 3) % PG_SHA256_BLOCK_LENGTH;
+ if (usedspace > 0)
+ {
+ /* Calculate how much free space is available in the buffer */
+ freespace = PG_SHA256_BLOCK_LENGTH - usedspace;
+
+ if (len >= freespace)
+ {
+ /* Fill the buffer completely and process it */
+ memcpy(&context->buffer[usedspace], data, freespace);
+ context->bitcount += freespace << 3;
+ len -= freespace;
+ data += freespace;
+ SHA256_Transform(context, context->buffer);
+ }
+ else
+ {
+ /* The buffer is not yet full */
+ memcpy(&context->buffer[usedspace], data, len);
+ context->bitcount += len << 3;
+ /* Clean up: */
+ usedspace = freespace = 0;
+ return;
+ }
+ }
+ while (len >= PG_SHA256_BLOCK_LENGTH)
+ {
+ /* Process as many complete blocks as we can */
+ SHA256_Transform(context, data);
+ context->bitcount += PG_SHA256_BLOCK_LENGTH << 3;
+ len -= PG_SHA256_BLOCK_LENGTH;
+ data += PG_SHA256_BLOCK_LENGTH;
+ }
+ if (len > 0)
+ {
+ /* There's left-overs, so save 'em */
+ memcpy(context->buffer, data, len);
+ context->bitcount += len << 3;
+ }
+ /* Clean up: */
+ usedspace = freespace = 0;
+}
+
+static void
+SHA256_Last(pg_sha256_ctx *context)
+{
+ unsigned int usedspace;
+
+ usedspace = (context->bitcount >> 3) % PG_SHA256_BLOCK_LENGTH;
+#ifndef WORDS_BIGENDIAN
+ /* Convert FROM host byte order */
+ REVERSE64(context->bitcount, context->bitcount);
+#endif
+ if (usedspace > 0)
+ {
+ /* Begin padding with a 1 bit: */
+ context->buffer[usedspace++] = 0x80;
+
+ if (usedspace <= PG_SHA256_SHORT_BLOCK_LENGTH)
+ {
+ /* Set-up for the last transform: */
+ memset(&context->buffer[usedspace], 0, PG_SHA256_SHORT_BLOCK_LENGTH - usedspace);
+ }
+ else
+ {
+ if (usedspace < PG_SHA256_BLOCK_LENGTH)
+ {
+ memset(&context->buffer[usedspace], 0, PG_SHA256_BLOCK_LENGTH - usedspace);
+ }
+ /* Do second-to-last transform: */
+ SHA256_Transform(context, context->buffer);
+
+ /* And set-up for the last transform: */
+ memset(context->buffer, 0, PG_SHA256_SHORT_BLOCK_LENGTH);
+ }
+ }
+ else
+ {
+ /* Set-up for the last transform: */
+ memset(context->buffer, 0, PG_SHA256_SHORT_BLOCK_LENGTH);
+
+ /* Begin padding with a 1 bit: */
+ *context->buffer = 0x80;
+ }
+ /* Set the bit count: */
+ *(uint64 *) &context->buffer[PG_SHA256_SHORT_BLOCK_LENGTH] = context->bitcount;
+
+ /* Final transform: */
+ SHA256_Transform(context, context->buffer);
+}
+
+void
+pg_sha256_final(pg_sha256_ctx *context, uint8 *digest)
+{
+ /* If no digest buffer is passed, we don't bother doing this: */
+ if (digest != NULL)
+ {
+ SHA256_Last(context);
+
+#ifndef WORDS_BIGENDIAN
+ {
+ /* Convert TO host byte order */
+ int j;
+
+ for (j = 0; j < 8; j++)
+ {
+ REVERSE32(context->state[j], context->state[j]);
+ }
+ }
+#endif
+ memcpy(digest, context->state, PG_SHA256_DIGEST_LENGTH);
+ }
+
+ /* Clean up state data: */
+ memset(context, 0, sizeof(pg_sha256_ctx));
+}
+
+
+/*** SHA-512: *********************************************************/
+void
+pg_sha512_init(pg_sha512_ctx *context)
+{
+ if (context == NULL)
+ return;
+ memcpy(context->state, sha512_initial_hash_value, PG_SHA512_DIGEST_LENGTH);
+ memset(context->buffer, 0, PG_SHA512_BLOCK_LENGTH);
+ context->bitcount[0] = context->bitcount[1] = 0;
+}
+
+#ifdef SHA2_UNROLL_TRANSFORM
+
+/* Unrolled SHA-512 round macros: */
+
+#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h) do { \
+ W512[j] = (uint64)data[7] | ((uint64)data[6] << 8) | \
+ ((uint64)data[5] << 16) | ((uint64)data[4] << 24) | \
+ ((uint64)data[3] << 32) | ((uint64)data[2] << 40) | \
+ ((uint64)data[1] << 48) | ((uint64)data[0] << 56); \
+ data += 8; \
+ T1 = (h) + Sigma1_512((e)) + Ch((e), (f), (g)) + K512[j] + W512[j]; \
+ (d) += T1; \
+ (h) = T1 + Sigma0_512((a)) + Maj((a), (b), (c)); \
+ j++; \
+} while(0)
+
+
+#define ROUND512(a,b,c,d,e,f,g,h) do { \
+ s0 = W512[(j+1)&0x0f]; \
+ s0 = sigma0_512(s0); \
+ s1 = W512[(j+14)&0x0f]; \
+ s1 = sigma1_512(s1); \
+ T1 = (h) + Sigma1_512((e)) + Ch((e), (f), (g)) + K512[j] + \
+ (W512[j&0x0f] += s1 + W512[(j+9)&0x0f] + s0); \
+ (d) += T1; \
+ (h) = T1 + Sigma0_512((a)) + Maj((a), (b), (c)); \
+ j++; \
+} while(0)
+
+static void
+SHA512_Transform(pg_sha512_ctx *context, const uint8 *data)
+{
+ uint64 a,
+ b,
+ c,
+ d,
+ e,
+ f,
+ g,
+ h,
+ s0,
+ s1;
+ uint64 T1,
+ *W512 = (uint64 *) context->buffer;
+ int j;
+
+ /* Initialize registers with the prev. intermediate value */
+ a = context->state[0];
+ b = context->state[1];
+ c = context->state[2];
+ d = context->state[3];
+ e = context->state[4];
+ f = context->state[5];
+ g = context->state[6];
+ h = context->state[7];
+
+ j = 0;
+ do
+ {
+ ROUND512_0_TO_15(a, b, c, d, e, f, g, h);
+ ROUND512_0_TO_15(h, a, b, c, d, e, f, g);
+ ROUND512_0_TO_15(g, h, a, b, c, d, e, f);
+ ROUND512_0_TO_15(f, g, h, a, b, c, d, e);
+ ROUND512_0_TO_15(e, f, g, h, a, b, c, d);
+ ROUND512_0_TO_15(d, e, f, g, h, a, b, c);
+ ROUND512_0_TO_15(c, d, e, f, g, h, a, b);
+ ROUND512_0_TO_15(b, c, d, e, f, g, h, a);
+ } while (j < 16);
+
+ /* Now for the remaining rounds up to 79: */
+ do
+ {
+ ROUND512(a, b, c, d, e, f, g, h);
+ ROUND512(h, a, b, c, d, e, f, g);
+ ROUND512(g, h, a, b, c, d, e, f);
+ ROUND512(f, g, h, a, b, c, d, e);
+ ROUND512(e, f, g, h, a, b, c, d);
+ ROUND512(d, e, f, g, h, a, b, c);
+ ROUND512(c, d, e, f, g, h, a, b);
+ ROUND512(b, c, d, e, f, g, h, a);
+ } while (j < 80);
+
+ /* Compute the current intermediate hash value */
+ context->state[0] += a;
+ context->state[1] += b;
+ context->state[2] += c;
+ context->state[3] += d;
+ context->state[4] += e;
+ context->state[5] += f;
+ context->state[6] += g;
+ context->state[7] += h;
+
+ /* Clean up */
+ a = b = c = d = e = f = g = h = T1 = 0;
+}
+#else /* SHA2_UNROLL_TRANSFORM */
+
+static void
+SHA512_Transform(pg_sha512_ctx *context, const uint8 *data)
+{
+ uint64 a,
+ b,
+ c,
+ d,
+ e,
+ f,
+ g,
+ h,
+ s0,
+ s1;
+ uint64 T1,
+ T2,
+ *W512 = (uint64 *) context->buffer;
+ int j;
+
+ /* Initialize registers with the prev. intermediate value */
+ a = context->state[0];
+ b = context->state[1];
+ c = context->state[2];
+ d = context->state[3];
+ e = context->state[4];
+ f = context->state[5];
+ g = context->state[6];
+ h = context->state[7];
+
+ j = 0;
+ do
+ {
+ W512[j] = (uint64) data[7] | ((uint64) data[6] << 8) |
+ ((uint64) data[5] << 16) | ((uint64) data[4] << 24) |
+ ((uint64) data[3] << 32) | ((uint64) data[2] << 40) |
+ ((uint64) data[1] << 48) | ((uint64) data[0] << 56);
+ data += 8;
+ /* Apply the SHA-512 compression function to update a..h */
+ T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + W512[j];
+ T2 = Sigma0_512(a) + Maj(a, b, c);
+ h = g;
+ g = f;
+ f = e;
+ e = d + T1;
+ d = c;
+ c = b;
+ b = a;
+ a = T1 + T2;
+
+ j++;
+ } while (j < 16);
+
+ do
+ {
+ /* Part of the message block expansion: */
+ s0 = W512[(j + 1) & 0x0f];
+ s0 = sigma0_512(s0);
+ s1 = W512[(j + 14) & 0x0f];
+ s1 = sigma1_512(s1);
+
+ /* Apply the SHA-512 compression function to update a..h */
+ T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] +
+ (W512[j & 0x0f] += s1 + W512[(j + 9) & 0x0f] + s0);
+ T2 = Sigma0_512(a) + Maj(a, b, c);
+ h = g;
+ g = f;
+ f = e;
+ e = d + T1;
+ d = c;
+ c = b;
+ b = a;
+ a = T1 + T2;
+
+ j++;
+ } while (j < 80);
+
+ /* Compute the current intermediate hash value */
+ context->state[0] += a;
+ context->state[1] += b;
+ context->state[2] += c;
+ context->state[3] += d;
+ context->state[4] += e;
+ context->state[5] += f;
+ context->state[6] += g;
+ context->state[7] += h;
+
+ /* Clean up */
+ a = b = c = d = e = f = g = h = T1 = T2 = 0;
+}
+#endif /* SHA2_UNROLL_TRANSFORM */
+
+void
+pg_sha512_update(pg_sha512_ctx *context, const uint8 *data, size_t len)
+{
+ size_t freespace,
+ usedspace;
+
+ /* Calling with no data is valid (we do nothing) */
+ if (len == 0)
+ return;
+
+ usedspace = (context->bitcount[0] >> 3) % PG_SHA512_BLOCK_LENGTH;
+ if (usedspace > 0)
+ {
+ /* Calculate how much free space is available in the buffer */
+ freespace = PG_SHA512_BLOCK_LENGTH - usedspace;
+
+ if (len >= freespace)
+ {
+ /* Fill the buffer completely and process it */
+ memcpy(&context->buffer[usedspace], data, freespace);
+ ADDINC128(context->bitcount, freespace << 3);
+ len -= freespace;
+ data += freespace;
+ SHA512_Transform(context, context->buffer);
+ }
+ else
+ {
+ /* The buffer is not yet full */
+ memcpy(&context->buffer[usedspace], data, len);
+ ADDINC128(context->bitcount, len << 3);
+ /* Clean up: */
+ usedspace = freespace = 0;
+ return;
+ }
+ }
+ while (len >= PG_SHA512_BLOCK_LENGTH)
+ {
+ /* Process as many complete blocks as we can */
+ SHA512_Transform(context, data);
+ ADDINC128(context->bitcount, PG_SHA512_BLOCK_LENGTH << 3);
+ len -= PG_SHA512_BLOCK_LENGTH;
+ data += PG_SHA512_BLOCK_LENGTH;
+ }
+ if (len > 0)
+ {
+ /* There's left-overs, so save 'em */
+ memcpy(context->buffer, data, len);
+ ADDINC128(context->bitcount, len << 3);
+ }
+ /* Clean up: */
+ usedspace = freespace = 0;
+}
+
+static void
+SHA512_Last(pg_sha512_ctx *context)
+{
+ unsigned int usedspace;
+
+ usedspace = (context->bitcount[0] >> 3) % PG_SHA512_BLOCK_LENGTH;
+#ifndef WORDS_BIGENDIAN
+ /* Convert FROM host byte order */
+ REVERSE64(context->bitcount[0], context->bitcount[0]);
+ REVERSE64(context->bitcount[1], context->bitcount[1]);
+#endif
+ if (usedspace > 0)
+ {
+ /* Begin padding with a 1 bit: */
+ context->buffer[usedspace++] = 0x80;
+
+ if (usedspace <= PG_SHA512_SHORT_BLOCK_LENGTH)
+ {
+ /* Set-up for the last transform: */
+ memset(&context->buffer[usedspace], 0, PG_SHA512_SHORT_BLOCK_LENGTH - usedspace);
+ }
+ else
+ {
+ if (usedspace < PG_SHA512_BLOCK_LENGTH)
+ {
+ memset(&context->buffer[usedspace], 0, PG_SHA512_BLOCK_LENGTH - usedspace);
+ }
+ /* Do second-to-last transform: */
+ SHA512_Transform(context, context->buffer);
+
+ /* And set-up for the last transform: */
+ memset(context->buffer, 0, PG_SHA512_BLOCK_LENGTH - 2);
+ }
+ }
+ else
+ {
+ /* Prepare for final transform: */
+ memset(context->buffer, 0, PG_SHA512_SHORT_BLOCK_LENGTH);
+
+ /* Begin padding with a 1 bit: */
+ *context->buffer = 0x80;
+ }
+ /* Store the length of input data (in bits): */
+ *(uint64 *) &context->buffer[PG_SHA512_SHORT_BLOCK_LENGTH] = context->bitcount[1];
+ *(uint64 *) &context->buffer[PG_SHA512_SHORT_BLOCK_LENGTH + 8] = context->bitcount[0];
+
+ /* Final transform: */
+ SHA512_Transform(context, context->buffer);
+}
+
+void
+pg_sha512_final(pg_sha512_ctx *context, uint8 *digest)
+{
+ /* If no digest buffer is passed, we don't bother doing this: */
+ if (digest != NULL)
+ {
+ SHA512_Last(context);
+
+ /* Save the hash data for output: */
+#ifndef WORDS_BIGENDIAN
+ {
+ /* Convert TO host byte order */
+ int j;
+
+ for (j = 0; j < 8; j++)
+ {
+ REVERSE64(context->state[j], context->state[j]);
+ }
+ }
+#endif
+ memcpy(digest, context->state, PG_SHA512_DIGEST_LENGTH);
+ }
+
+ /* Zero out state data */
+ memset(context, 0, sizeof(pg_sha512_ctx));
+}
+
+
+/*** SHA-384: *********************************************************/
+void
+pg_sha384_init(pg_sha384_ctx *context)
+{
+ if (context == NULL)
+ return;
+ memcpy(context->state, sha384_initial_hash_value, PG_SHA512_DIGEST_LENGTH);
+ memset(context->buffer, 0, PG_SHA384_BLOCK_LENGTH);
+ context->bitcount[0] = context->bitcount[1] = 0;
+}
+
+void
+pg_sha384_update(pg_sha384_ctx *context, const uint8 *data, size_t len)
+{
+ pg_sha512_update((pg_sha512_ctx *) context, data, len);
+}
+
+void
+pg_sha384_final(pg_sha384_ctx *context, uint8 *digest)
+{
+ /* If no digest buffer is passed, we don't bother doing this: */
+ if (digest != NULL)
+ {
+ SHA512_Last((pg_sha512_ctx *) context);
+
+ /* Save the hash data for output: */
+#ifndef WORDS_BIGENDIAN
+ {
+ /* Convert TO host byte order */
+ int j;
+
+ for (j = 0; j < 6; j++)
+ {
+ REVERSE64(context->state[j], context->state[j]);
+ }
+ }
+#endif
+ memcpy(digest, context->state, PG_SHA384_DIGEST_LENGTH);
+ }
+
+ /* Zero out state data */
+ memset(context, 0, sizeof(pg_sha384_ctx));
+}
+
+/*** SHA-224: *********************************************************/
+void
+pg_sha224_init(pg_sha224_ctx *context)
+{
+ if (context == NULL)
+ return;
+ memcpy(context->state, sha224_initial_hash_value, PG_SHA256_DIGEST_LENGTH);
+ memset(context->buffer, 0, PG_SHA256_BLOCK_LENGTH);
+ context->bitcount = 0;
+}
+
+void
+pg_sha224_update(pg_sha224_ctx *context, const uint8 *data, size_t len)
+{
+ pg_sha256_update((pg_sha256_ctx *) context, data, len);
+}
+
+void
+pg_sha224_final(pg_sha224_ctx *context, uint8 *digest)
+{
+ /* If no digest buffer is passed, we don't bother doing this: */
+ if (digest != NULL)
+ {
+ SHA256_Last(context);
+
+#ifndef WORDS_BIGENDIAN
+ {
+ /* Convert TO host byte order */
+ int j;
+
+ for (j = 0; j < 8; j++)
+ {
+ REVERSE32(context->state[j], context->state[j]);
+ }
+ }
+#endif
+ memcpy(digest, context->state, PG_SHA224_DIGEST_LENGTH);
+ }
+
+ /* Clean up state data: */
+ memset(context, 0, sizeof(pg_sha224_ctx));
+}
diff --git a/src/common/sha2_openssl.c b/src/common/sha2_openssl.c
new file mode 100644
index 0000000..41673b3
--- /dev/null
+++ b/src/common/sha2_openssl.c
@@ -0,0 +1,102 @@
+/*-------------------------------------------------------------------------
+ *
+ * sha2_openssl.c
+ * Set of wrapper routines on top of OpenSSL to support SHA-224
+ * SHA-256, SHA-384 and SHA-512 functions.
+ *
+ * This should only be used if code is compiled with OpenSSL support.
+ *
+ * Portions Copyright (c) 2016-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/sha2_openssl.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <openssl/sha.h>
+
+#include "common/sha2.h"
+
+
+/* Interface routines for SHA-256 */
+void
+pg_sha256_init(pg_sha256_ctx *ctx)
+{
+ SHA256_Init((SHA256_CTX *) ctx);
+}
+
+void
+pg_sha256_update(pg_sha256_ctx *ctx, const uint8 *data, size_t len)
+{
+ SHA256_Update((SHA256_CTX *) ctx, data, len);
+}
+
+void
+pg_sha256_final(pg_sha256_ctx *ctx, uint8 *dest)
+{
+ SHA256_Final(dest, (SHA256_CTX *) ctx);
+}
+
+/* Interface routines for SHA-512 */
+void
+pg_sha512_init(pg_sha512_ctx *ctx)
+{
+ SHA512_Init((SHA512_CTX *) ctx);
+}
+
+void
+pg_sha512_update(pg_sha512_ctx *ctx, const uint8 *data, size_t len)
+{
+ SHA512_Update((SHA512_CTX *) ctx, data, len);
+}
+
+void
+pg_sha512_final(pg_sha512_ctx *ctx, uint8 *dest)
+{
+ SHA512_Final(dest, (SHA512_CTX *) ctx);
+}
+
+/* Interface routines for SHA-384 */
+void
+pg_sha384_init(pg_sha384_ctx *ctx)
+{
+ SHA384_Init((SHA512_CTX *) ctx);
+}
+
+void
+pg_sha384_update(pg_sha384_ctx *ctx, const uint8 *data, size_t len)
+{
+ SHA384_Update((SHA512_CTX *) ctx, data, len);
+}
+
+void
+pg_sha384_final(pg_sha384_ctx *ctx, uint8 *dest)
+{
+ SHA384_Final(dest, (SHA512_CTX *) ctx);
+}
+
+/* Interface routines for SHA-224 */
+void
+pg_sha224_init(pg_sha224_ctx *ctx)
+{
+ SHA224_Init((SHA256_CTX *) ctx);
+}
+
+void
+pg_sha224_update(pg_sha224_ctx *ctx, const uint8 *data, size_t len)
+{
+ SHA224_Update((SHA256_CTX *) ctx, data, len);
+}
+
+void
+pg_sha224_final(pg_sha224_ctx *ctx, uint8 *dest)
+{
+ SHA224_Final(dest, (SHA256_CTX *) ctx);
+}
diff --git a/src/common/string.c b/src/common/string.c
new file mode 100644
index 0000000..bcbbfb8
--- /dev/null
+++ b/src/common/string.c
@@ -0,0 +1,114 @@
+/*-------------------------------------------------------------------------
+ *
+ * string.c
+ * string handling helpers
+ *
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/string.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/string.h"
+
+
+/*
+ * Returns whether the string `str' has the postfix `end'.
+ */
+bool
+pg_str_endswith(const char *str, const char *end)
+{
+ size_t slen = strlen(str);
+ size_t elen = strlen(end);
+
+ /* can't be a postfix if longer */
+ if (elen > slen)
+ return false;
+
+ /* compare the end of the strings */
+ str += slen - elen;
+ return strcmp(str, end) == 0;
+}
+
+
+/*
+ * strtoint --- just like strtol, but returns int not long
+ */
+int
+strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base)
+{
+ long val;
+
+ val = strtol(str, endptr, base);
+ if (val != (int) val)
+ errno = ERANGE;
+ return (int) val;
+}
+
+
+/*
+ * pg_clean_ascii -- Replace any non-ASCII chars with a '?' char
+ *
+ * Modifies the string passed in which must be '\0'-terminated.
+ *
+ * This function exists specifically to deal with filtering out
+ * non-ASCII characters in a few places where the client can provide an almost
+ * arbitrary string (and it isn't checked to ensure it's a valid username or
+ * database name or similar) and we don't want to have control characters or other
+ * things ending up in the log file where server admins might end up with a
+ * messed up terminal when looking at them.
+ *
+ * In general, this function should NOT be used- instead, consider how to handle
+ * the string without needing to filter out the non-ASCII characters.
+ *
+ * Ultimately, we'd like to improve the situation to not require stripping out
+ * all non-ASCII but perform more intelligent filtering which would allow UTF or
+ * similar, but it's unclear exactly what we should allow, so stick to ASCII only
+ * for now.
+ */
+void
+pg_clean_ascii(char *str)
+{
+ /* Only allow clean ASCII chars in the string */
+ char *p;
+
+ for (p = str; *p != '\0'; p++)
+ {
+ if (*p < 32 || *p > 126)
+ *p = '?';
+ }
+}
+
+
+/*
+ * pg_strip_crlf -- Remove any trailing newline and carriage return
+ *
+ * Removes any trailing newline and carriage return characters (\r on
+ * Windows) in the input string, zero-terminating it.
+ *
+ * The passed in string must be zero-terminated. This function returns
+ * the new length of the string.
+ */
+int
+pg_strip_crlf(char *str)
+{
+ int len = strlen(str);
+
+ while (len > 0 && (str[len - 1] == '\n' ||
+ str[len - 1] == '\r'))
+ str[--len] = '\0';
+
+ return len;
+}
diff --git a/src/common/stringinfo.c b/src/common/stringinfo.c
new file mode 100644
index 0000000..0badc46
--- /dev/null
+++ b/src/common/stringinfo.c
@@ -0,0 +1,343 @@
+/*-------------------------------------------------------------------------
+ *
+ * stringinfo.c
+ *
+ * StringInfo provides an extensible string data type (currently limited to a
+ * length of 1GB). It can be used to buffer either ordinary C strings
+ * (null-terminated text) or arbitrary binary data. All storage is allocated
+ * with palloc() (falling back to malloc in frontend code).
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/common/stringinfo.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+
+#include "postgres.h"
+#include "utils/memutils.h"
+
+#else
+
+#include "postgres_fe.h"
+
+/* It's possible we could use a different value for this in frontend code */
+#define MaxAllocSize ((Size) 0x3fffffff) /* 1 gigabyte - 1 */
+
+#endif
+
+#include "lib/stringinfo.h"
+
+
+/*
+ * makeStringInfo
+ *
+ * Create an empty 'StringInfoData' & return a pointer to it.
+ */
+StringInfo
+makeStringInfo(void)
+{
+ StringInfo res;
+
+ res = (StringInfo) palloc(sizeof(StringInfoData));
+
+ initStringInfo(res);
+
+ return res;
+}
+
+/*
+ * initStringInfo
+ *
+ * Initialize a StringInfoData struct (with previously undefined contents)
+ * to describe an empty string.
+ */
+void
+initStringInfo(StringInfo str)
+{
+ int size = 1024; /* initial default buffer size */
+
+ str->data = (char *) palloc(size);
+ str->maxlen = size;
+ resetStringInfo(str);
+}
+
+/*
+ * resetStringInfo
+ *
+ * Reset the StringInfo: the data buffer remains valid, but its
+ * previous content, if any, is cleared.
+ */
+void
+resetStringInfo(StringInfo str)
+{
+ str->data[0] = '\0';
+ str->len = 0;
+ str->cursor = 0;
+}
+
+/*
+ * appendStringInfo
+ *
+ * Format text data under the control of fmt (an sprintf-style format string)
+ * and append it to whatever is already in str. More space is allocated
+ * to str if necessary. This is sort of like a combination of sprintf and
+ * strcat.
+ */
+void
+appendStringInfo(StringInfo str, const char *fmt,...)
+{
+ int save_errno = errno;
+
+ for (;;)
+ {
+ va_list args;
+ int needed;
+
+ /* Try to format the data. */
+ errno = save_errno;
+ va_start(args, fmt);
+ needed = appendStringInfoVA(str, fmt, args);
+ va_end(args);
+
+ if (needed == 0)
+ break; /* success */
+
+ /* Increase the buffer size and try again. */
+ enlargeStringInfo(str, needed);
+ }
+}
+
+/*
+ * appendStringInfoVA
+ *
+ * Attempt to format text data under the control of fmt (an sprintf-style
+ * format string) and append it to whatever is already in str. If successful
+ * return zero; if not (because there's not enough space), return an estimate
+ * of the space needed, without modifying str. Typically the caller should
+ * pass the return value to enlargeStringInfo() before trying again; see
+ * appendStringInfo for standard usage pattern.
+ *
+ * Caution: callers must be sure to preserve their entry-time errno
+ * when looping, in case the fmt contains "%m".
+ *
+ * XXX This API is ugly, but there seems no alternative given the C spec's
+ * restrictions on what can portably be done with va_list arguments: you have
+ * to redo va_start before you can rescan the argument list, and we can't do
+ * that from here.
+ */
+int
+appendStringInfoVA(StringInfo str, const char *fmt, va_list args)
+{
+ int avail;
+ size_t nprinted;
+
+ Assert(str != NULL);
+
+ /*
+ * If there's hardly any space, don't bother trying, just fail to make the
+ * caller enlarge the buffer first. We have to guess at how much to
+ * enlarge, since we're skipping the formatting work.
+ */
+ avail = str->maxlen - str->len;
+ if (avail < 16)
+ return 32;
+
+ nprinted = pvsnprintf(str->data + str->len, (size_t) avail, fmt, args);
+
+ if (nprinted < (size_t) avail)
+ {
+ /* Success. Note nprinted does not include trailing null. */
+ str->len += (int) nprinted;
+ return 0;
+ }
+
+ /* Restore the trailing null so that str is unmodified. */
+ str->data[str->len] = '\0';
+
+ /*
+ * Return pvsnprintf's estimate of the space needed. (Although this is
+ * given as a size_t, we know it will fit in int because it's not more
+ * than MaxAllocSize.)
+ */
+ return (int) nprinted;
+}
+
+/*
+ * appendStringInfoString
+ *
+ * Append a null-terminated string to str.
+ * Like appendStringInfo(str, "%s", s) but faster.
+ */
+void
+appendStringInfoString(StringInfo str, const char *s)
+{
+ appendBinaryStringInfo(str, s, strlen(s));
+}
+
+/*
+ * appendStringInfoChar
+ *
+ * Append a single byte to str.
+ * Like appendStringInfo(str, "%c", ch) but much faster.
+ */
+void
+appendStringInfoChar(StringInfo str, char ch)
+{
+ /* Make more room if needed */
+ if (str->len + 1 >= str->maxlen)
+ enlargeStringInfo(str, 1);
+
+ /* OK, append the character */
+ str->data[str->len] = ch;
+ str->len++;
+ str->data[str->len] = '\0';
+}
+
+/*
+ * appendStringInfoSpaces
+ *
+ * Append the specified number of spaces to a buffer.
+ */
+void
+appendStringInfoSpaces(StringInfo str, int count)
+{
+ if (count > 0)
+ {
+ /* Make more room if needed */
+ enlargeStringInfo(str, count);
+
+ /* OK, append the spaces */
+ while (--count >= 0)
+ str->data[str->len++] = ' ';
+ str->data[str->len] = '\0';
+ }
+}
+
+/*
+ * appendBinaryStringInfo
+ *
+ * Append arbitrary binary data to a StringInfo, allocating more space
+ * if necessary. Ensures that a trailing null byte is present.
+ */
+void
+appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
+{
+ Assert(str != NULL);
+
+ /* Make more room if needed */
+ enlargeStringInfo(str, datalen);
+
+ /* OK, append the data */
+ memcpy(str->data + str->len, data, datalen);
+ str->len += datalen;
+
+ /*
+ * Keep a trailing null in place, even though it's probably useless for
+ * binary data. (Some callers are dealing with text but call this because
+ * their input isn't null-terminated.)
+ */
+ str->data[str->len] = '\0';
+}
+
+/*
+ * appendBinaryStringInfoNT
+ *
+ * Append arbitrary binary data to a StringInfo, allocating more space
+ * if necessary. Does not ensure a trailing null-byte exists.
+ */
+void
+appendBinaryStringInfoNT(StringInfo str, const char *data, int datalen)
+{
+ Assert(str != NULL);
+
+ /* Make more room if needed */
+ enlargeStringInfo(str, datalen);
+
+ /* OK, append the data */
+ memcpy(str->data + str->len, data, datalen);
+ str->len += datalen;
+}
+
+/*
+ * enlargeStringInfo
+ *
+ * Make sure there is enough space for 'needed' more bytes
+ * ('needed' does not include the terminating null).
+ *
+ * External callers usually need not concern themselves with this, since
+ * all stringinfo.c routines do it automatically. However, if a caller
+ * knows that a StringInfo will eventually become X bytes large, it
+ * can save some palloc overhead by enlarging the buffer before starting
+ * to store data in it.
+ *
+ * NB: In the backend, because we use repalloc() to enlarge the buffer, the
+ * string buffer will remain allocated in the same memory context that was
+ * current when initStringInfo was called, even if another context is now
+ * current. This is the desired and indeed critical behavior!
+ */
+void
+enlargeStringInfo(StringInfo str, int needed)
+{
+ int newlen;
+
+ /*
+ * Guard against out-of-range "needed" values. Without this, we can get
+ * an overflow or infinite loop in the following.
+ */
+ if (needed < 0) /* should not happen */
+ {
+#ifndef FRONTEND
+ elog(ERROR, "invalid string enlargement request size: %d", needed);
+#else
+ fprintf(stderr, "invalid string enlargement request size: %d\n", needed);
+ exit(EXIT_FAILURE);
+#endif
+ }
+ if (((Size) needed) >= (MaxAllocSize - (Size) str->len))
+ {
+#ifndef FRONTEND
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("out of memory"),
+ errdetail("Cannot enlarge string buffer containing %d bytes by %d more bytes.",
+ str->len, needed)));
+#else
+ fprintf(stderr,
+ _("out of memory\n\nCannot enlarge string buffer containing %d bytes by %d more bytes.\n"),
+ str->len, needed);
+ exit(EXIT_FAILURE);
+#endif
+ }
+
+ needed += str->len + 1; /* total space required now */
+
+ /* Because of the above test, we now have needed <= MaxAllocSize */
+
+ if (needed <= str->maxlen)
+ return; /* got enough space already */
+
+ /*
+ * We don't want to allocate just a little more space with each append;
+ * for efficiency, double the buffer size each time it overflows.
+ * Actually, we might need to more than double it if 'needed' is big...
+ */
+ newlen = 2 * str->maxlen;
+ while (needed > newlen)
+ newlen = 2 * newlen;
+
+ /*
+ * Clamp to MaxAllocSize in case we went past it. Note we are assuming
+ * here that MaxAllocSize <= INT_MAX/2, else the above loop could
+ * overflow. We will still have newlen >= needed.
+ */
+ if (newlen > (int) MaxAllocSize)
+ newlen = (int) MaxAllocSize;
+
+ str->data = (char *) repalloc(str->data, newlen);
+
+ str->maxlen = newlen;
+}
diff --git a/src/common/unicode/.gitignore b/src/common/unicode/.gitignore
new file mode 100644
index 0000000..512862e
--- /dev/null
+++ b/src/common/unicode/.gitignore
@@ -0,0 +1,8 @@
+/norm_test
+/norm_test_table.h
+
+# Downloaded files
+/CompositionExclusions.txt
+/DerivedNormalizationProps.txt
+/NormalizationTest.txt
+/UnicodeData.txt
diff --git a/src/common/unicode/Makefile b/src/common/unicode/Makefile
new file mode 100644
index 0000000..93a9d16
--- /dev/null
+++ b/src/common/unicode/Makefile
@@ -0,0 +1,67 @@
+#-------------------------------------------------------------------------
+#
+# Makefile
+# Makefile for src/common/unicode
+#
+# IDENTIFICATION
+# src/common/unicode/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/common/unicode
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+override CPPFLAGS := -DFRONTEND $(CPPFLAGS)
+LIBS += $(PTHREAD_LIBS)
+
+# By default, do nothing.
+all:
+
+update-unicode: unicode_norm_table.h unicode_combining_table.h unicode_normprops_table.h
+ mv $^ ../../../src/include/common/
+ $(MAKE) normalization-check
+
+# These files are part of the Unicode Character Database. Download
+# them on demand. The dependency on Makefile.global is for
+# UNICODE_VERSION.
+UnicodeData.txt DerivedNormalizationProps.txt CompositionExclusions.txt NormalizationTest.txt: $(top_builddir)/src/Makefile.global
+ $(DOWNLOAD) https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/$(@F)
+
+# Generation of conversion tables used for string normalization with
+# UTF-8 strings.
+unicode_norm_table.h: generate-unicode_norm_table.pl UnicodeData.txt CompositionExclusions.txt
+ $(PERL) generate-unicode_norm_table.pl
+
+unicode_combining_table.h: generate-unicode_combining_table.pl UnicodeData.txt
+ $(PERL) $^ >$@
+
+unicode_normprops_table.h: generate-unicode_normprops_table.pl DerivedNormalizationProps.txt
+ $(PERL) $^ >$@
+
+# Test suite
+normalization-check: norm_test
+ ./norm_test
+
+norm_test: norm_test.o ../unicode_norm.o | submake-common
+
+norm_test.o: norm_test_table.h
+
+.PHONY: submake-common
+
+submake-common:
+ $(MAKE) -C .. all
+
+norm_test_table.h: generate-norm_test_table.pl NormalizationTest.txt
+ perl generate-norm_test_table.pl NormalizationTest.txt $@
+
+.PHONY: normalization-check
+
+
+clean:
+ rm -f $(OBJS) norm_test norm_test.o
+
+distclean: clean
+ rm -f UnicodeData.txt CompositionExclusions.txt NormalizationTest.txt norm_test_table.h unicode_norm_table.h
+
+maintainer-clean: distclean
diff --git a/src/common/unicode/README b/src/common/unicode/README
new file mode 100644
index 0000000..56956f6
--- /dev/null
+++ b/src/common/unicode/README
@@ -0,0 +1,28 @@
+This directory contains tools to generate the tables in
+src/include/common/unicode_norm.h, used for Unicode normalization. The
+generated .h file is included in the source tree, so these are normally not
+needed to build PostgreSQL, only if you need to re-generate the .h file
+from the Unicode data files for some reason, e.g. to update to a new version
+of Unicode.
+
+Generating unicode_norm_table.h
+-------------------------------
+
+Run
+
+ make update-unicode
+
+from the top level of the source tree and commit the result.
+
+Tests
+-----
+
+The Unicode consortium publishes a comprehensive test suite for the
+normalization algorithm, in a file called NormalizationTest.txt. This
+directory also contains a perl script and some C code, to run our
+normalization code with all the test strings in NormalizationTest.txt.
+To download NormalizationTest.txt and run the tests:
+
+ make normalization-check
+
+This is also run as part of the update-unicode target.
diff --git a/src/common/unicode/generate-norm_test_table.pl b/src/common/unicode/generate-norm_test_table.pl
new file mode 100644
index 0000000..acc6796
--- /dev/null
+++ b/src/common/unicode/generate-norm_test_table.pl
@@ -0,0 +1,106 @@
+#!/usr/bin/perl
+#
+# Read Unicode consortium's normalization test suite, NormalizationTest.txt,
+# and generate a C array from it, for norm_test.c.
+#
+# NormalizationTest.txt is part of the Unicode Character Database.
+#
+# Copyright (c) 2000-2020, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+use File::Basename;
+
+die "Usage: $0 INPUT_FILE OUTPUT_FILE\n" if @ARGV != 2;
+my $input_file = $ARGV[0];
+my $output_file = $ARGV[1];
+my $output_base = basename($output_file);
+
+# Open the input and output files
+open my $INPUT, '<', $input_file
+ or die "Could not open input file $input_file: $!";
+open my $OUTPUT, '>', $output_file
+ or die "Could not open output file $output_file: $!\n";
+
+# Print header of output file.
+print $OUTPUT <<HEADER;
+/*-------------------------------------------------------------------------
+ *
+ * norm_test_table.h
+ * Test strings for Unicode normalization.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/common/unicode/norm_test_table.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * File auto-generated by src/common/unicode/generate-norm_test_table.pl, do
+ * not edit. There is deliberately not an #ifndef PG_NORM_TEST_TABLE_H
+ * here.
+ */
+
+typedef struct
+{
+ int linenum;
+ pg_wchar input[50];
+ pg_wchar output[4][50];
+} pg_unicode_test;
+
+/* test table */
+HEADER
+print $OUTPUT
+ "static const pg_unicode_test UnicodeNormalizationTests[] =\n{\n";
+
+# Helper routine to convert a space-separated list of Unicode characters to
+# hexadecimal list format, suitable for outputting in a C array.
+sub codepoint_string_to_hex
+{
+ my $codepoint_string = shift;
+
+ my $result;
+
+ foreach (split(' ', $codepoint_string))
+ {
+ my $cp = $_;
+ my $utf8 = "0x$cp, ";
+ $result .= $utf8;
+ }
+ $result .= '0'; # null-terminated the array
+ return $result;
+}
+
+# Process the input file line by line
+my $linenum = 0;
+while (my $line = <$INPUT>)
+{
+ $linenum = $linenum + 1;
+ if ($line =~ /^\s*#/) { next; } # ignore comments
+
+ if ($line =~ /^@/) { next; } # ignore @Part0 like headers
+
+ # Split the line wanted and get the fields needed:
+ #
+ # source; NFC; NFD; NFKC; NFKD
+ my ($source, $nfc, $nfd, $nfkc, $nfkd) = split(';', $line);
+
+ my $source_utf8 = codepoint_string_to_hex($source);
+ my $nfc_utf8 = codepoint_string_to_hex($nfc);
+ my $nfd_utf8 = codepoint_string_to_hex($nfd);
+ my $nfkc_utf8 = codepoint_string_to_hex($nfkc);
+ my $nfkd_utf8 = codepoint_string_to_hex($nfkd);
+
+ print $OUTPUT
+ "\t{ $linenum, { $source_utf8 }, { { $nfc_utf8 }, { $nfd_utf8 }, { $nfkc_utf8 }, { $nfkd_utf8 } } },\n";
+}
+
+# Output terminator entry
+print $OUTPUT "\t{ 0, { 0 }, { { 0 }, { 0 }, { 0 }, { 0 } } }";
+print $OUTPUT "\n};\n";
+
+close $OUTPUT;
+close $INPUT;
diff --git a/src/common/unicode/generate-unicode_combining_table.pl b/src/common/unicode/generate-unicode_combining_table.pl
new file mode 100644
index 0000000..c0fc3cc
--- /dev/null
+++ b/src/common/unicode/generate-unicode_combining_table.pl
@@ -0,0 +1,53 @@
+#!/usr/bin/perl
+#
+# Generate sorted list of non-overlapping intervals of non-spacing
+# characters, using Unicode data files as input. Pass UnicodeData.txt
+# as argument. The output is on stdout.
+#
+# Copyright (c) 2019, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+my $range_start = undef;
+my $codepoint;
+my $prev_codepoint;
+my $count = 0;
+
+print
+ "/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */\n\n";
+
+print "static const struct mbinterval combining[] = {\n";
+
+foreach my $line (<ARGV>)
+{
+ chomp $line;
+ my @fields = split ';', $line;
+ $codepoint = hex $fields[0];
+
+ next if $codepoint > 0xFFFF;
+
+ if ($fields[2] eq 'Me' || $fields[2] eq 'Mn')
+ {
+ # combining character, save for start of range
+ if (!defined($range_start))
+ {
+ $range_start = $codepoint;
+ }
+ }
+ else
+ {
+ # not a combining character, print out previous range if any
+ if (defined($range_start))
+ {
+ printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint;
+ $range_start = undef;
+ }
+ }
+}
+continue
+{
+ $prev_codepoint = $codepoint;
+}
+
+print "};\n";
diff --git a/src/common/unicode/generate-unicode_norm_table.pl b/src/common/unicode/generate-unicode_norm_table.pl
new file mode 100644
index 0000000..7ce15e1
--- /dev/null
+++ b/src/common/unicode/generate-unicode_norm_table.pl
@@ -0,0 +1,234 @@
+#!/usr/bin/perl
+#
+# Generate a composition table, using Unicode data files as input
+#
+# Input: UnicodeData.txt and CompositionExclusions.txt
+# Output: unicode_norm_table.h
+#
+# Copyright (c) 2000-2020, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+my $output_file = "unicode_norm_table.h";
+
+my $FH;
+
+# Read list of codes that should be excluded from re-composition.
+my @composition_exclusion_codes = ();
+open($FH, '<', "CompositionExclusions.txt")
+ or die "Could not open CompositionExclusions.txt: $!.";
+while (my $line = <$FH>)
+{
+ if ($line =~ /^([[:xdigit:]]+)/)
+ {
+ push @composition_exclusion_codes, $1;
+ }
+}
+close $FH;
+
+# Read entries from UnicodeData.txt into a list, and a hash table. We need
+# three fields from each row: the codepoint, canonical combining class,
+# and character decomposition mapping
+my @characters = ();
+my %character_hash = ();
+open($FH, '<', "UnicodeData.txt")
+ or die "Could not open UnicodeData.txt: $!.";
+while (my $line = <$FH>)
+{
+
+ # Split the line wanted and get the fields needed:
+ # - Unicode code value
+ # - Canonical Combining Class
+ # - Character Decomposition Mapping
+ my @elts = split(';', $line);
+ my $code = $elts[0];
+ my $class = $elts[3];
+ my $decomp = $elts[5];
+
+ # Skip codepoints above U+10FFFF. They cannot be represented in 4 bytes
+ # in UTF-8, and PostgreSQL doesn't support UTF-8 characters longer than
+ # 4 bytes. (This is just pro forma, as there aren't any such entries in
+ # the data file, currently.)
+ next if hex($code) > 0x10FFFF;
+
+ # Skip characters with no decompositions and a class of 0, to reduce the
+ # table size.
+ next if $class eq '0' && $decomp eq '';
+
+ my %char_entry = (code => $code, class => $class, decomp => $decomp);
+ push(@characters, \%char_entry);
+ $character_hash{$code} = \%char_entry;
+}
+close $FH;
+
+my $num_characters = scalar @characters;
+
+# Start writing out the output file
+open my $OUTPUT, '>', $output_file
+ or die "Could not open output file $output_file: $!\n";
+
+print $OUTPUT <<HEADER;
+/*-------------------------------------------------------------------------
+ *
+ * unicode_norm_table.h
+ * Composition table used for Unicode normalization
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/common/unicode_norm_table.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * File auto-generated by src/common/unicode/generate-unicode_norm_table.pl,
+ * do not edit. There is deliberately not an #ifndef PG_UNICODE_NORM_TABLE_H
+ * here.
+ */
+typedef struct
+{
+ uint32 codepoint; /* Unicode codepoint */
+ uint8 comb_class; /* combining class of character */
+ uint8 dec_size_flags; /* size and flags of decomposition code list */
+ uint16 dec_index; /* index into UnicodeDecomp_codepoints, or the
+ * decomposition itself if DECOMP_INLINE */
+} pg_unicode_decomposition;
+
+#define DECOMP_NO_COMPOSE 0x80 /* don't use for re-composition */
+#define DECOMP_INLINE 0x40 /* decomposition is stored inline in
+ * dec_index */
+#define DECOMP_COMPAT 0x20 /* compatibility mapping */
+
+#define DECOMPOSITION_SIZE(x) ((x)->dec_size_flags & 0x1F)
+#define DECOMPOSITION_NO_COMPOSE(x) (((x)->dec_size_flags & (DECOMP_NO_COMPOSE | DECOMP_COMPAT)) != 0)
+#define DECOMPOSITION_IS_INLINE(x) (((x)->dec_size_flags & DECOMP_INLINE) != 0)
+#define DECOMPOSITION_IS_COMPAT(x) (((x)->dec_size_flags & DECOMP_COMPAT) != 0)
+
+/* Table of Unicode codepoints and their decompositions */
+static const pg_unicode_decomposition UnicodeDecompMain[$num_characters] =
+{
+HEADER
+
+my $decomp_index = 0;
+my $decomp_string = "";
+
+my $last_code = $characters[-1]->{code};
+foreach my $char (@characters)
+{
+ my $code = $char->{code};
+ my $class = $char->{class};
+ my $decomp = $char->{decomp};
+
+ # The character decomposition mapping field in UnicodeData.txt is a list
+ # of unicode codepoints, separated by space. But it can be prefixed with
+ # so-called compatibility formatting tag, like "<compat>", or "<font>".
+ # The entries with compatibility formatting tags should not be used for
+ # re-composing characters during normalization, so flag them in the table.
+ # (The tag doesn't matter, only whether there is a tag or not)
+ my $compat = 0;
+ if ($decomp =~ /\<.*\>/)
+ {
+ $compat = 1;
+ $decomp =~ s/\<[^][]*\>//g;
+ }
+ my @decomp_elts = split(" ", $decomp);
+
+ # Decomposition size
+ # Print size of decomposition
+ my $decomp_size = scalar(@decomp_elts);
+ die if $decomp_size > 0x1F; # to not overrun bitmask
+
+ my $first_decomp = shift @decomp_elts;
+
+ my $flags = "";
+ my $comment = "";
+
+ if ($compat)
+ {
+ $flags .= " | DECOMP_COMPAT";
+ }
+
+ if ($decomp_size == 2)
+ {
+ # Should this be used for recomposition?
+ if ( $character_hash{$first_decomp}
+ && $character_hash{$first_decomp}->{class} != 0)
+ {
+ $flags .= " | DECOMP_NO_COMPOSE";
+ $comment = "non-starter decomposition";
+ }
+ else
+ {
+ foreach my $lcode (@composition_exclusion_codes)
+ {
+ if ($lcode eq $char->{code})
+ {
+ $flags .= " | DECOMP_NO_COMPOSE";
+ $comment = "in exclusion list";
+ last;
+ }
+ }
+ }
+ }
+
+ if ($decomp_size == 0)
+ {
+ print $OUTPUT "\t{0x$code, $class, 0$flags, 0}";
+ }
+ elsif ($decomp_size == 1 && length($first_decomp) <= 4)
+ {
+
+ # The decomposition consists of a single codepoint, and it fits
+ # in a uint16, so we can store it "inline" in the main table.
+ $flags .= " | DECOMP_INLINE";
+ print $OUTPUT "\t{0x$code, $class, 1$flags, 0x$first_decomp}";
+ }
+ else
+ {
+ print $OUTPUT
+ "\t{0x$code, $class, $decomp_size$flags, $decomp_index}";
+
+ # Now save the decompositions into a dedicated area that will
+ # be written afterwards. First build the entry dedicated to
+ # a sub-table with the code and decomposition.
+ $decomp_string .= ",\n" if ($decomp_string ne "");
+
+ $decomp_string .= "\t /* $decomp_index */ 0x$first_decomp";
+ foreach (@decomp_elts)
+ {
+ $decomp_string .= ", 0x$_";
+ }
+
+ $decomp_index = $decomp_index + $decomp_size;
+ }
+
+ # Print a comma after all items except the last one.
+ print $OUTPUT "," unless ($code eq $last_code);
+ if ($comment ne "")
+ {
+
+ # If the line is wide already, indent the comment with one tab,
+ # otherwise with two. This is to make the output match the way
+ # pgindent would mangle it. (This is quite hacky. To do this
+ # properly, we should actually track how long the line is so far,
+ # but this works for now.)
+ print $OUTPUT "\t" if ($decomp_index < 10);
+
+ print $OUTPUT "\t/* $comment */" if ($comment ne "");
+ }
+ print $OUTPUT "\n";
+}
+print $OUTPUT "\n};\n\n";
+
+# Print the array of decomposed codes.
+print $OUTPUT <<HEADER;
+/* codepoints array */
+static const uint32 UnicodeDecomp_codepoints[$decomp_index] =
+{
+$decomp_string
+};
+HEADER
+
+close $OUTPUT;
diff --git a/src/common/unicode/generate-unicode_normprops_table.pl b/src/common/unicode/generate-unicode_normprops_table.pl
new file mode 100644
index 0000000..e8e5097
--- /dev/null
+++ b/src/common/unicode/generate-unicode_normprops_table.pl
@@ -0,0 +1,88 @@
+#!/usr/bin/perl
+#
+# Generate table of Unicode normalization "quick check" properties
+# (see UAX #15). Pass DerivedNormalizationProps.txt as argument. The
+# output is on stdout.
+#
+# Copyright (c) 2020, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+my %data;
+
+print
+ "/* generated by src/common/unicode/generate-unicode_normprops_table.pl, do not edit */\n\n";
+
+print <<EOS;
+#include "common/unicode_norm.h"
+
+/*
+ * We use a bit field here to save space.
+ */
+typedef struct
+{
+ unsigned int codepoint:21;
+ signed int quickcheck:4; /* really UnicodeNormalizationQC */
+} pg_unicode_normprops;
+EOS
+
+foreach my $line (<ARGV>)
+{
+ chomp $line;
+ $line =~ s/\s*#.*$//;
+ next if $line eq '';
+ my ($codepoint, $prop, $value) = split /\s*;\s*/, $line;
+ next if $prop !~ /_QC/;
+
+ my ($first, $last);
+ if ($codepoint =~ /\.\./)
+ {
+ ($first, $last) = split /\.\./, $codepoint;
+ }
+ else
+ {
+ $first = $last = $codepoint;
+ }
+
+ foreach my $cp (hex($first) .. hex($last))
+ {
+ $data{$prop}{$cp} = $value;
+ }
+}
+
+# We create a separate array for each normalization form rather than,
+# say, a two-dimensional array, because that array would be very
+# sparse and would create unnecessary overhead especially for the NFC
+# lookup.
+foreach my $prop (sort keys %data)
+{
+ # Don't build the tables for the "D" forms because they are too
+ # big. See also unicode_is_normalized_quickcheck().
+ next if $prop eq "NFD_QC" || $prop eq "NFKD_QC";
+
+ print "\n";
+ print
+ "static const pg_unicode_normprops UnicodeNormProps_${prop}[] = {\n";
+
+ my %subdata = %{ $data{$prop} };
+ foreach my $cp (sort { $a <=> $b } keys %subdata)
+ {
+ my $qc;
+ if ($subdata{$cp} eq 'N')
+ {
+ $qc = 'UNICODE_NORM_QC_NO';
+ }
+ elsif ($subdata{$cp} eq 'M')
+ {
+ $qc = 'UNICODE_NORM_QC_MAYBE';
+ }
+ else
+ {
+ die;
+ }
+ printf "\t{0x%04X, %s},\n", $cp, $qc;
+ }
+
+ print "};\n";
+}
diff --git a/src/common/unicode/norm_test.c b/src/common/unicode/norm_test.c
new file mode 100644
index 0000000..dde5d24
--- /dev/null
+++ b/src/common/unicode/norm_test.c
@@ -0,0 +1,86 @@
+/*-------------------------------------------------------------------------
+ * norm_test.c
+ * Program to test Unicode normalization functions.
+ *
+ * Portions Copyright (c) 2017-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/unicode/norm_test.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres_fe.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "common/unicode_norm.h"
+
+#include "norm_test_table.h"
+
+static char *
+print_wchar_str(const pg_wchar *s)
+{
+#define BUF_DIGITS 50
+ static char buf[BUF_DIGITS * 11 + 1];
+ int i;
+ char *p;
+
+ i = 0;
+ p = buf;
+ while (*s && i < BUF_DIGITS)
+ {
+ p += sprintf(p, "U+%04X ", *s);
+ i++;
+ s++;
+ }
+ *p = '\0';
+
+ return buf;
+}
+
+static int
+pg_wcscmp(const pg_wchar *s1, const pg_wchar *s2)
+{
+ for (;;)
+ {
+ if (*s1 < *s2)
+ return -1;
+ if (*s1 > *s2)
+ return 1;
+ if (*s1 == 0)
+ return 0;
+ s1++;
+ s2++;
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ const pg_unicode_test *test;
+
+ for (test = UnicodeNormalizationTests; test->input[0] != 0; test++)
+ {
+ for (int form = 0; form < 4; form++)
+ {
+ pg_wchar *result;
+
+ result = unicode_normalize(form, test->input);
+
+ if (pg_wcscmp(test->output[form], result) != 0)
+ {
+ printf("FAILURE (NormalizationTest.txt line %d form %d):\n", test->linenum, form);
+ printf("input: %s\n", print_wchar_str(test->input));
+ printf("expected: %s\n", print_wchar_str(test->output[form]));
+ printf("got: %s\n", print_wchar_str(result));
+ printf("\n");
+ exit(1);
+ }
+ }
+ }
+
+ printf("All tests successful!\n");
+ exit(0);
+}
diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c
new file mode 100644
index 0000000..ab5ce59
--- /dev/null
+++ b/src/common/unicode_norm.c
@@ -0,0 +1,554 @@
+/*-------------------------------------------------------------------------
+ * unicode_norm.c
+ * Normalize a Unicode string
+ *
+ * This implements Unicode normalization, per the documentation at
+ * https://www.unicode.org/reports/tr15/.
+ *
+ * Portions Copyright (c) 2017-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/unicode_norm.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/unicode_norm.h"
+#include "common/unicode_norm_table.h"
+#ifndef FRONTEND
+#include "common/unicode_normprops_table.h"
+#endif
+
+#ifndef FRONTEND
+#define ALLOC(size) palloc(size)
+#define FREE(size) pfree(size)
+#else
+#define ALLOC(size) malloc(size)
+#define FREE(size) free(size)
+#endif
+
+/* Constants for calculations with Hangul characters */
+#define SBASE 0xAC00 /* U+AC00 */
+#define LBASE 0x1100 /* U+1100 */
+#define VBASE 0x1161 /* U+1161 */
+#define TBASE 0x11A7 /* U+11A7 */
+#define LCOUNT 19
+#define VCOUNT 21
+#define TCOUNT 28
+#define NCOUNT VCOUNT * TCOUNT
+#define SCOUNT LCOUNT * NCOUNT
+
+/* comparison routine for bsearch() of decomposition lookup table. */
+static int
+conv_compare(const void *p1, const void *p2)
+{
+ uint32 v1,
+ v2;
+
+ v1 = *(const uint32 *) p1;
+ v2 = ((const pg_unicode_decomposition *) p2)->codepoint;
+ return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
+}
+
+/*
+ * Get the entry corresponding to code in the decomposition lookup table.
+ */
+static pg_unicode_decomposition *
+get_code_entry(pg_wchar code)
+{
+ return bsearch(&(code),
+ UnicodeDecompMain,
+ lengthof(UnicodeDecompMain),
+ sizeof(pg_unicode_decomposition),
+ conv_compare);
+}
+
+/*
+ * Given a decomposition entry looked up earlier, get the decomposed
+ * characters.
+ *
+ * Note: the returned pointer can point to statically allocated buffer, and
+ * is only valid until next call to this function!
+ */
+static const pg_wchar *
+get_code_decomposition(pg_unicode_decomposition *entry, int *dec_size)
+{
+ static pg_wchar x;
+
+ if (DECOMPOSITION_IS_INLINE(entry))
+ {
+ Assert(DECOMPOSITION_SIZE(entry) == 1);
+ x = (pg_wchar) entry->dec_index;
+ *dec_size = 1;
+ return &x;
+ }
+ else
+ {
+ *dec_size = DECOMPOSITION_SIZE(entry);
+ return &UnicodeDecomp_codepoints[entry->dec_index];
+ }
+}
+
+/*
+ * Calculate how many characters a given character will decompose to.
+ *
+ * This needs to recurse, if the character decomposes into characters that
+ * are, in turn, decomposable.
+ */
+static int
+get_decomposed_size(pg_wchar code, bool compat)
+{
+ pg_unicode_decomposition *entry;
+ int size = 0;
+ int i;
+ const uint32 *decomp;
+ int dec_size;
+
+ /*
+ * Fast path for Hangul characters not stored in tables to save memory as
+ * decomposition is algorithmic. See
+ * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details
+ * on the matter.
+ */
+ if (code >= SBASE && code < SBASE + SCOUNT)
+ {
+ uint32 tindex,
+ sindex;
+
+ sindex = code - SBASE;
+ tindex = sindex % TCOUNT;
+
+ if (tindex != 0)
+ return 3;
+ return 2;
+ }
+
+ entry = get_code_entry(code);
+
+ /*
+ * Just count current code if no other decompositions. A NULL entry is
+ * equivalent to a character with class 0 and no decompositions.
+ */
+ if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||
+ (!compat && DECOMPOSITION_IS_COMPAT(entry)))
+ return 1;
+
+ /*
+ * If this entry has other decomposition codes look at them as well. First
+ * get its decomposition in the list of tables available.
+ */
+ decomp = get_code_decomposition(entry, &dec_size);
+ for (i = 0; i < dec_size; i++)
+ {
+ uint32 lcode = decomp[i];
+
+ size += get_decomposed_size(lcode, compat);
+ }
+
+ return size;
+}
+
+/*
+ * Recompose a set of characters. For hangul characters, the calculation
+ * is algorithmic. For others, an inverse lookup at the decomposition
+ * table is necessary. Returns true if a recomposition can be done, and
+ * false otherwise.
+ */
+static bool
+recompose_code(uint32 start, uint32 code, uint32 *result)
+{
+ /*
+ * Handle Hangul characters algorithmically, per the Unicode spec.
+ *
+ * Check if two current characters are L and V.
+ */
+ if (start >= LBASE && start < LBASE + LCOUNT &&
+ code >= VBASE && code < VBASE + VCOUNT)
+ {
+ /* make syllable of form LV */
+ uint32 lindex = start - LBASE;
+ uint32 vindex = code - VBASE;
+
+ *result = SBASE + (lindex * VCOUNT + vindex) * TCOUNT;
+ return true;
+ }
+ /* Check if two current characters are LV and T */
+ else if (start >= SBASE && start < (SBASE + SCOUNT) &&
+ ((start - SBASE) % TCOUNT) == 0 &&
+ code >= TBASE && code < (TBASE + TCOUNT))
+ {
+ /* make syllable of form LVT */
+ uint32 tindex = code - TBASE;
+
+ *result = start + tindex;
+ return true;
+ }
+ else
+ {
+ int i;
+
+ /*
+ * Do an inverse lookup of the decomposition tables to see if anything
+ * matches. The comparison just needs to be a perfect match on the
+ * sub-table of size two, because the start character has already been
+ * recomposed partially.
+ */
+ for (i = 0; i < lengthof(UnicodeDecompMain); i++)
+ {
+ const pg_unicode_decomposition *entry = &UnicodeDecompMain[i];
+
+ if (DECOMPOSITION_SIZE(entry) != 2)
+ continue;
+
+ if (DECOMPOSITION_NO_COMPOSE(entry))
+ continue;
+
+ if (start == UnicodeDecomp_codepoints[entry->dec_index] &&
+ code == UnicodeDecomp_codepoints[entry->dec_index + 1])
+ {
+ *result = entry->codepoint;
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/*
+ * Decompose the given code into the array given by caller. The
+ * decomposition begins at the position given by caller, saving one
+ * lookup on the decomposition table. The current position needs to be
+ * updated here to let the caller know from where to continue filling
+ * in the array result.
+ */
+static void
+decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)
+{
+ pg_unicode_decomposition *entry;
+ int i;
+ const uint32 *decomp;
+ int dec_size;
+
+ /*
+ * Fast path for Hangul characters not stored in tables to save memory as
+ * decomposition is algorithmic. See
+ * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details
+ * on the matter.
+ */
+ if (code >= SBASE && code < SBASE + SCOUNT)
+ {
+ uint32 l,
+ v,
+ tindex,
+ sindex;
+ pg_wchar *res = *result;
+
+ sindex = code - SBASE;
+ l = LBASE + sindex / (VCOUNT * TCOUNT);
+ v = VBASE + (sindex % (VCOUNT * TCOUNT)) / TCOUNT;
+ tindex = sindex % TCOUNT;
+
+ res[*current] = l;
+ (*current)++;
+ res[*current] = v;
+ (*current)++;
+
+ if (tindex != 0)
+ {
+ res[*current] = TBASE + tindex;
+ (*current)++;
+ }
+
+ return;
+ }
+
+ entry = get_code_entry(code);
+
+ /*
+ * Just fill in with the current decomposition if there are no
+ * decomposition codes to recurse to. A NULL entry is equivalent to a
+ * character with class 0 and no decompositions, so just leave also in
+ * this case.
+ */
+ if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||
+ (!compat && DECOMPOSITION_IS_COMPAT(entry)))
+ {
+ pg_wchar *res = *result;
+
+ res[*current] = code;
+ (*current)++;
+ return;
+ }
+
+ /*
+ * If this entry has other decomposition codes look at them as well.
+ */
+ decomp = get_code_decomposition(entry, &dec_size);
+ for (i = 0; i < dec_size; i++)
+ {
+ pg_wchar lcode = (pg_wchar) decomp[i];
+
+ /* Leave if no more decompositions */
+ decompose_code(lcode, compat, result, current);
+ }
+}
+
+/*
+ * unicode_normalize - Normalize a Unicode string to the specified form.
+ *
+ * The input is a 0-terminated array of codepoints.
+ *
+ * In frontend, returns a 0-terminated array of codepoints, allocated with
+ * malloc. Or NULL if we run out of memory. In backend, the returned
+ * string is palloc'd instead, and OOM is reported with ereport().
+ */
+pg_wchar *
+unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
+{
+ bool compat = (form == UNICODE_NFKC || form == UNICODE_NFKD);
+ bool recompose = (form == UNICODE_NFC || form == UNICODE_NFKC);
+ pg_wchar *decomp_chars;
+ pg_wchar *recomp_chars;
+ int decomp_size,
+ current_size;
+ int count;
+ const pg_wchar *p;
+
+ /* variables for recomposition */
+ int last_class;
+ int starter_pos;
+ int target_pos;
+ uint32 starter_ch;
+
+ /* First, do character decomposition */
+
+ /*
+ * Calculate how many characters long the decomposed version will be.
+ */
+ decomp_size = 0;
+ for (p = input; *p; p++)
+ decomp_size += get_decomposed_size(*p, compat);
+
+ decomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));
+ if (decomp_chars == NULL)
+ return NULL;
+
+ /*
+ * Now fill in each entry recursively. This needs a second pass on the
+ * decomposition table.
+ */
+ current_size = 0;
+ for (p = input; *p; p++)
+ decompose_code(*p, compat, &decomp_chars, &current_size);
+ decomp_chars[decomp_size] = '\0';
+ Assert(decomp_size == current_size);
+
+ /*
+ * Now apply canonical ordering.
+ */
+ for (count = 1; count < decomp_size; count++)
+ {
+ pg_wchar prev = decomp_chars[count - 1];
+ pg_wchar next = decomp_chars[count];
+ pg_wchar tmp;
+ pg_unicode_decomposition *prevEntry = get_code_entry(prev);
+ pg_unicode_decomposition *nextEntry = get_code_entry(next);
+
+ /*
+ * If no entries are found, the character used is either an Hangul
+ * character or a character with a class of 0 and no decompositions,
+ * so move to next result.
+ */
+ if (prevEntry == NULL || nextEntry == NULL)
+ continue;
+
+ /*
+ * Per Unicode (https://www.unicode.org/reports/tr15/tr15-18.html)
+ * annex 4, a sequence of two adjacent characters in a string is an
+ * exchangeable pair if the combining class (from the Unicode
+ * Character Database) for the first character is greater than the
+ * combining class for the second, and the second is not a starter. A
+ * character is a starter if its combining class is 0.
+ */
+ if (nextEntry->comb_class == 0x0 || prevEntry->comb_class == 0x0)
+ continue;
+
+ if (prevEntry->comb_class <= nextEntry->comb_class)
+ continue;
+
+ /* exchange can happen */
+ tmp = decomp_chars[count - 1];
+ decomp_chars[count - 1] = decomp_chars[count];
+ decomp_chars[count] = tmp;
+
+ /* backtrack to check again */
+ if (count > 1)
+ count -= 2;
+ }
+
+ if (!recompose)
+ return decomp_chars;
+
+ /*
+ * The last phase of NFC and NFKC is the recomposition of the reordered
+ * Unicode string using combining classes. The recomposed string cannot be
+ * longer than the decomposed one, so make the allocation of the output
+ * string based on that assumption.
+ */
+ recomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));
+ if (!recomp_chars)
+ {
+ FREE(decomp_chars);
+ return NULL;
+ }
+
+ last_class = -1; /* this eliminates a special check */
+ starter_pos = 0;
+ target_pos = 1;
+ starter_ch = recomp_chars[0] = decomp_chars[0];
+
+ for (count = 1; count < decomp_size; count++)
+ {
+ pg_wchar ch = decomp_chars[count];
+ pg_unicode_decomposition *ch_entry = get_code_entry(ch);
+ int ch_class = (ch_entry == NULL) ? 0 : ch_entry->comb_class;
+ pg_wchar composite;
+
+ if (last_class < ch_class &&
+ recompose_code(starter_ch, ch, &composite))
+ {
+ recomp_chars[starter_pos] = composite;
+ starter_ch = composite;
+ }
+ else if (ch_class == 0)
+ {
+ starter_pos = target_pos;
+ starter_ch = ch;
+ last_class = -1;
+ recomp_chars[target_pos++] = ch;
+ }
+ else
+ {
+ last_class = ch_class;
+ recomp_chars[target_pos++] = ch;
+ }
+ }
+ recomp_chars[target_pos] = (pg_wchar) '\0';
+
+ FREE(decomp_chars);
+
+ return recomp_chars;
+}
+
+/*
+ * Normalization "quick check" algorithm; see
+ * <http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms>
+ */
+
+/* We only need this in the backend. */
+#ifndef FRONTEND
+
+static uint8
+get_canonical_class(pg_wchar ch)
+{
+ pg_unicode_decomposition *entry = get_code_entry(ch);
+
+ if (!entry)
+ return 0;
+ else
+ return entry->comb_class;
+}
+
+static int
+qc_compare(const void *p1, const void *p2)
+{
+ uint32 v1,
+ v2;
+
+ v1 = ((const pg_unicode_normprops *) p1)->codepoint;
+ v2 = ((const pg_unicode_normprops *) p2)->codepoint;
+ return (v1 - v2);
+}
+
+/*
+ * Look up the normalization quick check character property
+ */
+static UnicodeNormalizationQC
+qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch)
+{
+ pg_unicode_normprops key;
+ pg_unicode_normprops *found = NULL;
+
+ key.codepoint = ch;
+
+ switch (form)
+ {
+ case UNICODE_NFC:
+ found = bsearch(&key,
+ UnicodeNormProps_NFC_QC,
+ lengthof(UnicodeNormProps_NFC_QC),
+ sizeof(pg_unicode_normprops),
+ qc_compare);
+ break;
+ case UNICODE_NFKC:
+ found = bsearch(&key,
+ UnicodeNormProps_NFKC_QC,
+ lengthof(UnicodeNormProps_NFKC_QC),
+ sizeof(pg_unicode_normprops),
+ qc_compare);
+ break;
+ default:
+ Assert(false);
+ break;
+ }
+
+ if (found)
+ return found->quickcheck;
+ else
+ return UNICODE_NORM_QC_YES;
+}
+
+UnicodeNormalizationQC
+unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input)
+{
+ uint8 lastCanonicalClass = 0;
+ UnicodeNormalizationQC result = UNICODE_NORM_QC_YES;
+
+ /*
+ * For the "D" forms, we don't run the quickcheck. We don't include the
+ * lookup tables for those because they are huge, checking for these
+ * particular forms is less common, and running the slow path is faster
+ * for the "D" forms than the "C" forms because you don't need to
+ * recompose, which is slow.
+ */
+ if (form == UNICODE_NFD || form == UNICODE_NFKD)
+ return UNICODE_NORM_QC_MAYBE;
+
+ for (const pg_wchar *p = input; *p; p++)
+ {
+ pg_wchar ch = *p;
+ uint8 canonicalClass;
+ UnicodeNormalizationQC check;
+
+ canonicalClass = get_canonical_class(ch);
+ if (lastCanonicalClass > canonicalClass && canonicalClass != 0)
+ return UNICODE_NORM_QC_NO;
+
+ check = qc_is_allowed(form, ch);
+ if (check == UNICODE_NORM_QC_NO)
+ return UNICODE_NORM_QC_NO;
+ else if (check == UNICODE_NORM_QC_MAYBE)
+ result = UNICODE_NORM_QC_MAYBE;
+
+ lastCanonicalClass = canonicalClass;
+ }
+ return result;
+}
+
+#endif /* !FRONTEND */
diff --git a/src/common/username.c b/src/common/username.c
new file mode 100644
index 0000000..5d47774
--- /dev/null
+++ b/src/common/username.c
@@ -0,0 +1,87 @@
+/*-------------------------------------------------------------------------
+ *
+ * username.c
+ * get user name
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/username.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <pwd.h>
+#include <unistd.h>
+
+#include "common/username.h"
+
+/*
+ * Returns the current user name in a static buffer
+ * On error, returns NULL and sets *errstr to point to a palloc'd message
+ */
+const char *
+get_user_name(char **errstr)
+{
+#ifndef WIN32
+ struct passwd *pw;
+ uid_t user_id = geteuid();
+
+ *errstr = NULL;
+
+ errno = 0; /* clear errno before call */
+ pw = getpwuid(user_id);
+ if (!pw)
+ {
+ *errstr = psprintf(_("could not look up effective user ID %ld: %s"),
+ (long) user_id,
+ errno ? strerror(errno) : _("user does not exist"));
+ return NULL;
+ }
+
+ return pw->pw_name;
+#else
+ /* Microsoft recommends buffer size of UNLEN+1, where UNLEN = 256 */
+ /* "static" variable remains after function exit */
+ static char username[256 + 1];
+ DWORD len = sizeof(username);
+
+ *errstr = NULL;
+
+ if (!GetUserName(username, &len))
+ {
+ *errstr = psprintf(_("user name lookup failure: error code %lu"),
+ GetLastError());
+ return NULL;
+ }
+
+ return username;
+#endif
+}
+
+
+/*
+ * Returns the current user name in a static buffer or exits
+ */
+const char *
+get_user_name_or_exit(const char *progname)
+{
+ const char *user_name;
+ char *errstr;
+
+ user_name = get_user_name(&errstr);
+
+ if (!user_name)
+ {
+ fprintf(stderr, "%s: %s\n", progname, errstr);
+ exit(1);
+ }
+ return user_name;
+}
diff --git a/src/common/wait_error.c b/src/common/wait_error.c
new file mode 100644
index 0000000..2026280
--- /dev/null
+++ b/src/common/wait_error.c
@@ -0,0 +1,119 @@
+/*-------------------------------------------------------------------------
+ *
+ * wait_error.c
+ * Convert a wait/waitpid(2) result code to a human-readable string
+ *
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/wait_error.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <signal.h>
+#include <sys/wait.h>
+
+/*
+ * Return a human-readable string explaining the reason a child process
+ * terminated. The argument is a return code returned by wait(2) or
+ * waitpid(2). The result is a translated, palloc'd or malloc'd string.
+ */
+char *
+wait_result_to_str(int exitstatus)
+{
+ char str[512];
+
+ if (WIFEXITED(exitstatus))
+ {
+ /*
+ * Give more specific error message for some common exit codes that
+ * have a special meaning in shells.
+ */
+ switch (WEXITSTATUS(exitstatus))
+ {
+ case 126:
+ snprintf(str, sizeof(str), _("command not executable"));
+ break;
+
+ case 127:
+ snprintf(str, sizeof(str), _("command not found"));
+ break;
+
+ default:
+ snprintf(str, sizeof(str),
+ _("child process exited with exit code %d"),
+ WEXITSTATUS(exitstatus));
+ }
+ }
+ else if (WIFSIGNALED(exitstatus))
+ {
+#if defined(WIN32)
+ snprintf(str, sizeof(str),
+ _("child process was terminated by exception 0x%X"),
+ WTERMSIG(exitstatus));
+#else
+ snprintf(str, sizeof(str),
+ _("child process was terminated by signal %d: %s"),
+ WTERMSIG(exitstatus), pg_strsignal(WTERMSIG(exitstatus)));
+#endif
+ }
+ else
+ snprintf(str, sizeof(str),
+ _("child process exited with unrecognized status %d"),
+ exitstatus);
+
+ return pstrdup(str);
+}
+
+/*
+ * Return true if a wait(2) result indicates that the child process
+ * died due to the specified signal.
+ *
+ * The reason this is worth having a wrapper function for is that
+ * there are two cases: the signal might have been received by our
+ * immediate child process, or there might've been a shell process
+ * between us and the child that died. The shell will, per POSIX,
+ * report the child death using exit code 128 + signal number.
+ *
+ * If there is no possibility of an intermediate shell, this function
+ * need not (and probably should not) be used.
+ */
+bool
+wait_result_is_signal(int exit_status, int signum)
+{
+ if (WIFSIGNALED(exit_status) && WTERMSIG(exit_status) == signum)
+ return true;
+ if (WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == 128 + signum)
+ return true;
+ return false;
+}
+
+/*
+ * Return true if a wait(2) result indicates that the child process
+ * died due to any signal. We consider either direct child death
+ * or a shell report of child process death as matching the condition.
+ *
+ * If include_command_not_found is true, also return true for shell
+ * exit codes indicating "command not found" and the like
+ * (specifically, exit codes 126 and 127; see above).
+ */
+bool
+wait_result_is_any_signal(int exit_status, bool include_command_not_found)
+{
+ if (WIFSIGNALED(exit_status))
+ return true;
+ if (WIFEXITED(exit_status) &&
+ WEXITSTATUS(exit_status) > (include_command_not_found ? 125 : 128))
+ return true;
+ return false;
+}
diff --git a/src/common/wchar.c b/src/common/wchar.c
new file mode 100644
index 0000000..0873480
--- /dev/null
+++ b/src/common/wchar.c
@@ -0,0 +1,1609 @@
+/*-------------------------------------------------------------------------
+ *
+ * wchar.c
+ * Functions for working with multibyte characters in various encodings.
+ *
+ * Portions Copyright (c) 1998-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/wchar.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "mb/pg_wchar.h"
+
+
+/*
+ * Operations on multi-byte encodings are driven by a table of helper
+ * functions.
+ *
+ * To add an encoding support, define mblen(), dsplen() and verifier() for
+ * the encoding. For server-encodings, also define mb2wchar() and wchar2mb()
+ * conversion functions.
+ *
+ * These functions generally assume that their input is validly formed.
+ * The "verifier" functions, further down in the file, have to be more
+ * paranoid.
+ *
+ * We expect that mblen() does not need to examine more than the first byte
+ * of the character to discover the correct length. GB18030 is an exception
+ * to that rule, though, as it also looks at second byte. But even that
+ * behaves in a predictable way, if you only pass the first byte: it will
+ * treat 4-byte encoded characters as two 2-byte encoded characters, which is
+ * good enough for all current uses.
+ *
+ * Note: for the display output of psql to work properly, the return values
+ * of the dsplen functions must conform to the Unicode standard. In particular
+ * the NUL character is zero width and control characters are generally
+ * width -1. It is recommended that non-ASCII encodings refer their ASCII
+ * subset to the ASCII routines to ensure consistency.
+ */
+
+/*
+ * SQL/ASCII
+ */
+static int
+pg_ascii2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ *to++ = *from++;
+ len--;
+ cnt++;
+ }
+ *to = 0;
+ return cnt;
+}
+
+static int
+pg_ascii_mblen(const unsigned char *s)
+{
+ return 1;
+}
+
+static int
+pg_ascii_dsplen(const unsigned char *s)
+{
+ if (*s == '\0')
+ return 0;
+ if (*s < 0x20 || *s == 0x7f)
+ return -1;
+
+ return 1;
+}
+
+/*
+ * EUC
+ */
+static int
+pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte
+ * KANA") */
+ {
+ from++;
+ *to = (SS2 << 8) | *from++;
+ len -= 2;
+ }
+ else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
+ {
+ from++;
+ *to = (SS3 << 16) | (*from++ << 8);
+ *to |= *from++;
+ len -= 3;
+ }
+ else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
+ {
+ *to = *from++ << 8;
+ *to |= *from++;
+ len -= 2;
+ }
+ else /* must be ASCII */
+ {
+ *to = *from++;
+ len--;
+ }
+ to++;
+ cnt++;
+ }
+ *to = 0;
+ return cnt;
+}
+
+static inline int
+pg_euc_mblen(const unsigned char *s)
+{
+ int len;
+
+ if (*s == SS2)
+ len = 2;
+ else if (*s == SS3)
+ len = 3;
+ else if (IS_HIGHBIT_SET(*s))
+ len = 2;
+ else
+ len = 1;
+ return len;
+}
+
+static inline int
+pg_euc_dsplen(const unsigned char *s)
+{
+ int len;
+
+ if (*s == SS2)
+ len = 2;
+ else if (*s == SS3)
+ len = 2;
+ else if (IS_HIGHBIT_SET(*s))
+ len = 2;
+ else
+ len = pg_ascii_dsplen(s);
+ return len;
+}
+
+/*
+ * EUC_JP
+ */
+static int
+pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+ return pg_euc2wchar_with_len(from, to, len);
+}
+
+static int
+pg_eucjp_mblen(const unsigned char *s)
+{
+ return pg_euc_mblen(s);
+}
+
+static int
+pg_eucjp_dsplen(const unsigned char *s)
+{
+ int len;
+
+ if (*s == SS2)
+ len = 1;
+ else if (*s == SS3)
+ len = 2;
+ else if (IS_HIGHBIT_SET(*s))
+ len = 2;
+ else
+ len = pg_ascii_dsplen(s);
+ return len;
+}
+
+/*
+ * EUC_KR
+ */
+static int
+pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+ return pg_euc2wchar_with_len(from, to, len);
+}
+
+static int
+pg_euckr_mblen(const unsigned char *s)
+{
+ return pg_euc_mblen(s);
+}
+
+static int
+pg_euckr_dsplen(const unsigned char *s)
+{
+ return pg_euc_dsplen(s);
+}
+
+/*
+ * EUC_CN
+ *
+ */
+static int
+pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
+ {
+ from++;
+ *to = (SS2 << 16) | (*from++ << 8);
+ *to |= *from++;
+ len -= 3;
+ }
+ else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */
+ {
+ from++;
+ *to = (SS3 << 16) | (*from++ << 8);
+ *to |= *from++;
+ len -= 3;
+ }
+ else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
+ {
+ *to = *from++ << 8;
+ *to |= *from++;
+ len -= 2;
+ }
+ else
+ {
+ *to = *from++;
+ len--;
+ }
+ to++;
+ cnt++;
+ }
+ *to = 0;
+ return cnt;
+}
+
+static int
+pg_euccn_mblen(const unsigned char *s)
+{
+ int len;
+
+ if (IS_HIGHBIT_SET(*s))
+ len = 2;
+ else
+ len = 1;
+ return len;
+}
+
+static int
+pg_euccn_dsplen(const unsigned char *s)
+{
+ int len;
+
+ if (IS_HIGHBIT_SET(*s))
+ len = 2;
+ else
+ len = pg_ascii_dsplen(s);
+ return len;
+}
+
+/*
+ * EUC_TW
+ *
+ */
+static int
+pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ if (*from == SS2 && len >= 4) /* code set 2 */
+ {
+ from++;
+ *to = (((uint32) SS2) << 24) | (*from++ << 16);
+ *to |= *from++ << 8;
+ *to |= *from++;
+ len -= 4;
+ }
+ else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
+ {
+ from++;
+ *to = (SS3 << 16) | (*from++ << 8);
+ *to |= *from++;
+ len -= 3;
+ }
+ else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */
+ {
+ *to = *from++ << 8;
+ *to |= *from++;
+ len -= 2;
+ }
+ else
+ {
+ *to = *from++;
+ len--;
+ }
+ to++;
+ cnt++;
+ }
+ *to = 0;
+ return cnt;
+}
+
+static int
+pg_euctw_mblen(const unsigned char *s)
+{
+ int len;
+
+ if (*s == SS2)
+ len = 4;
+ else if (*s == SS3)
+ len = 3;
+ else if (IS_HIGHBIT_SET(*s))
+ len = 2;
+ else
+ len = 1;
+ return len;
+}
+
+static int
+pg_euctw_dsplen(const unsigned char *s)
+{
+ int len;
+
+ if (*s == SS2)
+ len = 2;
+ else if (*s == SS3)
+ len = 2;
+ else if (IS_HIGHBIT_SET(*s))
+ len = 2;
+ else
+ len = pg_ascii_dsplen(s);
+ return len;
+}
+
+/*
+ * Convert pg_wchar to EUC_* encoding.
+ * caller must allocate enough space for "to", including a trailing zero!
+ * len: length of from.
+ * "from" not necessarily null terminated.
+ */
+static int
+pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ unsigned char c;
+
+ if ((c = (*from >> 24)))
+ {
+ *to++ = c;
+ *to++ = (*from >> 16) & 0xff;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 4;
+ }
+ else if ((c = (*from >> 16)))
+ {
+ *to++ = c;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 3;
+ }
+ else if ((c = (*from >> 8)))
+ {
+ *to++ = c;
+ *to++ = *from & 0xff;
+ cnt += 2;
+ }
+ else
+ {
+ *to++ = *from;
+ cnt++;
+ }
+ from++;
+ len--;
+ }
+ *to = 0;
+ return cnt;
+}
+
+
+/*
+ * JOHAB
+ */
+static int
+pg_johab_mblen(const unsigned char *s)
+{
+ return pg_euc_mblen(s);
+}
+
+static int
+pg_johab_dsplen(const unsigned char *s)
+{
+ return pg_euc_dsplen(s);
+}
+
+/*
+ * convert UTF8 string to pg_wchar (UCS-4)
+ * caller must allocate enough space for "to", including a trailing zero!
+ * len: length of from.
+ * "from" not necessarily null terminated.
+ */
+static int
+pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+ int cnt = 0;
+ uint32 c1,
+ c2,
+ c3,
+ c4;
+
+ while (len > 0 && *from)
+ {
+ if ((*from & 0x80) == 0)
+ {
+ *to = *from++;
+ len--;
+ }
+ else if ((*from & 0xe0) == 0xc0)
+ {
+ if (len < 2)
+ break; /* drop trailing incomplete char */
+ c1 = *from++ & 0x1f;
+ c2 = *from++ & 0x3f;
+ *to = (c1 << 6) | c2;
+ len -= 2;
+ }
+ else if ((*from & 0xf0) == 0xe0)
+ {
+ if (len < 3)
+ break; /* drop trailing incomplete char */
+ c1 = *from++ & 0x0f;
+ c2 = *from++ & 0x3f;
+ c3 = *from++ & 0x3f;
+ *to = (c1 << 12) | (c2 << 6) | c3;
+ len -= 3;
+ }
+ else if ((*from & 0xf8) == 0xf0)
+ {
+ if (len < 4)
+ break; /* drop trailing incomplete char */
+ c1 = *from++ & 0x07;
+ c2 = *from++ & 0x3f;
+ c3 = *from++ & 0x3f;
+ c4 = *from++ & 0x3f;
+ *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
+ len -= 4;
+ }
+ else
+ {
+ /* treat a bogus char as length 1; not ours to raise error */
+ *to = *from++;
+ len--;
+ }
+ to++;
+ cnt++;
+ }
+ *to = 0;
+ return cnt;
+}
+
+
+/*
+ * Map a Unicode code point to UTF-8. utf8string must have 4 bytes of
+ * space allocated.
+ */
+unsigned char *
+unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
+{
+ if (c <= 0x7F)
+ {
+ utf8string[0] = c;
+ }
+ else if (c <= 0x7FF)
+ {
+ utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
+ utf8string[1] = 0x80 | (c & 0x3F);
+ }
+ else if (c <= 0xFFFF)
+ {
+ utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
+ utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
+ utf8string[2] = 0x80 | (c & 0x3F);
+ }
+ else
+ {
+ utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
+ utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
+ utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
+ utf8string[3] = 0x80 | (c & 0x3F);
+ }
+
+ return utf8string;
+}
+
+/*
+ * Trivial conversion from pg_wchar to UTF-8.
+ * caller should allocate enough space for "to"
+ * len: length of from.
+ * "from" not necessarily null terminated.
+ */
+static int
+pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ int char_len;
+
+ unicode_to_utf8(*from, to);
+ char_len = pg_utf_mblen(to);
+ cnt += char_len;
+ to += char_len;
+ from++;
+ len--;
+ }
+ *to = 0;
+ return cnt;
+}
+
+/*
+ * Return the byte length of a UTF8 character pointed to by s
+ *
+ * Note: in the current implementation we do not support UTF8 sequences
+ * of more than 4 bytes; hence do NOT return a value larger than 4.
+ * We return "1" for any leading byte that is either flat-out illegal or
+ * indicates a length larger than we support.
+ *
+ * pg_utf2wchar_with_len(), utf8_to_unicode(), pg_utf8_islegal(), and perhaps
+ * other places would need to be fixed to change this.
+ */
+int
+pg_utf_mblen(const unsigned char *s)
+{
+ int len;
+
+ if ((*s & 0x80) == 0)
+ len = 1;
+ else if ((*s & 0xe0) == 0xc0)
+ len = 2;
+ else if ((*s & 0xf0) == 0xe0)
+ len = 3;
+ else if ((*s & 0xf8) == 0xf0)
+ len = 4;
+#ifdef NOT_USED
+ else if ((*s & 0xfc) == 0xf8)
+ len = 5;
+ else if ((*s & 0xfe) == 0xfc)
+ len = 6;
+#endif
+ else
+ len = 1;
+ return len;
+}
+
+/*
+ * This is an implementation of wcwidth() and wcswidth() as defined in
+ * "The Single UNIX Specification, Version 2, The Open Group, 1997"
+ * <http://www.unix.org/online.html>
+ *
+ * Markus Kuhn -- 2001-09-08 -- public domain
+ *
+ * customised for PostgreSQL
+ *
+ * original available at : http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+ */
+
+struct mbinterval
+{
+ unsigned short first;
+ unsigned short last;
+};
+
+/* auxiliary function for binary search in interval table */
+static int
+mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
+{
+ int min = 0;
+ int mid;
+
+ if (ucs < table[0].first || ucs > table[max].last)
+ return 0;
+ while (max >= min)
+ {
+ mid = (min + max) / 2;
+ if (ucs > table[mid].last)
+ min = mid + 1;
+ else if (ucs < table[mid].first)
+ max = mid - 1;
+ else
+ return 1;
+ }
+
+ return 0;
+}
+
+
+/* The following functions define the column width of an ISO 10646
+ * character as follows:
+ *
+ * - The null character (U+0000) has a column width of 0.
+ *
+ * - Other C0/C1 control characters and DEL will lead to a return
+ * value of -1.
+ *
+ * - Non-spacing and enclosing combining characters (general
+ * category code Mn or Me in the Unicode database) have a
+ * column width of 0.
+ *
+ * - Other format characters (general category code Cf in the Unicode
+ * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
+ *
+ * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
+ * have a column width of 0.
+ *
+ * - Spacing characters in the East Asian Wide (W) or East Asian
+ * FullWidth (F) category as defined in Unicode Technical
+ * Report #11 have a column width of 2.
+ *
+ * - All remaining characters (including all printable
+ * ISO 8859-1 and WGL4 characters, Unicode control characters,
+ * etc.) have a column width of 1.
+ *
+ * This implementation assumes that wchar_t characters are encoded
+ * in ISO 10646.
+ */
+
+static int
+ucs_wcwidth(pg_wchar ucs)
+{
+#include "common/unicode_combining_table.h"
+
+ /* test for 8-bit control characters */
+ if (ucs == 0)
+ return 0;
+
+ if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
+ return -1;
+
+ /* binary search in table of non-spacing characters */
+ if (mbbisearch(ucs, combining,
+ sizeof(combining) / sizeof(struct mbinterval) - 1))
+ return 0;
+
+ /*
+ * if we arrive here, ucs is not a combining or C0/C1 control character
+ */
+
+ return 1 +
+ (ucs >= 0x1100 &&
+ (ucs <= 0x115f || /* Hangul Jamo init. consonants */
+ (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
+ ucs != 0x303f) || /* CJK ... Yi */
+ (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
+ (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility
+ * Ideographs */
+ (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
+ (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
+ (ucs >= 0xffe0 && ucs <= 0xffe6) ||
+ (ucs >= 0x20000 && ucs <= 0x2ffff)));
+}
+
+/*
+ * Convert a UTF-8 character to a Unicode code point.
+ * This is a one-character version of pg_utf2wchar_with_len.
+ *
+ * No error checks here, c must point to a long-enough string.
+ */
+pg_wchar
+utf8_to_unicode(const unsigned char *c)
+{
+ if ((*c & 0x80) == 0)
+ return (pg_wchar) c[0];
+ else if ((*c & 0xe0) == 0xc0)
+ return (pg_wchar) (((c[0] & 0x1f) << 6) |
+ (c[1] & 0x3f));
+ else if ((*c & 0xf0) == 0xe0)
+ return (pg_wchar) (((c[0] & 0x0f) << 12) |
+ ((c[1] & 0x3f) << 6) |
+ (c[2] & 0x3f));
+ else if ((*c & 0xf8) == 0xf0)
+ return (pg_wchar) (((c[0] & 0x07) << 18) |
+ ((c[1] & 0x3f) << 12) |
+ ((c[2] & 0x3f) << 6) |
+ (c[3] & 0x3f));
+ else
+ /* that is an invalid code on purpose */
+ return 0xffffffff;
+}
+
+static int
+pg_utf_dsplen(const unsigned char *s)
+{
+ return ucs_wcwidth(utf8_to_unicode(s));
+}
+
+/*
+ * convert mule internal code to pg_wchar
+ * caller should allocate enough space for "to"
+ * len: length of from.
+ * "from" not necessarily null terminated.
+ */
+static int
+pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ if (IS_LC1(*from) && len >= 2)
+ {
+ *to = *from++ << 16;
+ *to |= *from++;
+ len -= 2;
+ }
+ else if (IS_LCPRV1(*from) && len >= 3)
+ {
+ from++;
+ *to = *from++ << 16;
+ *to |= *from++;
+ len -= 3;
+ }
+ else if (IS_LC2(*from) && len >= 3)
+ {
+ *to = *from++ << 16;
+ *to |= *from++ << 8;
+ *to |= *from++;
+ len -= 3;
+ }
+ else if (IS_LCPRV2(*from) && len >= 4)
+ {
+ from++;
+ *to = *from++ << 16;
+ *to |= *from++ << 8;
+ *to |= *from++;
+ len -= 4;
+ }
+ else
+ { /* assume ASCII */
+ *to = (unsigned char) *from++;
+ len--;
+ }
+ to++;
+ cnt++;
+ }
+ *to = 0;
+ return cnt;
+}
+
+/*
+ * convert pg_wchar to mule internal code
+ * caller should allocate enough space for "to"
+ * len: length of from.
+ * "from" not necessarily null terminated.
+ */
+static int
+pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ unsigned char lb;
+
+ lb = (*from >> 16) & 0xff;
+ if (IS_LC1(lb))
+ {
+ *to++ = lb;
+ *to++ = *from & 0xff;
+ cnt += 2;
+ }
+ else if (IS_LC2(lb))
+ {
+ *to++ = lb;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 3;
+ }
+ else if (IS_LCPRV1_A_RANGE(lb))
+ {
+ *to++ = LCPRV1_A;
+ *to++ = lb;
+ *to++ = *from & 0xff;
+ cnt += 3;
+ }
+ else if (IS_LCPRV1_B_RANGE(lb))
+ {
+ *to++ = LCPRV1_B;
+ *to++ = lb;
+ *to++ = *from & 0xff;
+ cnt += 3;
+ }
+ else if (IS_LCPRV2_A_RANGE(lb))
+ {
+ *to++ = LCPRV2_A;
+ *to++ = lb;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 4;
+ }
+ else if (IS_LCPRV2_B_RANGE(lb))
+ {
+ *to++ = LCPRV2_B;
+ *to++ = lb;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 4;
+ }
+ else
+ {
+ *to++ = *from & 0xff;
+ cnt += 1;
+ }
+ from++;
+ len--;
+ }
+ *to = 0;
+ return cnt;
+}
+
+/* exported for direct use by conv.c */
+int
+pg_mule_mblen(const unsigned char *s)
+{
+ int len;
+
+ if (IS_LC1(*s))
+ len = 2;
+ else if (IS_LCPRV1(*s))
+ len = 3;
+ else if (IS_LC2(*s))
+ len = 3;
+ else if (IS_LCPRV2(*s))
+ len = 4;
+ else
+ len = 1; /* assume ASCII */
+ return len;
+}
+
+static int
+pg_mule_dsplen(const unsigned char *s)
+{
+ int len;
+
+ /*
+ * Note: it's not really appropriate to assume that all multibyte charsets
+ * are double-wide on screen. But this seems an okay approximation for
+ * the MULE charsets we currently support.
+ */
+
+ if (IS_LC1(*s))
+ len = 1;
+ else if (IS_LCPRV1(*s))
+ len = 1;
+ else if (IS_LC2(*s))
+ len = 2;
+ else if (IS_LCPRV2(*s))
+ len = 2;
+ else
+ len = 1; /* assume ASCII */
+
+ return len;
+}
+
+/*
+ * ISO8859-1
+ */
+static int
+pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ *to++ = *from++;
+ len--;
+ cnt++;
+ }
+ *to = 0;
+ return cnt;
+}
+
+/*
+ * Trivial conversion from pg_wchar to single byte encoding. Just ignores
+ * high bits.
+ * caller should allocate enough space for "to"
+ * len: length of from.
+ * "from" not necessarily null terminated.
+ */
+static int
+pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ *to++ = *from++;
+ len--;
+ cnt++;
+ }
+ *to = 0;
+ return cnt;
+}
+
+static int
+pg_latin1_mblen(const unsigned char *s)
+{
+ return 1;
+}
+
+static int
+pg_latin1_dsplen(const unsigned char *s)
+{
+ return pg_ascii_dsplen(s);
+}
+
+/*
+ * SJIS
+ */
+static int
+pg_sjis_mblen(const unsigned char *s)
+{
+ int len;
+
+ if (*s >= 0xa1 && *s <= 0xdf)
+ len = 1; /* 1 byte kana? */
+ else if (IS_HIGHBIT_SET(*s))
+ len = 2; /* kanji? */
+ else
+ len = 1; /* should be ASCII */
+ return len;
+}
+
+static int
+pg_sjis_dsplen(const unsigned char *s)
+{
+ int len;
+
+ if (*s >= 0xa1 && *s <= 0xdf)
+ len = 1; /* 1 byte kana? */
+ else if (IS_HIGHBIT_SET(*s))
+ len = 2; /* kanji? */
+ else
+ len = pg_ascii_dsplen(s); /* should be ASCII */
+ return len;
+}
+
+/*
+ * Big5
+ */
+static int
+pg_big5_mblen(const unsigned char *s)
+{
+ int len;
+
+ if (IS_HIGHBIT_SET(*s))
+ len = 2; /* kanji? */
+ else
+ len = 1; /* should be ASCII */
+ return len;
+}
+
+static int
+pg_big5_dsplen(const unsigned char *s)
+{
+ int len;
+
+ if (IS_HIGHBIT_SET(*s))
+ len = 2; /* kanji? */
+ else
+ len = pg_ascii_dsplen(s); /* should be ASCII */
+ return len;
+}
+
+/*
+ * GBK
+ */
+static int
+pg_gbk_mblen(const unsigned char *s)
+{
+ int len;
+
+ if (IS_HIGHBIT_SET(*s))
+ len = 2; /* kanji? */
+ else
+ len = 1; /* should be ASCII */
+ return len;
+}
+
+static int
+pg_gbk_dsplen(const unsigned char *s)
+{
+ int len;
+
+ if (IS_HIGHBIT_SET(*s))
+ len = 2; /* kanji? */
+ else
+ len = pg_ascii_dsplen(s); /* should be ASCII */
+ return len;
+}
+
+/*
+ * UHC
+ */
+static int
+pg_uhc_mblen(const unsigned char *s)
+{
+ int len;
+
+ if (IS_HIGHBIT_SET(*s))
+ len = 2; /* 2byte? */
+ else
+ len = 1; /* should be ASCII */
+ return len;
+}
+
+static int
+pg_uhc_dsplen(const unsigned char *s)
+{
+ int len;
+
+ if (IS_HIGHBIT_SET(*s))
+ len = 2; /* 2byte? */
+ else
+ len = pg_ascii_dsplen(s); /* should be ASCII */
+ return len;
+}
+
+/*
+ * GB18030
+ * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp>
+ */
+
+/*
+ * Unlike all other mblen() functions, this also looks at the second byte of
+ * the input. However, if you only pass the first byte of a multi-byte
+ * string, and \0 as the second byte, this still works in a predictable way:
+ * a 4-byte character will be reported as two 2-byte characters. That's
+ * enough for all current uses, as a client-only encoding. It works that
+ * way, because in any valid 4-byte GB18030-encoded character, the third and
+ * fourth byte look like a 2-byte encoded character, when looked at
+ * separately.
+ */
+static int
+pg_gb18030_mblen(const unsigned char *s)
+{
+ int len;
+
+ if (!IS_HIGHBIT_SET(*s))
+ len = 1; /* ASCII */
+ else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
+ len = 4;
+ else
+ len = 2;
+ return len;
+}
+
+static int
+pg_gb18030_dsplen(const unsigned char *s)
+{
+ int len;
+
+ if (IS_HIGHBIT_SET(*s))
+ len = 2;
+ else
+ len = pg_ascii_dsplen(s); /* ASCII */
+ return len;
+}
+
+/*
+ *-------------------------------------------------------------------
+ * multibyte sequence validators
+ *
+ * These functions accept "s", a pointer to the first byte of a string,
+ * and "len", the remaining length of the string. If there is a validly
+ * encoded character beginning at *s, return its length in bytes; else
+ * return -1.
+ *
+ * The functions can assume that len > 0 and that *s != '\0', but they must
+ * test for and reject zeroes in any additional bytes of a multibyte character.
+ *
+ * Note that this definition allows the function for a single-byte
+ * encoding to be just "return 1".
+ *-------------------------------------------------------------------
+ */
+
+static int
+pg_ascii_verifier(const unsigned char *s, int len)
+{
+ return 1;
+}
+
+#define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
+
+static int
+pg_eucjp_verifier(const unsigned char *s, int len)
+{
+ int l;
+ unsigned char c1,
+ c2;
+
+ c1 = *s++;
+
+ switch (c1)
+ {
+ case SS2: /* JIS X 0201 */
+ l = 2;
+ if (l > len)
+ return -1;
+ c2 = *s++;
+ if (c2 < 0xa1 || c2 > 0xdf)
+ return -1;
+ break;
+
+ case SS3: /* JIS X 0212 */
+ l = 3;
+ if (l > len)
+ return -1;
+ c2 = *s++;
+ if (!IS_EUC_RANGE_VALID(c2))
+ return -1;
+ c2 = *s++;
+ if (!IS_EUC_RANGE_VALID(c2))
+ return -1;
+ break;
+
+ default:
+ if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
+ {
+ l = 2;
+ if (l > len)
+ return -1;
+ if (!IS_EUC_RANGE_VALID(c1))
+ return -1;
+ c2 = *s++;
+ if (!IS_EUC_RANGE_VALID(c2))
+ return -1;
+ }
+ else
+ /* must be ASCII */
+ {
+ l = 1;
+ }
+ break;
+ }
+
+ return l;
+}
+
+static int
+pg_euckr_verifier(const unsigned char *s, int len)
+{
+ int l;
+ unsigned char c1,
+ c2;
+
+ c1 = *s++;
+
+ if (IS_HIGHBIT_SET(c1))
+ {
+ l = 2;
+ if (l > len)
+ return -1;
+ if (!IS_EUC_RANGE_VALID(c1))
+ return -1;
+ c2 = *s++;
+ if (!IS_EUC_RANGE_VALID(c2))
+ return -1;
+ }
+ else
+ /* must be ASCII */
+ {
+ l = 1;
+ }
+
+ return l;
+}
+
+/* EUC-CN byte sequences are exactly same as EUC-KR */
+#define pg_euccn_verifier pg_euckr_verifier
+
+static int
+pg_euctw_verifier(const unsigned char *s, int len)
+{
+ int l;
+ unsigned char c1,
+ c2;
+
+ c1 = *s++;
+
+ switch (c1)
+ {
+ case SS2: /* CNS 11643 Plane 1-7 */
+ l = 4;
+ if (l > len)
+ return -1;
+ c2 = *s++;
+ if (c2 < 0xa1 || c2 > 0xa7)
+ return -1;
+ c2 = *s++;
+ if (!IS_EUC_RANGE_VALID(c2))
+ return -1;
+ c2 = *s++;
+ if (!IS_EUC_RANGE_VALID(c2))
+ return -1;
+ break;
+
+ case SS3: /* unused */
+ return -1;
+
+ default:
+ if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */
+ {
+ l = 2;
+ if (l > len)
+ return -1;
+ /* no further range check on c1? */
+ c2 = *s++;
+ if (!IS_EUC_RANGE_VALID(c2))
+ return -1;
+ }
+ else
+ /* must be ASCII */
+ {
+ l = 1;
+ }
+ break;
+ }
+ return l;
+}
+
+static int
+pg_johab_verifier(const unsigned char *s, int len)
+{
+ int l,
+ mbl;
+ unsigned char c;
+
+ l = mbl = pg_johab_mblen(s);
+
+ if (len < l)
+ return -1;
+
+ if (!IS_HIGHBIT_SET(*s))
+ return mbl;
+
+ while (--l > 0)
+ {
+ c = *++s;
+ if (!IS_EUC_RANGE_VALID(c))
+ return -1;
+ }
+ return mbl;
+}
+
+static int
+pg_mule_verifier(const unsigned char *s, int len)
+{
+ int l,
+ mbl;
+ unsigned char c;
+
+ l = mbl = pg_mule_mblen(s);
+
+ if (len < l)
+ return -1;
+
+ while (--l > 0)
+ {
+ c = *++s;
+ if (!IS_HIGHBIT_SET(c))
+ return -1;
+ }
+ return mbl;
+}
+
+static int
+pg_latin1_verifier(const unsigned char *s, int len)
+{
+ return 1;
+}
+
+static int
+pg_sjis_verifier(const unsigned char *s, int len)
+{
+ int l,
+ mbl;
+ unsigned char c1,
+ c2;
+
+ l = mbl = pg_sjis_mblen(s);
+
+ if (len < l)
+ return -1;
+
+ if (l == 1) /* pg_sjis_mblen already verified it */
+ return mbl;
+
+ c1 = *s++;
+ c2 = *s;
+ if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2))
+ return -1;
+ return mbl;
+}
+
+static int
+pg_big5_verifier(const unsigned char *s, int len)
+{
+ int l,
+ mbl;
+
+ l = mbl = pg_big5_mblen(s);
+
+ if (len < l)
+ return -1;
+
+ while (--l > 0)
+ {
+ if (*++s == '\0')
+ return -1;
+ }
+
+ return mbl;
+}
+
+static int
+pg_gbk_verifier(const unsigned char *s, int len)
+{
+ int l,
+ mbl;
+
+ l = mbl = pg_gbk_mblen(s);
+
+ if (len < l)
+ return -1;
+
+ while (--l > 0)
+ {
+ if (*++s == '\0')
+ return -1;
+ }
+
+ return mbl;
+}
+
+static int
+pg_uhc_verifier(const unsigned char *s, int len)
+{
+ int l,
+ mbl;
+
+ l = mbl = pg_uhc_mblen(s);
+
+ if (len < l)
+ return -1;
+
+ while (--l > 0)
+ {
+ if (*++s == '\0')
+ return -1;
+ }
+
+ return mbl;
+}
+
+static int
+pg_gb18030_verifier(const unsigned char *s, int len)
+{
+ int l;
+
+ if (!IS_HIGHBIT_SET(*s))
+ l = 1; /* ASCII */
+ else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
+ {
+ /* Should be 4-byte, validate remaining bytes */
+ if (*s >= 0x81 && *s <= 0xfe &&
+ *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
+ *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
+ l = 4;
+ else
+ l = -1;
+ }
+ else if (len >= 2 && *s >= 0x81 && *s <= 0xfe)
+ {
+ /* Should be 2-byte, validate */
+ if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
+ (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
+ l = 2;
+ else
+ l = -1;
+ }
+ else
+ l = -1;
+ return l;
+}
+
+static int
+pg_utf8_verifier(const unsigned char *s, int len)
+{
+ int l = pg_utf_mblen(s);
+
+ if (len < l)
+ return -1;
+
+ if (!pg_utf8_islegal(s, l))
+ return -1;
+
+ return l;
+}
+
+/*
+ * Check for validity of a single UTF-8 encoded character
+ *
+ * This directly implements the rules in RFC3629. The bizarre-looking
+ * restrictions on the second byte are meant to ensure that there isn't
+ * more than one encoding of a given Unicode character point; that is,
+ * you may not use a longer-than-necessary byte sequence with high order
+ * zero bits to represent a character that would fit in fewer bytes.
+ * To do otherwise is to create security hazards (eg, create an apparent
+ * non-ASCII character that decodes to plain ASCII).
+ *
+ * length is assumed to have been obtained by pg_utf_mblen(), and the
+ * caller must have checked that that many bytes are present in the buffer.
+ */
+bool
+pg_utf8_islegal(const unsigned char *source, int length)
+{
+ unsigned char a;
+
+ switch (length)
+ {
+ default:
+ /* reject lengths 5 and 6 for now */
+ return false;
+ case 4:
+ a = source[3];
+ if (a < 0x80 || a > 0xBF)
+ return false;
+ /* FALL THRU */
+ case 3:
+ a = source[2];
+ if (a < 0x80 || a > 0xBF)
+ return false;
+ /* FALL THRU */
+ case 2:
+ a = source[1];
+ switch (*source)
+ {
+ case 0xE0:
+ if (a < 0xA0 || a > 0xBF)
+ return false;
+ break;
+ case 0xED:
+ if (a < 0x80 || a > 0x9F)
+ return false;
+ break;
+ case 0xF0:
+ if (a < 0x90 || a > 0xBF)
+ return false;
+ break;
+ case 0xF4:
+ if (a < 0x80 || a > 0x8F)
+ return false;
+ break;
+ default:
+ if (a < 0x80 || a > 0xBF)
+ return false;
+ break;
+ }
+ /* FALL THRU */
+ case 1:
+ a = *source;
+ if (a >= 0x80 && a < 0xC2)
+ return false;
+ if (a > 0xF4)
+ return false;
+ break;
+ }
+ return true;
+}
+
+
+/*
+ *-------------------------------------------------------------------
+ * encoding info table
+ * XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h)
+ *-------------------------------------------------------------------
+ */
+const pg_wchar_tbl pg_wchar_table[] = {
+ {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1}, /* PG_SQL_ASCII */
+ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JP */
+ {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2}, /* PG_EUC_CN */
+ {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3}, /* PG_EUC_KR */
+ {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4}, /* PG_EUC_TW */
+ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JIS_2004 */
+ {pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4}, /* PG_UTF8 */
+ {pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4}, /* PG_MULE_INTERNAL */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN1 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN2 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN3 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN4 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN5 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN6 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN7 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN8 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN9 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN10 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1256 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1258 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN866 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN874 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8R */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1251 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1252 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-5 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-6 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-7 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-8 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1250 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1253 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1254 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1255 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1257 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8U */
+ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* PG_SJIS */
+ {0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* PG_BIG5 */
+ {0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* PG_GBK */
+ {0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* PG_UHC */
+ {0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4}, /* PG_GB18030 */
+ {0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3}, /* PG_JOHAB */
+ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2} /* PG_SHIFT_JIS_2004 */
+};
+
+/*
+ * Returns the byte length of a multibyte character.
+ *
+ * Caution: when dealing with text that is not certainly valid in the
+ * specified encoding, the result may exceed the actual remaining
+ * string length. Callers that are not prepared to deal with that
+ * should use pg_encoding_mblen_bounded() instead.
+ */
+int
+pg_encoding_mblen(int encoding, const char *mbstr)
+{
+ return (PG_VALID_ENCODING(encoding) ?
+ pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) :
+ pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
+}
+
+/*
+ * Returns the byte length of a multibyte character; but not more than
+ * the distance to end of string.
+ */
+int
+pg_encoding_mblen_bounded(int encoding, const char *mbstr)
+{
+ return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr));
+}
+
+/*
+ * Returns the display length of a multibyte character.
+ */
+int
+pg_encoding_dsplen(int encoding, const char *mbstr)
+{
+ return (PG_VALID_ENCODING(encoding) ?
+ pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
+ pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
+}
+
+/*
+ * Verify the first multibyte character of the given string.
+ * Return its byte length if good, -1 if bad. (See comments above for
+ * full details of the mbverify API.)
+ */
+int
+pg_encoding_verifymb(int encoding, const char *mbstr, int len)
+{
+ return (PG_VALID_ENCODING(encoding) ?
+ pg_wchar_table[encoding].mbverify((const unsigned char *) mbstr, len) :
+ pg_wchar_table[PG_SQL_ASCII].mbverify((const unsigned char *) mbstr, len));
+}
+
+/*
+ * fetch maximum length of a given encoding
+ */
+int
+pg_encoding_max_length(int encoding)
+{
+ Assert(PG_VALID_ENCODING(encoding));
+
+ return pg_wchar_table[encoding].maxmblen;
+}