summaryrefslogtreecommitdiffstats
path: root/src/common
diff options
context:
space:
mode:
Diffstat (limited to 'src/common')
-rw-r--r--src/common/.gitignore1
-rw-r--r--src/common/Makefile197
-rw-r--r--src/common/archive.c60
-rw-r--r--src/common/base64.c242
-rw-r--r--src/common/checksum_helper.c232
-rw-r--r--src/common/compression.c476
-rw-r--r--src/common/config_info.c201
-rw-r--r--src/common/controldata_utils.c269
-rw-r--r--src/common/cryptohash.c273
-rw-r--r--src/common/cryptohash_openssl.c353
-rw-r--r--src/common/d2s.c1076
-rw-r--r--src/common/d2s_full_table.h358
-rw-r--r--src/common/d2s_intrinsics.h202
-rw-r--r--src/common/digit_table.h21
-rw-r--r--src/common/encnames.c598
-rw-r--r--src/common/exec.c719
-rw-r--r--src/common/f2s.c803
-rw-r--r--src/common/fe_memutils.c175
-rw-r--r--src/common/file_perm.c91
-rw-r--r--src/common/file_utils.c582
-rw-r--r--src/common/hashfn.c692
-rw-r--r--src/common/hmac.c330
-rw-r--r--src/common/hmac_openssl.c348
-rw-r--r--src/common/ip.c262
-rw-r--r--src/common/jsonapi.c1206
-rw-r--r--src/common/keywords.c48
-rw-r--r--src/common/kwlist_d.h1119
-rw-r--r--src/common/kwlookup.c85
-rw-r--r--src/common/link-canary.c36
-rw-r--r--src/common/logging.c334
-rw-r--r--src/common/md5.c439
-rw-r--r--src/common/md5_common.c172
-rw-r--r--src/common/md5_int.h85
-rw-r--r--src/common/meson.build177
-rw-r--r--src/common/percentrepl.c137
-rw-r--r--src/common/pg_get_line.c180
-rw-r--r--src/common/pg_lzcompress.c876
-rw-r--r--src/common/pg_prng.c282
-rw-r--r--src/common/pgfnames.c94
-rw-r--r--src/common/protocol_openssl.c117
-rw-r--r--src/common/psprintf.c151
-rw-r--r--src/common/relpath.c210
-rw-r--r--src/common/restricted_token.c174
-rw-r--r--src/common/rmtree.c130
-rw-r--r--src/common/ryu_common.h133
-rw-r--r--src/common/saslprep.c1245
-rw-r--r--src/common/scram-common.c330
-rw-r--r--src/common/sha1.c369
-rw-r--r--src/common/sha1_int.h81
-rw-r--r--src/common/sha2.c1017
-rw-r--r--src/common/sha2_int.h91
-rw-r--r--src/common/sprompt.c181
-rw-r--r--src/common/string.c164
-rw-r--r--src/common/stringinfo.c343
-rw-r--r--src/common/unicode/.gitignore9
-rw-r--r--src/common/unicode/Makefile72
-rw-r--r--src/common/unicode/README28
-rw-r--r--src/common/unicode/generate-norm_test_table.pl106
-rw-r--r--src/common/unicode/generate-unicode_east_asian_fw_table.pl76
-rw-r--r--src/common/unicode/generate-unicode_nonspacing_table.pl53
-rw-r--r--src/common/unicode/generate-unicode_norm_table.pl412
-rw-r--r--src/common/unicode/generate-unicode_normprops_table.pl125
-rw-r--r--src/common/unicode/meson.build111
-rw-r--r--src/common/unicode/norm_test.c86
-rw-r--r--src/common/unicode_norm.c634
-rw-r--r--src/common/username.c87
-rw-r--r--src/common/wait_error.c148
-rw-r--r--src/common/wchar.c2194
68 files changed, 22708 insertions, 0 deletions
diff --git a/src/common/.gitignore b/src/common/.gitignore
new file mode 100644
index 0000000..ffa3284
--- /dev/null
+++ b/src/common/.gitignore
@@ -0,0 +1 @@
+/kwlist_d.h
diff --git a/src/common/Makefile b/src/common/Makefile
new file mode 100644
index 0000000..113029b
--- /dev/null
+++ b/src/common/Makefile
@@ -0,0 +1,197 @@
+#-------------------------------------------------------------------------
+#
+# Makefile
+# Makefile for src/common
+#
+# These files are used by the Postgres backend, and also by frontend
+# programs. These files provide common functionality that isn't directly
+# concerned with portability and thus doesn't belong in src/port.
+#
+# This makefile generates three outputs:
+#
+# libpgcommon.a - contains object files with FRONTEND defined,
+# for use by client applications
+#
+# libpgcommon_shlib.a - contains object files with FRONTEND defined,
+# built suitably for use in shared libraries; for use
+# by frontend libraries
+#
+# libpgcommon_srv.a - contains object files without FRONTEND defined,
+# for use only by the backend
+#
+# IDENTIFICATION
+# src/common/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/common
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+# don't include subdirectory-path-dependent -I and -L switches
+STD_CPPFLAGS := $(filter-out -I$(top_srcdir)/src/include -I$(top_builddir)/src/include,$(CPPFLAGS))
+STD_LDFLAGS := $(filter-out -L$(top_builddir)/src/common -L$(top_builddir)/src/port,$(LDFLAGS))
+# Hand the compiler, flag and library settings to the C code as
+# string-valued VAL_* preprocessor macros.
+override CPPFLAGS += -DVAL_CC="\"$(CC)\""
+override CPPFLAGS += -DVAL_CPPFLAGS="\"$(STD_CPPFLAGS)\""
+override CPPFLAGS += -DVAL_CFLAGS="\"$(CFLAGS)\""
+override CPPFLAGS += -DVAL_CFLAGS_SL="\"$(CFLAGS_SL)\""
+override CPPFLAGS += -DVAL_LDFLAGS="\"$(STD_LDFLAGS)\""
+override CPPFLAGS += -DVAL_LDFLAGS_EX="\"$(LDFLAGS_EX)\""
+override CPPFLAGS += -DVAL_LDFLAGS_SL="\"$(LDFLAGS_SL)\""
+override CPPFLAGS += -DVAL_LIBS="\"$(LIBS)\""
+
+# FRONTEND is defined by default; the %_srv.o rule further down strips
+# -DFRONTEND again when compiling the server flavor.
+override CPPFLAGS := -DFRONTEND -I. -I$(top_srcdir)/src/common $(CPPFLAGS)
+LIBS += $(PTHREAD_LIBS)
+
+# If you add objects here, see also src/tools/msvc/Mkvcbuild.pm
+
+# Objects that end up in all three libraries: OBJS_FRONTEND_SHLIB and
+# OBJS_FRONTEND include this list, and OBJS_SRV is derived from it.
+OBJS_COMMON = \
+ archive.o \
+ base64.o \
+ checksum_helper.o \
+ compression.o \
+ config_info.o \
+ controldata_utils.o \
+ d2s.o \
+ encnames.o \
+ exec.o \
+ f2s.o \
+ file_perm.o \
+ file_utils.o \
+ hashfn.o \
+ ip.o \
+ jsonapi.o \
+ keywords.o \
+ kwlookup.o \
+ link-canary.o \
+ md5_common.o \
+ percentrepl.o \
+ pg_get_line.o \
+ pg_lzcompress.o \
+ pg_prng.o \
+ pgfnames.o \
+ psprintf.o \
+ relpath.o \
+ rmtree.o \
+ saslprep.o \
+ scram-common.o \
+ string.o \
+ stringinfo.o \
+ unicode_norm.o \
+ username.o \
+ wait_error.o \
+ wchar.o
+
+# When building with OpenSSL, add the *_openssl.o objects; otherwise add
+# cryptohash.o, hmac.o and the md5/sha1/sha2 objects instead.
+ifeq ($(with_ssl),openssl)
+OBJS_COMMON += \
+ cryptohash_openssl.o \
+ hmac_openssl.o \
+ protocol_openssl.o
+else
+OBJS_COMMON += \
+ cryptohash.o \
+ hmac.o \
+ md5.o \
+ sha1.o \
+ sha2.o
+endif
+
+# A few files are currently only built for frontend, not server
+# (Mkvcbuild.pm has a copy of this list, too). logging.c is excluded
+# from OBJS_FRONTEND_SHLIB (shared library) as a matter of policy,
+# because it is not appropriate for general purpose libraries such
+# as libpq to report errors directly.
+OBJS_FRONTEND_SHLIB = \
+ $(OBJS_COMMON) \
+ fe_memutils.o \
+ restricted_token.o \
+ sprompt.o
+OBJS_FRONTEND = \
+ $(OBJS_FRONTEND_SHLIB) \
+ logging.o
+
+# foo.o, foo_shlib.o, and foo_srv.o are all built from foo.c
+OBJS_SHLIB = $(OBJS_FRONTEND_SHLIB:%.o=%_shlib.o)
+OBJS_SRV = $(OBJS_COMMON:%.o=%_srv.o)
+
+# where to find gen_keywordlist.pl and subsidiary files
+TOOLSDIR = $(top_srcdir)/src/tools
+GEN_KEYWORDLIST = $(PERL) -I $(TOOLSDIR) $(TOOLSDIR)/gen_keywordlist.pl
+GEN_KEYWORDLIST_DEPS = $(TOOLSDIR)/gen_keywordlist.pl $(TOOLSDIR)/PerfectHash.pm
+
+all: libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a
+
+distprep: kwlist_d.h
+
+# libpgcommon is needed by some contrib
+# Only the two frontend archives are installed; libpgcommon_srv.a is not.
+install: all installdirs
+ $(INSTALL_STLIB) libpgcommon.a '$(DESTDIR)$(libdir)/libpgcommon.a'
+ $(INSTALL_STLIB) libpgcommon_shlib.a '$(DESTDIR)$(libdir)/libpgcommon_shlib.a'
+
+installdirs:
+ $(MKDIR_P) '$(DESTDIR)$(libdir)'
+
+uninstall:
+ rm -f '$(DESTDIR)$(libdir)/libpgcommon.a'
+ rm -f '$(DESTDIR)$(libdir)/libpgcommon_shlib.a'
+
+# The archive is removed first so that it is rebuilt from scratch from the
+# current object list.
+libpgcommon.a: $(OBJS_FRONTEND)
+ rm -f $@
+ $(AR) $(AROPT) $@ $^
+
+#
+# Shared library versions of object files
+#
+
+libpgcommon_shlib.a: $(OBJS_SHLIB)
+ rm -f $@
+ $(AR) $(AROPT) $@ $^
+
+# Because this uses its own compilation rule, it doesn't use the
+# dependency tracking logic from Makefile.global. To make sure that
+# dependency tracking works anyway for the *_shlib.o files, depend on
+# their *.o siblings as well, which do have proper dependencies. It's
+# a hack that might fail someday if there is a *_shlib.o without a
+# corresponding *.o, but there seems little reason for that.
+# $< is the .c file (first prerequisite); CFLAGS_SL is added on top of the
+# regular CFLAGS for this flavor.
+%_shlib.o: %.c %.o
+ $(CC) $(CFLAGS) $(CFLAGS_SL) $(CPPFLAGS) -c $< -o $@
+
+#
+# Server versions of object files
+#
+
+libpgcommon_srv.a: $(OBJS_SRV)
+ rm -f $@
+ $(AR) $(AROPT) $@ $^
+
+# Because this uses its own compilation rule, it doesn't use the
+# dependency tracking logic from Makefile.global. To make sure that
+# dependency tracking works anyway for the *_srv.o files, depend on
+# their *.o siblings as well, which do have proper dependencies. It's
+# a hack that might fail someday if there is a *_srv.o without a
+# corresponding *.o, but it works for now.
+# The $(subst ...) removes -DFRONTEND from CPPFLAGS, so these objects are
+# compiled without FRONTEND defined.
+%_srv.o: %.c %.o
+ $(CC) $(CFLAGS) $(subst -DFRONTEND,, $(CPPFLAGS)) -c $< -o $@
+
+# generate SQL keyword lookup table to be included into keywords*.o.
+# $< is kwlist.h, the first prerequisite.
+kwlist_d.h: $(top_srcdir)/src/include/parser/kwlist.h $(GEN_KEYWORDLIST_DEPS)
+ $(GEN_KEYWORDLIST) --extern $<
+
+# Dependencies of keywords*.o need to be managed explicitly to make sure
+# that you don't get broken parsing code, even in a non-enable-depend build.
+keywords.o keywords_shlib.o keywords_srv.o: kwlist_d.h
+
+# The code imported from Ryu gets a pass on declaration-after-statement,
+# in order to keep it more closely aligned with its upstream.
+RYU_FILES = d2s.o f2s.o
+RYU_OBJS = $(RYU_FILES) $(RYU_FILES:%.o=%_shlib.o) $(RYU_FILES:%.o=%_srv.o)
+
+# Target-specific variable: the extra flag applies only to the Ryu objects.
+$(RYU_OBJS): CFLAGS += $(PERMIT_DECLARATION_AFTER_STATEMENT)
+
+# kwlist_d.h is in the distribution tarball, so it is not cleaned here.
+clean distclean:
+ rm -f libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a
+ rm -f $(OBJS_FRONTEND) $(OBJS_SHLIB) $(OBJS_SRV)
+
+maintainer-clean: distclean
+ rm -f kwlist_d.h
diff --git a/src/common/archive.c b/src/common/archive.c
new file mode 100644
index 0000000..641a58e
--- /dev/null
+++ b/src/common/archive.c
@@ -0,0 +1,60 @@
+/*-------------------------------------------------------------------------
+ *
+ * archive.c
+ * Common WAL archive routines
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/archive.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/archive.h"
+#include "common/percentrepl.h"
+
+/*
+ * BuildRestoreCommand
+ *
+ * Expand the placeholders of a restore_command template (the GUC
+ * restore_command) with caller-supplied values:
+ *		%f	xlogfname
+ *		%r	lastRestartPointFname
+ *		%p	xlogpath, after conversion by make_native_path()
+ *
+ * The expanded command is returned as a palloc'd string that the caller
+ * must free.  An error is thrown if the template uses a placeholder whose
+ * value was passed as NULL.
+ */
+char *
+BuildRestoreCommand(const char *restoreCommand,
+					const char *xlogpath,
+					const char *xlogfname,
+					const char *lastRestartPointFname)
+{
+	char	   *command;
+	char	   *native_copy;
+
+	/* No path given: expand with %p left unset. */
+	if (xlogpath == NULL)
+		return replace_percent_placeholders(restoreCommand, "restore_command", "frp",
+											xlogfname, lastRestartPointFname, (char *) NULL);
+
+	/* make_native_path() edits in place, so work on a private copy. */
+	native_copy = pstrdup(xlogpath);
+	make_native_path(native_copy);
+
+	command = replace_percent_placeholders(restoreCommand, "restore_command", "frp",
+										   xlogfname, lastRestartPointFname, native_copy);
+
+	pfree(native_copy);
+
+	return command;
+}
diff --git a/src/common/base64.c b/src/common/base64.c
new file mode 100644
index 0000000..ec4eb49
--- /dev/null
+++ b/src/common/base64.c
@@ -0,0 +1,242 @@
+/*-------------------------------------------------------------------------
+ *
+ * base64.c
+ * Encoding and decoding routines for base64 without whitespace.
+ *
+ * Copyright (c) 2001-2023, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/common/base64.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/base64.h"
+
+/*
+ * BASE64
+ */
+
+/* Encoding alphabet: the character for the 6-bit value v is _base64[v]. */
+static const char _base64[] =
+"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/*
+ * Decoding table, indexed by 7-bit ASCII code (16 entries per row). Gives
+ * the 6-bit value of a character of the alphabet above, or -1 for every
+ * other character (including the '=' padding character).
+ */
+static const int8 b64lookup[128] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
+ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
+ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
+};
+
+/*
+ * pg_b64_encode
+ *
+ * Write the base64 form of the "len" bytes at "src" into "dst", which has
+ * room for "dstlen" bytes.  No terminating NUL is written.
+ *
+ * Returns the number of bytes stored in "dst".  If "dst" is too small, all
+ * of "dst" is zeroed for safety and -1 is returned.
+ */
+int
+pg_b64_encode(const char *src, int len, char *dst, int dstlen)
+{
+	const unsigned char *in = (const unsigned char *) src;
+	char	   *out = dst;
+	uint32		group = 0;		/* up to three input bytes, first one highest */
+	int			filled = 0;		/* number of input bytes held in "group" */
+	int			i;
+
+	for (i = 0; i < len; i++)
+	{
+		/* first byte goes to bits 16-23, second to 8-15, third to 0-7 */
+		group |= (uint32) in[i] << ((2 - filled) * 8);
+		filled++;
+
+		if (filled < 3)
+			continue;
+
+		/* A complete 3-byte group yields four characters, if they fit. */
+		if ((out - dst + 4) > dstlen)
+			goto error;
+
+		out[0] = _base64[(group >> 18) & 0x3f];
+		out[1] = _base64[(group >> 12) & 0x3f];
+		out[2] = _base64[(group >> 6) & 0x3f];
+		out[3] = _base64[group & 0x3f];
+		out += 4;
+
+		group = 0;
+		filled = 0;
+	}
+
+	/* One or two leftover bytes: emit a final group padded with '='. */
+	if (filled > 0)
+	{
+		if ((out - dst + 4) > dstlen)
+			goto error;
+
+		out[0] = _base64[(group >> 18) & 0x3f];
+		out[1] = _base64[(group >> 12) & 0x3f];
+		out[2] = (filled == 2) ? _base64[(group >> 6) & 0x3f] : '=';
+		out[3] = '=';
+		out += 4;
+	}
+
+	Assert((out - dst) <= dstlen);
+	return out - dst;
+
+error:
+	memset(dst, 0, dstlen);
+	return -1;
+}
+
+/*
+ * pg_b64_decode
+ *
+ * Decode the "len" bytes of base64 text at "src" into "dst", which has
+ * room for "dstlen" bytes. Returns the length of the decoded
+ * string on success, and -1 in the event of an error with the result
+ * buffer zeroed for safety.
+ *
+ * Whitespace, characters outside the base64 alphabet, a misplaced '=' and
+ * input whose length is not a multiple of four characters are all errors.
+ *
+ * NOTE(review): "end" is never reset once a '=' has been seen, and
+ * characters following a '=' are still processed, so data after the
+ * padding does not appear to be rejected -- confirm whether that is
+ * intended.
+ */
+int
+pg_b64_decode(const char *src, int len, char *dst, int dstlen)
+{
+ const char *srcend = src + len,
+ *s = src;
+ char *p = dst;
+ char c;
+ int b = 0;
+ uint32 buf = 0; /* accumulates four 6-bit values */
+ int pos = 0, /* characters collected in the current group */
+ end = 0; /* 0: no padding seen; 1: "==" form; 2: "=" form */
+
+ while (s < srcend)
+ {
+ c = *s++;
+
+ /* Leave if a whitespace is found */
+ if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
+ goto error;
+
+ if (c == '=')
+ {
+ /* end sequence */
+ if (!end)
+ {
+ /* padding may only start at the 3rd or 4th character of a group */
+ if (pos == 2)
+ end = 1;
+ else if (pos == 3)
+ end = 2;
+ else
+ {
+ /*
+ * Unexpected "=" character found while decoding base64
+ * sequence.
+ */
+ goto error;
+ }
+ }
+ /* padding contributes zero bits to the group */
+ b = 0;
+ }
+ else
+ {
+ /* only codes 1..126 are looked up; everything else is invalid */
+ b = -1;
+ if (c > 0 && c < 127)
+ b = b64lookup[(unsigned char) c];
+ if (b < 0)
+ {
+ /* invalid symbol found */
+ goto error;
+ }
+ }
+ /* add it to buffer */
+ buf = (buf << 6) + b;
+ pos++;
+ if (pos == 4)
+ {
+ /*
+ * Leave if there is an overflow in the area allocated for the
+ * decoded string.
+ */
+ if ((p - dst + 1) > dstlen)
+ goto error;
+ *p++ = (buf >> 16) & 255;
+
+ /* second byte exists unless the group ended in "==" */
+ if (end == 0 || end > 1)
+ {
+ /* overflow check */
+ if ((p - dst + 1) > dstlen)
+ goto error;
+ *p++ = (buf >> 8) & 255;
+ }
+ /* third byte exists only when no padding was seen ("end" never exceeds 2) */
+ if (end == 0 || end > 2)
+ {
+ /* overflow check */
+ if ((p - dst + 1) > dstlen)
+ goto error;
+ *p++ = buf & 255;
+ }
+ buf = 0;
+ pos = 0;
+ }
+ }
+
+ if (pos != 0)
+ {
+ /*
+ * base64 end sequence is invalid. Input data is missing padding, is
+ * truncated or is otherwise corrupted.
+ */
+ goto error;
+ }
+
+ Assert((p - dst) <= dstlen);
+ return p - dst;
+
+error:
+ memset(dst, 0, dstlen);
+ return -1;
+}
+
+/*
+ * pg_b64_enc_len
+ *
+ * Number of bytes that base64-encoding "srclen" bytes of input produces.
+ * Intended for sizing the destination buffer before the actual encoding
+ * is done.
+ */
+int
+pg_b64_enc_len(int srclen)
+{
+	/* every started group of 3 input bytes becomes 4 output characters */
+	int			groups = (srclen + 2) / 3;
+
+	return groups * 4;
+}
+
+/*
+ * pg_b64_dec_len
+ *
+ * Size estimate, in bytes, for the result of decoding "srclen" bytes of
+ * base64 text: three quarters of the input length.  Intended for sizing
+ * the destination buffer before the actual decoding is done.
+ */
+int
+pg_b64_dec_len(int srclen)
+{
+	int			tripled = srclen * 3;
+
+	return tripled >> 2;
+}
diff --git a/src/common/checksum_helper.c b/src/common/checksum_helper.c
new file mode 100644
index 0000000..21ff895
--- /dev/null
+++ b/src/common/checksum_helper.c
@@ -0,0 +1,232 @@
+/*-------------------------------------------------------------------------
+ *
+ * checksum_helper.c
+ * Compute a checksum of any of various types using common routines
+ *
+ * Portions Copyright (c) 2016-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/checksum_helper.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/checksum_helper.h"
+
+/*
+ * Translate a checksum type name into its pg_checksum_type constant.
+ *
+ * The comparison ignores case.  On a match, *type receives the constant
+ * and true is returned.  For an unrecognized name, *type is set to
+ * CHECKSUM_TYPE_NONE and false is returned.
+ */
+bool
+pg_checksum_parse_type(char *name, pg_checksum_type *type)
+{
+	static const struct
+	{
+		const char *label;
+		pg_checksum_type value;
+	}			known_types[] = {
+		{"none", CHECKSUM_TYPE_NONE},
+		{"crc32c", CHECKSUM_TYPE_CRC32C},
+		{"sha224", CHECKSUM_TYPE_SHA224},
+		{"sha256", CHECKSUM_TYPE_SHA256},
+		{"sha384", CHECKSUM_TYPE_SHA384},
+		{"sha512", CHECKSUM_TYPE_SHA512}
+	};
+	size_t		i;
+
+	for (i = 0; i < sizeof(known_types) / sizeof(known_types[0]); i++)
+	{
+		if (pg_strcasecmp(name, known_types[i].label) == 0)
+		{
+			*type = known_types[i].value;
+			return true;
+		}
+	}
+
+	/* Unknown name: still hand back a defined value. */
+	*type = CHECKSUM_TYPE_NONE;
+	return false;
+}
+
+/*
+ * Return the canonical, upper-case display name of a checksum type.
+ *
+ * The result is a string constant and must not be modified or freed.
+ */
+char *
+pg_checksum_type_name(pg_checksum_type type)
+{
+	char	   *label = NULL;
+
+	switch (type)
+	{
+		case CHECKSUM_TYPE_NONE:
+			label = "NONE";
+			break;
+		case CHECKSUM_TYPE_CRC32C:
+			label = "CRC32C";
+			break;
+		case CHECKSUM_TYPE_SHA224:
+			label = "SHA224";
+			break;
+		case CHECKSUM_TYPE_SHA256:
+			label = "SHA256";
+			break;
+		case CHECKSUM_TYPE_SHA384:
+			label = "SHA384";
+			break;
+		case CHECKSUM_TYPE_SHA512:
+			label = "SHA512";
+			break;
+	}
+
+	if (label != NULL)
+		return label;
+
+	/* Not a known type: trips in assert-enabled builds. */
+	Assert(false);
+	return "???";
+}
+
+/*
+ * Prepare "context" for computing a checksum of the given type.
+ *
+ * CHECKSUM_TYPE_NONE needs no setup and CRC-32C only resets its running
+ * value.  The SHA-2 types create and initialize a cryptohash context; if
+ * its initialization fails, that context is freed again.
+ *
+ * Returns 0 for a success, -1 for a failure.
+ */
+int
+pg_checksum_init(pg_checksum_context *context, pg_checksum_type type)
+{
+	bool		sha2_created = false;
+
+	context->type = type;
+
+	switch (type)
+	{
+		case CHECKSUM_TYPE_NONE:
+			/* do nothing */
+			break;
+		case CHECKSUM_TYPE_CRC32C:
+			INIT_CRC32C(context->raw_context.c_crc32c);
+			break;
+		case CHECKSUM_TYPE_SHA224:
+			context->raw_context.c_sha2 = pg_cryptohash_create(PG_SHA224);
+			sha2_created = true;
+			break;
+		case CHECKSUM_TYPE_SHA256:
+			context->raw_context.c_sha2 = pg_cryptohash_create(PG_SHA256);
+			sha2_created = true;
+			break;
+		case CHECKSUM_TYPE_SHA384:
+			context->raw_context.c_sha2 = pg_cryptohash_create(PG_SHA384);
+			sha2_created = true;
+			break;
+		case CHECKSUM_TYPE_SHA512:
+			context->raw_context.c_sha2 = pg_cryptohash_create(PG_SHA512);
+			sha2_created = true;
+			break;
+	}
+
+	/* Everything below is common to the four SHA-2 variants. */
+	if (!sha2_created)
+		return 0;
+
+	if (context->raw_context.c_sha2 == NULL)
+		return -1;
+
+	if (pg_cryptohash_init(context->raw_context.c_sha2) < 0)
+	{
+		pg_cryptohash_free(context->raw_context.c_sha2);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Feed "len" bytes starting at "input" into a checksum computation.
+ *
+ * Returns 0 for a success, -1 for a failure.  Only the SHA-2 types can
+ * fail here.
+ */
+int
+pg_checksum_update(pg_checksum_context *context, const uint8 *input,
+				   size_t len)
+{
+	switch (context->type)
+	{
+		case CHECKSUM_TYPE_NONE:
+			/* nothing to accumulate */
+			return 0;
+		case CHECKSUM_TYPE_CRC32C:
+			COMP_CRC32C(context->raw_context.c_crc32c, input, len);
+			return 0;
+		case CHECKSUM_TYPE_SHA224:
+		case CHECKSUM_TYPE_SHA256:
+		case CHECKSUM_TYPE_SHA384:
+		case CHECKSUM_TYPE_SHA512:
+			return (pg_cryptohash_update(context->raw_context.c_sha2,
+										 input, len) < 0) ? -1 : 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Finalize a checksum computation and write the result to an output buffer.
+ *
+ * The caller must ensure that the buffer is at least PG_CHECKSUM_MAX_LENGTH
+ * bytes in length. The return value is the number of bytes actually written
+ * (0 for CHECKSUM_TYPE_NONE), or -1 for a failure.
+ *
+ * For the SHA-2 types the cryptohash context created by pg_checksum_init()
+ * is released here, whether or not finalization succeeds, so the checksum
+ * context must not be used again afterwards without re-initializing it.
+ */
+int
+pg_checksum_final(pg_checksum_context *context, uint8 *output)
+{
+	int			retval = 0;
+	int			sha2_len = 0;	/* digest length; nonzero only for SHA-2 */
+
+	StaticAssertDecl(sizeof(pg_crc32c) <= PG_CHECKSUM_MAX_LENGTH,
+					 "CRC-32C digest too big for PG_CHECKSUM_MAX_LENGTH");
+	StaticAssertDecl(PG_SHA224_DIGEST_LENGTH <= PG_CHECKSUM_MAX_LENGTH,
+					 "SHA224 digest too big for PG_CHECKSUM_MAX_LENGTH");
+	StaticAssertDecl(PG_SHA256_DIGEST_LENGTH <= PG_CHECKSUM_MAX_LENGTH,
+					 "SHA256 digest too big for PG_CHECKSUM_MAX_LENGTH");
+	StaticAssertDecl(PG_SHA384_DIGEST_LENGTH <= PG_CHECKSUM_MAX_LENGTH,
+					 "SHA384 digest too big for PG_CHECKSUM_MAX_LENGTH");
+	StaticAssertDecl(PG_SHA512_DIGEST_LENGTH <= PG_CHECKSUM_MAX_LENGTH,
+					 "SHA512 digest too big for PG_CHECKSUM_MAX_LENGTH");
+
+	switch (context->type)
+	{
+		case CHECKSUM_TYPE_NONE:
+			break;
+		case CHECKSUM_TYPE_CRC32C:
+			FIN_CRC32C(context->raw_context.c_crc32c);
+			retval = sizeof(pg_crc32c);
+			memcpy(output, &context->raw_context.c_crc32c, retval);
+			break;
+		case CHECKSUM_TYPE_SHA224:
+			sha2_len = PG_SHA224_DIGEST_LENGTH;
+			break;
+		case CHECKSUM_TYPE_SHA256:
+			sha2_len = PG_SHA256_DIGEST_LENGTH;
+			break;
+		case CHECKSUM_TYPE_SHA384:
+			sha2_len = PG_SHA384_DIGEST_LENGTH;
+			break;
+		case CHECKSUM_TYPE_SHA512:
+			sha2_len = PG_SHA512_DIGEST_LENGTH;
+			break;
+	}
+
+	if (sha2_len > 0)
+	{
+		int			rc;
+
+		rc = pg_cryptohash_final(context->raw_context.c_sha2,
+								 output, sha2_len);
+
+		/*
+		 * Release the hash context on the failure path as well; returning
+		 * early without freeing it would leak the context.
+		 */
+		pg_cryptohash_free(context->raw_context.c_sha2);
+
+		if (rc < 0)
+			return -1;
+		retval = sha2_len;
+	}
+
+	Assert(retval <= PG_CHECKSUM_MAX_LENGTH);
+	return retval;
+}
diff --git a/src/common/compression.c b/src/common/compression.c
new file mode 100644
index 0000000..ee93762
--- /dev/null
+++ b/src/common/compression.c
@@ -0,0 +1,476 @@
+/*-------------------------------------------------------------------------
+ *
+ * compression.c
+ *
+ * Shared code for compression methods and specifications.
+ *
+ * A compression specification specifies the parameters that should be used
+ * when performing compression with a specific algorithm. The simplest
+ * possible compression specification is an integer, which sets the
+ * compression level.
+ *
+ * Otherwise, a compression specification is a comma-separated list of items,
+ * each having the form keyword or keyword=value.
+ *
+ * Currently, the supported keywords are "level", "long", and "workers".
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/compression.c
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#ifdef USE_ZSTD
+#include <zstd.h>
+#endif
+#ifdef HAVE_LIBZ
+#include <zlib.h>
+#endif
+
+#include "common/compression.h"
+
+static int expect_integer_value(char *keyword, char *value,
+ pg_compress_specification *result);
+static bool expect_boolean_value(char *keyword, char *value,
+ pg_compress_specification *result);
+
+/*
+ * Map a compression algorithm name to its pg_compress_algorithm value.
+ *
+ * The match is exact (case-sensitive).  On success *algorithm is set and
+ * true is returned; for an unknown name *algorithm is left untouched and
+ * false is returned.
+ */
+bool
+parse_compress_algorithm(char *name, pg_compress_algorithm *algorithm)
+{
+	static const struct
+	{
+		const char *label;
+		pg_compress_algorithm value;
+	}			known_algorithms[] = {
+		{"none", PG_COMPRESSION_NONE},
+		{"gzip", PG_COMPRESSION_GZIP},
+		{"lz4", PG_COMPRESSION_LZ4},
+		{"zstd", PG_COMPRESSION_ZSTD}
+	};
+	size_t		i;
+
+	for (i = 0; i < sizeof(known_algorithms) / sizeof(known_algorithms[0]); i++)
+	{
+		if (strcmp(name, known_algorithms[i].label) == 0)
+		{
+			*algorithm = known_algorithms[i].value;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Return the lower-case name of a compression algorithm, as accepted by
+ * parse_compress_algorithm().  The result is a string constant.
+ */
+const char *
+get_compress_algorithm_name(pg_compress_algorithm algorithm)
+{
+	const char *label = NULL;
+
+	switch (algorithm)
+	{
+		case PG_COMPRESSION_NONE:
+			label = "none";
+			break;
+		case PG_COMPRESSION_GZIP:
+			label = "gzip";
+			break;
+		case PG_COMPRESSION_LZ4:
+			label = "lz4";
+			break;
+		case PG_COMPRESSION_ZSTD:
+			label = "zstd";
+			break;
+			/* no default, to provoke compiler warnings if values are added */
+	}
+
+	if (label != NULL)
+		return label;
+
+	Assert(false);
+	return "???";				/* placate compiler */
+}
+
+/*
+ * Parse a compression specification for a specified algorithm.
+ *
+ * See the file header comments for a brief description of what a compression
+ * specification is expected to look like.  "specification" may be NULL, in
+ * which case only the defaults are filled in.
+ *
+ * On return, all fields of the result object will be initialized: level,
+ * workers and long_distance are zeroed up front, so they hold defined
+ * values even when the corresponding keyword is absent or when this build
+ * does not support the algorithm.
+ * In particular, result->parse_error will be NULL if no errors occurred
+ * during parsing, and will otherwise contain an appropriate error message.
+ * The caller may free this error message string using pfree, if desired.
+ * Note, however, even if there's no parse error, the string might not make
+ * sense: e.g. for gzip, level=12 is not sensible, but it does parse OK.
+ *
+ * The compression level is assigned by default if not directly specified
+ * by the specification.
+ *
+ * Use validate_compress_specification() to find out whether a compression
+ * specification is semantically sensible.
+ */
+void
+parse_compress_specification(pg_compress_algorithm algorithm, char *specification,
+							 pg_compress_specification *result)
+{
+	int			bare_level;
+	char	   *bare_level_endp;
+
+	/* Initial setup of result object. */
+	result->algorithm = algorithm;
+	result->options = 0;
+	result->parse_error = NULL;
+
+	/*
+	 * Give every remaining field a defined value, so that callers never
+	 * read indeterminate data whatever path is taken below.
+	 */
+	result->level = 0;
+	result->workers = 0;
+	result->long_distance = false;
+
+	/*
+	 * Assign a default level depending on the compression method.  This may
+	 * be enforced later.
+	 */
+	switch (result->algorithm)
+	{
+		case PG_COMPRESSION_NONE:
+			result->level = 0;
+			break;
+		case PG_COMPRESSION_LZ4:
+#ifdef USE_LZ4
+			result->level = 0;	/* fast compression mode */
+#else
+			result->parse_error =
+				psprintf(_("this build does not support compression with %s"),
+						 "LZ4");
+#endif
+			break;
+		case PG_COMPRESSION_ZSTD:
+#ifdef USE_ZSTD
+			result->level = ZSTD_CLEVEL_DEFAULT;
+#else
+			result->parse_error =
+				psprintf(_("this build does not support compression with %s"),
+						 "ZSTD");
+#endif
+			break;
+		case PG_COMPRESSION_GZIP:
+#ifdef HAVE_LIBZ
+			result->level = Z_DEFAULT_COMPRESSION;
+#else
+			result->parse_error =
+				psprintf(_("this build does not support compression with %s"),
+						 "gzip");
+#endif
+			break;
+	}
+
+	/* If there is no specification, we're done already. */
+	if (specification == NULL)
+		return;
+
+	/*
+	 * As a special case, the specification can be a bare integer.
+	 *
+	 * NOTE(review): strtol() returns long and the value is narrowed to int
+	 * without a range check -- confirm whether out-of-range input matters.
+	 */
+	bare_level = strtol(specification, &bare_level_endp, 10);
+	if (specification != bare_level_endp && *bare_level_endp == '\0')
+	{
+		result->level = bare_level;
+		return;
+	}
+
+	/* Look for comma-separated keyword or keyword=value entries. */
+	while (1)
+	{
+		char	   *kwstart;
+		char	   *kwend;
+		char	   *vstart;
+		char	   *vend;
+		int			kwlen;
+		int			vlen;
+		bool		has_value;
+		char	   *keyword;
+		char	   *value;
+
+		/* Figure start, end, and length of next keyword and any value. */
+		kwstart = kwend = specification;
+		while (*kwend != '\0' && *kwend != ',' && *kwend != '=')
+			++kwend;
+		kwlen = kwend - kwstart;
+		if (*kwend != '=')
+		{
+			vstart = vend = NULL;
+			vlen = 0;
+			has_value = false;
+		}
+		else
+		{
+			vstart = vend = kwend + 1;
+			while (*vend != '\0' && *vend != ',')
+				++vend;
+			vlen = vend - vstart;
+			has_value = true;
+		}
+
+		/* Reject empty keyword. */
+		if (kwlen == 0)
+		{
+			result->parse_error =
+				pstrdup(_("found empty string where a compression option was expected"));
+			break;
+		}
+
+		/* Extract keyword and value as separate C strings. */
+		keyword = palloc(kwlen + 1);
+		memcpy(keyword, kwstart, kwlen);
+		keyword[kwlen] = '\0';
+		if (!has_value)
+			value = NULL;
+		else
+		{
+			value = palloc(vlen + 1);
+			memcpy(value, vstart, vlen);
+			value[vlen] = '\0';
+		}
+
+		/* Handle whatever keyword we found. */
+		if (strcmp(keyword, "level") == 0)
+		{
+			result->level = expect_integer_value(keyword, value, result);
+
+			/*
+			 * No need to set a flag in "options", there is a default level
+			 * set at least thanks to the logic above.
+			 */
+		}
+		else if (strcmp(keyword, "workers") == 0)
+		{
+			result->workers = expect_integer_value(keyword, value, result);
+			result->options |= PG_COMPRESSION_OPTION_WORKERS;
+		}
+		else if (strcmp(keyword, "long") == 0)
+		{
+			result->long_distance = expect_boolean_value(keyword, value, result);
+			result->options |= PG_COMPRESSION_OPTION_LONG_DISTANCE;
+		}
+		else
+			result->parse_error =
+				psprintf(_("unrecognized compression option: \"%s\""), keyword);
+
+		/* Release memory, just to be tidy. */
+		pfree(keyword);
+		if (value != NULL)
+			pfree(value);
+
+		/*
+		 * If we got an error or have reached the end of the string, stop.
+		 *
+		 * If there is no value, then the end of the keyword might have been
+		 * the end of the string.  If there is a value, then the end of the
+		 * keyword cannot have been the end of the string, but the end of the
+		 * value might have been.
+		 */
+		if (result->parse_error != NULL ||
+			(vend == NULL ? *kwend == '\0' : *vend == '\0'))
+			break;
+
+		/* Advance to next entry and loop around. */
+		specification = vend == NULL ? kwend + 1 : vend + 1;
+	}
+}
+
+/*
+ * Interpret 'value' as a base-10 integer and return it.
+ *
+ * The whole string must be consumed by the conversion.  If 'value' is
+ * NULL or is not an integer, result->parse_error is set to a message
+ * naming 'keyword' and -1 is returned.
+ */
+static int
+expect_integer_value(char *keyword, char *value, pg_compress_specification *result)
+{
+	char	   *endp;
+	int			parsed;
+
+	if (value != NULL)
+	{
+		parsed = strtol(value, &endp, 10);
+
+		/* accept only if digits were found and nothing trails them */
+		if (endp != value && *endp == '\0')
+			return parsed;
+
+		result->parse_error =
+			psprintf(_("value for compression option \"%s\" must be an integer"),
+					 keyword);
+	}
+	else
+	{
+		result->parse_error =
+			psprintf(_("compression option \"%s\" requires a value"),
+					 keyword);
+	}
+
+	return -1;
+}
+
+/*
+ * Interpret 'value' as a boolean and return it.
+ *
+ * A missing value (NULL) counts as true.  Otherwise the accepted
+ * spellings, compared without regard to case, are: yes, on, 1 for true
+ * and no, off, 0 for false.
+ *
+ * For anything else, result->parse_error is set to a message naming
+ * 'keyword' and false is returned.  Since false is also a valid result,
+ * the caller must check result->parse_error to determine if the call was
+ * successful.
+ *
+ * Inspired by ParseVariableBool().
+ */
+static bool
+expect_boolean_value(char *keyword, char *value, pg_compress_specification *result)
+{
+	static const char *const true_words[] = {"yes", "on", "1"};
+	static const char *const false_words[] = {"no", "off", "0"};
+	size_t		i;
+
+	if (value == NULL)
+		return true;
+
+	for (i = 0; i < sizeof(true_words) / sizeof(true_words[0]); i++)
+	{
+		if (pg_strcasecmp(value, true_words[i]) == 0)
+			return true;
+	}
+
+	for (i = 0; i < sizeof(false_words) / sizeof(false_words[0]); i++)
+	{
+		if (pg_strcasecmp(value, false_words[i]) == 0)
+			return false;
+	}
+
+	result->parse_error =
+		psprintf(_("value for compression option \"%s\" must be a Boolean value"),
+				 keyword);
+	return false;
+}
+
+/*
+ * Check a parsed compression specification for semantic sanity.
+ *
+ * Returns NULL if the specification parsed without error and its level,
+ * worker count and long-distance setting are acceptable for the chosen
+ * algorithm.  Otherwise returns an error message; a parse error recorded
+ * in the specification is returned as-is.
+ *
+ * Does not test whether this build of PostgreSQL supports the requested
+ * compression method.
+ */
+char *
+validate_compress_specification(pg_compress_specification *spec)
+{
+	int			lowest = 1;
+	int			highest = 1;
+	int			fallback = 0;
+
+	/* A specification that failed to parse cannot be valid. */
+	if (spec->parse_error != NULL)
+		return spec->parse_error;
+
+	/* Work out the level range and default level of the algorithm. */
+	if (spec->algorithm == PG_COMPRESSION_NONE)
+	{
+		if (spec->level != 0)
+			return psprintf(_("compression algorithm \"%s\" does not accept a compression level"),
+							get_compress_algorithm_name(spec->algorithm));
+	}
+	else if (spec->algorithm == PG_COMPRESSION_GZIP)
+	{
+		highest = 9;
+#ifdef HAVE_LIBZ
+		fallback = Z_DEFAULT_COMPRESSION;
+#endif
+	}
+	else if (spec->algorithm == PG_COMPRESSION_LZ4)
+	{
+		highest = 12;
+		fallback = 0;			/* fast mode */
+	}
+	else if (spec->algorithm == PG_COMPRESSION_ZSTD)
+	{
+#ifdef USE_ZSTD
+		highest = ZSTD_maxCLevel();
+		lowest = ZSTD_minCLevel();
+		fallback = ZSTD_CLEVEL_DEFAULT;
+#endif
+	}
+
+	/* The default level is always acceptable, even outside the range. */
+	if (spec->level != fallback &&
+		(spec->level < lowest || spec->level > highest))
+		return psprintf(_("compression algorithm \"%s\" expects a compression level between %d and %d (default at %d)"),
+						get_compress_algorithm_name(spec->algorithm),
+						lowest, highest, fallback);
+
+	/*
+	 * Of the compression algorithms that we currently support, only zstd
+	 * allows parallel workers and long-distance mode.
+	 */
+	if (spec->algorithm != PG_COMPRESSION_ZSTD)
+	{
+		if ((spec->options & PG_COMPRESSION_OPTION_WORKERS) != 0)
+			return psprintf(_("compression algorithm \"%s\" does not accept a worker count"),
+							get_compress_algorithm_name(spec->algorithm));
+
+		if ((spec->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0)
+			return psprintf(_("compression algorithm \"%s\" does not support long-distance mode"),
+							get_compress_algorithm_name(spec->algorithm));
+	}
+
+	return NULL;
+}
+
+#ifdef FRONTEND
+
+/*
+ * Basic parsing of a value specified through a command-line option, commonly
+ * -Z/--compress.
+ *
+ * The parsing consists of a METHOD:DETAIL string fed later to
+ * parse_compress_specification().  This only extracts METHOD and DETAIL.
+ * If only an integer is found, the method is implied by the value specified.
+ *
+ * *algorithm always receives a newly allocated string.  *detail receives a
+ * newly allocated string, or NULL when there is no detail.  An option
+ * without digits (including the empty string) is not treated as an
+ * integer; it is handed back as the algorithm name for the caller to
+ * validate.
+ */
+void
+parse_compress_options(const char *option, char **algorithm, char **detail)
+{
+	char	   *sep;
+	char	   *endp;
+	long		result;
+
+	/*
+	 * Check whether the compression specification consists of a bare integer.
+	 *
+	 * For backward-compatibility, assume "none" if the integer found is zero
+	 * and "gzip" otherwise.
+	 *
+	 * Require that strtol() actually consumed something (endp != option), as
+	 * the other integer checks in this file do; otherwise an empty option
+	 * would silently be read as the integer zero.
+	 */
+	result = strtol(option, &endp, 10);
+	if (endp != option && *endp == '\0')
+	{
+		if (result == 0)
+		{
+			*algorithm = pstrdup("none");
+			*detail = NULL;
+		}
+		else
+		{
+			*algorithm = pstrdup("gzip");
+			*detail = pstrdup(option);
+		}
+		return;
+	}
+
+	/*
+	 * Check whether there is a compression detail following the algorithm
+	 * name.
+	 */
+	sep = strchr(option, ':');
+	if (sep == NULL)
+	{
+		*algorithm = pstrdup(option);
+		*detail = NULL;
+	}
+	else
+	{
+		char	   *alg;
+
+		/* copy the part before the colon as the algorithm name */
+		alg = palloc((sep - option) + 1);
+		memcpy(alg, option, sep - option);
+		alg[sep - option] = '\0';
+
+		*algorithm = alg;
+		*detail = pstrdup(sep + 1);
+	}
+}
+#endif /* FRONTEND */
diff --git a/src/common/config_info.c b/src/common/config_info.c
new file mode 100644
index 0000000..09e78a6
--- /dev/null
+++ b/src/common/config_info.c
@@ -0,0 +1,201 @@
+/*-------------------------------------------------------------------------
+ *
+ * config_info.c
+ * Common code for pg_config output
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/config_info.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/config_info.h"
+
+
+/*
+ * get_configdata(const char *my_exec_path, size_t *configdata_len)
+ *
+ * Get configure-time constants, returned as an array of name/setting
+ * pairs.  my_exec_path is the executable path handed to the get_*_path()
+ * helpers to compute the directory settings; BINDIR is that same path cut
+ * at its last '/'.  The build-flag settings come from the VAL_* macros and
+ * read "not recorded" when the corresponding macro is not defined.
+ *
+ * On return, *configdata_len holds the number of entries in the array.
+ * The array itself and every name and setting string are allocated
+ * separately (palloc_array/pstrdup).  The caller is responsible
+ * for pfreeing the result.
+ */
+ConfigData *
+get_configdata(const char *my_exec_path, size_t *configdata_len)
+{
+ ConfigData *configdata;
+ char path[MAXPGPATH]; /* scratch buffer reused for each path entry */
+ char *lastsep;
+ int i = 0; /* index of the next entry to fill */
+
+ /*
+ * Adjust this to match the number of items filled below; the Assert at
+ * the end of this function cross-checks the count.
+ */
+ *configdata_len = 23;
+ configdata = palloc_array(ConfigData, *configdata_len);
+
+ configdata[i].name = pstrdup("BINDIR");
+ strlcpy(path, my_exec_path, sizeof(path));
+ lastsep = strrchr(path, '/');
+ if (lastsep)
+ *lastsep = '\0'; /* drop everything from the last '/' onwards */
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("DOCDIR");
+ get_doc_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("HTMLDIR");
+ get_html_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("INCLUDEDIR");
+ get_include_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("PKGINCLUDEDIR");
+ get_pkginclude_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("INCLUDEDIR-SERVER");
+ get_includeserver_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("LIBDIR");
+ get_lib_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("PKGLIBDIR");
+ get_pkglib_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("LOCALEDIR");
+ get_locale_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("MANDIR");
+ get_man_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("SHAREDIR");
+ get_share_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("SYSCONFDIR");
+ get_etc_path(my_exec_path, path);
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("PGXS");
+ get_pkglib_path(my_exec_path, path); /* PGXS is built on top of the PKGLIBDIR path */
+ strlcat(path, "/pgxs/src/makefiles/pgxs.mk", sizeof(path));
+ cleanup_path(path);
+ configdata[i].setting = pstrdup(path);
+ i++;
+
+ configdata[i].name = pstrdup("CONFIGURE");
+ configdata[i].setting = pstrdup(CONFIGURE_ARGS);
+ i++;
+
+ configdata[i].name = pstrdup("CC");
+#ifdef VAL_CC
+ configdata[i].setting = pstrdup(VAL_CC);
+#else
+ configdata[i].setting = pstrdup(_("not recorded")); /* VAL_CC is not defined in this build */
+#endif
+ i++;
+
+ configdata[i].name = pstrdup("CPPFLAGS");
+#ifdef VAL_CPPFLAGS
+ configdata[i].setting = pstrdup(VAL_CPPFLAGS);
+#else
+ configdata[i].setting = pstrdup(_("not recorded"));
+#endif
+ i++;
+
+ configdata[i].name = pstrdup("CFLAGS");
+#ifdef VAL_CFLAGS
+ configdata[i].setting = pstrdup(VAL_CFLAGS);
+#else
+ configdata[i].setting = pstrdup(_("not recorded"));
+#endif
+ i++;
+
+ configdata[i].name = pstrdup("CFLAGS_SL");
+#ifdef VAL_CFLAGS_SL
+ configdata[i].setting = pstrdup(VAL_CFLAGS_SL);
+#else
+ configdata[i].setting = pstrdup(_("not recorded"));
+#endif
+ i++;
+
+ configdata[i].name = pstrdup("LDFLAGS");
+#ifdef VAL_LDFLAGS
+ configdata[i].setting = pstrdup(VAL_LDFLAGS);
+#else
+ configdata[i].setting = pstrdup(_("not recorded"));
+#endif
+ i++;
+
+ configdata[i].name = pstrdup("LDFLAGS_EX");
+#ifdef VAL_LDFLAGS_EX
+ configdata[i].setting = pstrdup(VAL_LDFLAGS_EX);
+#else
+ configdata[i].setting = pstrdup(_("not recorded"));
+#endif
+ i++;
+
+ configdata[i].name = pstrdup("LDFLAGS_SL");
+#ifdef VAL_LDFLAGS_SL
+ configdata[i].setting = pstrdup(VAL_LDFLAGS_SL);
+#else
+ configdata[i].setting = pstrdup(_("not recorded"));
+#endif
+ i++;
+
+ configdata[i].name = pstrdup("LIBS");
+#ifdef VAL_LIBS
+ configdata[i].setting = pstrdup(VAL_LIBS);
+#else
+ configdata[i].setting = pstrdup(_("not recorded"));
+#endif
+ i++;
+
+ configdata[i].name = pstrdup("VERSION");
+ configdata[i].setting = pstrdup("PostgreSQL " PG_VERSION);
+ i++;
+
+ Assert(i == *configdata_len); /* every allocated slot must have been filled */
+
+ return configdata;
+}
diff --git a/src/common/controldata_utils.c b/src/common/controldata_utils.c
new file mode 100644
index 0000000..4d1cd1c
--- /dev/null
+++ b/src/common/controldata_utils.c
@@ -0,0 +1,269 @@
+/*-------------------------------------------------------------------------
+ *
+ * controldata_utils.c
+ * Common code for control data file output.
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/controldata_utils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+
+#include "access/xlog_internal.h"
+#include "catalog/pg_control.h"
+#include "common/controldata_utils.h"
+#include "common/file_perm.h"
+#ifdef FRONTEND
+#include "common/logging.h"
+#endif
+#include "port/pg_crc32c.h"
+
+#ifndef FRONTEND
+#include "pgstat.h"
+#include "storage/fd.h"
+#endif
+
+/*
+ * get_controlfile()
+ *
+ * Get controlfile values. The result is returned as a palloc'd copy of the
+ * control file data, read from "DataDir/global/pg_control".
+ *
+ * crc_ok_p can be used by the caller to see whether the CRC of the control
+ * file data is correct. It must not be NULL. The data is returned even
+ * when the CRC does not match, so callers have to check *crc_ok_p
+ * themselves.
+ *
+ * Failures to open, read or close the file are reported with ereport(ERROR)
+ * in the backend and with pg_fatal() in frontend code. A read that returns
+ * fewer bytes than sizeof(ControlFileData) is treated as a failure as well.
+ *
+ * Frontend code additionally re-reads the file a limited number of times
+ * when the CRC does not match; see the comment at the retry logic below.
+ */
+ControlFileData *
+get_controlfile(const char *DataDir, bool *crc_ok_p)
+{
+ ControlFileData *ControlFile;
+ int fd;
+ char ControlFilePath[MAXPGPATH];
+ pg_crc32c crc;
+ int r; /* return value of read() */
+#ifdef FRONTEND
+ pg_crc32c last_crc; /* CRC computed by the previous attempt */
+ int retries = 0;
+#endif
+
+ Assert(crc_ok_p);
+
+ ControlFile = palloc_object(ControlFileData);
+ snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir);
+
+#ifdef FRONTEND
+ INIT_CRC32C(last_crc);
+
+retry:
+#endif
+
+#ifndef FRONTEND
+ if ((fd = OpenTransientFile(ControlFilePath, O_RDONLY | PG_BINARY)) == -1)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\" for reading: %m",
+ ControlFilePath)));
+#else
+ if ((fd = open(ControlFilePath, O_RDONLY | PG_BINARY, 0)) == -1)
+ pg_fatal("could not open file \"%s\" for reading: %m",
+ ControlFilePath);
+#endif
+
+ r = read(fd, ControlFile, sizeof(ControlFileData));
+ if (r != sizeof(ControlFileData))
+ {
+ if (r < 0) /* read() itself failed, errno is meaningful */
+#ifndef FRONTEND
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m", ControlFilePath)));
+#else
+ pg_fatal("could not read file \"%s\": %m", ControlFilePath);
+#endif
+ else /* short read: fewer bytes than the struct needs */
+#ifndef FRONTEND
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("could not read file \"%s\": read %d of %zu",
+ ControlFilePath, r, sizeof(ControlFileData))));
+#else
+ pg_fatal("could not read file \"%s\": read %d of %zu",
+ ControlFilePath, r, sizeof(ControlFileData));
+#endif
+ }
+
+#ifndef FRONTEND
+ if (CloseTransientFile(fd) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not close file \"%s\": %m",
+ ControlFilePath)));
+#else
+ if (close(fd) != 0)
+ pg_fatal("could not close file \"%s\": %m", ControlFilePath);
+#endif
+
+ /*
+ * Check the CRC. It is computed over every byte of the struct that
+ * precedes the crc field and compared with the stored value.
+ */
+ INIT_CRC32C(crc);
+ COMP_CRC32C(crc,
+ (char *) ControlFile,
+ offsetof(ControlFileData, crc));
+ FIN_CRC32C(crc);
+
+ *crc_ok_p = EQ_CRC32C(crc, ControlFile->crc);
+
+#ifdef FRONTEND
+
+ /*
+ * If the server was writing at the same time, it is possible that we read
+ * partially updated contents on some systems. If the CRC doesn't match,
+ * retry a limited number of times until we compute the same bad CRC twice
+ * in a row with a short sleep in between. Then the failure is unlikely
+ * to be due to a concurrent write.
+ */
+ if (!*crc_ok_p &&
+ (retries == 0 || !EQ_CRC32C(crc, last_crc)) &&
+ retries < 10)
+ {
+ retries++;
+ last_crc = crc;
+ pg_usleep(10000);
+ goto retry; /* the file was closed above, so it is reopened */
+ }
+#endif
+
+ /*
+ * Make sure the control file is valid byte order. The test fires when
+ * the version number is a nonzero multiple of 65536, i.e. its low 16
+ * bits are all zero; presumably that is how a small version number looks
+ * when read with the opposite byte order. The backend raises an error,
+ * frontend code only warns and still returns the data.
+ */
+ if (ControlFile->pg_control_version % 65536 == 0 &&
+ ControlFile->pg_control_version / 65536 != 0)
+#ifndef FRONTEND
+ elog(ERROR, _("byte ordering mismatch"));
+#else
+ pg_log_warning("possible byte ordering mismatch\n"
+ "The byte ordering used to store the pg_control file might not match the one\n"
+ "used by this program. In that case the results below would be incorrect, and\n"
+ "the PostgreSQL installation would be incompatible with this data directory.");
+#endif
+
+ return ControlFile;
+}
+
+/*
+ * update_controlfile()
+ *
+ * Update controlfile values with the contents given by caller. The
+ * contents to write are included in "ControlFile". "do_sync" can be
+ * optionally used to flush the updated control file. Note that it is up
+ * to the caller to properly lock ControlFileLock when calling this
+ * routine in the backend.
+ *
+ * The caller's struct is modified: its "time" field is set to the current
+ * time and its "crc" field is recomputed before the data is written.
+ *
+ * The file written is "DataDir/XLOG_CONTROL_FILE". Neither open call
+ * passes O_CREAT, so the file is expected to exist already. Any failure
+ * is reported with ereport(PANIC) in the backend and with pg_fatal() in
+ * frontend code.
+ */
+void
+update_controlfile(const char *DataDir,
+ ControlFileData *ControlFile, bool do_sync)
+{
+ int fd;
+ char buffer[PG_CONTROL_FILE_SIZE]; /* zero-padded image of the file */
+ char ControlFilePath[MAXPGPATH];
+
+ /* Update timestamp */
+ ControlFile->time = (pg_time_t) time(NULL);
+
+ /*
+ * Recalculate CRC of control file. It covers every byte of the struct
+ * that precedes the crc field, so it includes the timestamp set above.
+ */
+ INIT_CRC32C(ControlFile->crc);
+ COMP_CRC32C(ControlFile->crc,
+ (char *) ControlFile,
+ offsetof(ControlFileData, crc));
+ FIN_CRC32C(ControlFile->crc);
+
+ /*
+ * Write out PG_CONTROL_FILE_SIZE bytes into pg_control by zero-padding
+ * the excess over sizeof(ControlFileData), to avoid premature EOF related
+ * errors when reading it.
+ */
+ memset(buffer, 0, PG_CONTROL_FILE_SIZE);
+ memcpy(buffer, ControlFile, sizeof(ControlFileData));
+
+ snprintf(ControlFilePath, sizeof(ControlFilePath), "%s/%s", DataDir, XLOG_CONTROL_FILE);
+
+#ifndef FRONTEND
+
+ /*
+ * All errors issue a PANIC, so no need to use OpenTransientFile() and to
+ * worry about file descriptor leaks.
+ */
+ if ((fd = BasicOpenFile(ControlFilePath, O_RDWR | PG_BINARY)) < 0)
+ ereport(PANIC,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m",
+ ControlFilePath)));
+#else
+ if ((fd = open(ControlFilePath, O_WRONLY | PG_BINARY,
+ pg_file_create_mode)) == -1)
+ pg_fatal("could not open file \"%s\": %m", ControlFilePath);
+#endif
+
+ errno = 0; /* lets us tell below whether write() set errno */
+#ifndef FRONTEND
+ pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE);
+#endif
+ if (write(fd, buffer, PG_CONTROL_FILE_SIZE) != PG_CONTROL_FILE_SIZE)
+ {
+ /* if write didn't set errno, assume problem is no disk space */
+ if (errno == 0)
+ errno = ENOSPC;
+
+#ifndef FRONTEND
+ ereport(PANIC,
+ (errcode_for_file_access(),
+ errmsg("could not write file \"%s\": %m",
+ ControlFilePath)));
+#else
+ pg_fatal("could not write file \"%s\": %m", ControlFilePath);
+#endif
+ }
+#ifndef FRONTEND
+ pgstat_report_wait_end();
+#endif
+
+ if (do_sync) /* caller asked for the file to be flushed */
+ {
+#ifndef FRONTEND
+ pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE);
+ if (pg_fsync(fd) != 0)
+ ereport(PANIC,
+ (errcode_for_file_access(),
+ errmsg("could not fsync file \"%s\": %m",
+ ControlFilePath)));
+ pgstat_report_wait_end();
+#else
+ if (fsync(fd) != 0)
+ pg_fatal("could not fsync file \"%s\": %m", ControlFilePath);
+#endif
+ }
+
+ if (close(fd) != 0)
+ {
+#ifndef FRONTEND
+ ereport(PANIC,
+ (errcode_for_file_access(),
+ errmsg("could not close file \"%s\": %m",
+ ControlFilePath)));
+#else
+ pg_fatal("could not close file \"%s\": %m", ControlFilePath);
+#endif
+ }
+}
diff --git a/src/common/cryptohash.c b/src/common/cryptohash.c
new file mode 100644
index 0000000..42dbed7
--- /dev/null
+++ b/src/common/cryptohash.c
@@ -0,0 +1,273 @@
+/*-------------------------------------------------------------------------
+ *
+ * cryptohash.c
+ * Fallback implementations for cryptographic hash functions.
+ *
+ * This is the set of in-core functions used when there are no other
+ * alternative options like OpenSSL.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/cryptohash.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <sys/param.h>
+
+#include "common/cryptohash.h"
+#include "md5_int.h"
+#include "sha1_int.h"
+#include "sha2_int.h"
+
+/*
+ * Allocation wrappers for hash contexts.
+ *
+ * Frontend code goes through malloc/free so that an allocation failure can
+ * be handed back to the caller as a status.  The backend goes through
+ * palloc/pfree, which keeps its error handling simple.
+ */
+#ifdef FRONTEND
+#define ALLOC(size) malloc(size)
+#define FREE(ptr) free(ptr)
+#else
+#define ALLOC(size) palloc(size)
+#define FREE(ptr) pfree(ptr)
+#endif
+
+/*
+ * Set of error states that a hash context can record; they are translated
+ * into messages by pg_cryptohash_error().
+ */
+typedef enum pg_cryptohash_errno
+{
+ PG_CRYPTOHASH_ERROR_NONE = 0, /* no error recorded */
+ PG_CRYPTOHASH_ERROR_DEST_LEN /* result buffer shorter than the digest */
+} pg_cryptohash_errno;
+
+/*
+ * Internal pg_cryptohash_ctx structure
+ *
+ * "type" is fixed when the context is created and selects which member of
+ * the "data" union the other routines operate on.
+ */
+struct pg_cryptohash_ctx
+{
+ pg_cryptohash_type type; /* hash algorithm of this context */
+ pg_cryptohash_errno error; /* error state, set by pg_cryptohash_final() */
+
+ union
+ {
+ pg_md5_ctx md5;
+ pg_sha1_ctx sha1;
+ pg_sha224_ctx sha224;
+ pg_sha256_ctx sha256;
+ pg_sha384_ctx sha384;
+ pg_sha512_ctx sha512;
+ } data; /* algorithm-specific state */
+};
+
+/*
+ * pg_cryptohash_create
+ *
+ * Allocate a zeroed hash context for the given algorithm, with no error
+ * recorded.  Returns NULL if the allocation fails (out of memory); in the
+ * backend the allocator raises an error instead of returning.
+ */
+pg_cryptohash_ctx *
+pg_cryptohash_create(pg_cryptohash_type type)
+{
+	pg_cryptohash_ctx *result;
+
+	/*
+	 * The context is sized for the largest hash state regardless of the
+	 * requested type.  md5, sha224 and sha256 could do with less, but the
+	 * saving is too small to justify type-dependent sizing.
+	 */
+	result = ALLOC(sizeof(pg_cryptohash_ctx));
+	if (result != NULL)
+	{
+		memset(result, 0, sizeof(pg_cryptohash_ctx));
+		result->error = PG_CRYPTOHASH_ERROR_NONE;
+		result->type = type;
+	}
+
+	return result;
+}
+
+/*
+ * pg_cryptohash_init
+ *
+ * Run the initialization routine matching the context's hash type.
+ * Returns 0 on success, and -1 when no context is given.
+ */
+int
+pg_cryptohash_init(pg_cryptohash_ctx *ctx)
+{
+	if (ctx != NULL)
+	{
+		/* Dispatch on the type chosen at creation time. */
+		switch (ctx->type)
+		{
+			case PG_SHA512:
+				pg_sha512_init(&ctx->data.sha512);
+				break;
+			case PG_SHA384:
+				pg_sha384_init(&ctx->data.sha384);
+				break;
+			case PG_SHA256:
+				pg_sha256_init(&ctx->data.sha256);
+				break;
+			case PG_SHA224:
+				pg_sha224_init(&ctx->data.sha224);
+				break;
+			case PG_SHA1:
+				pg_sha1_init(&ctx->data.sha1);
+				break;
+			case PG_MD5:
+				pg_md5_init(&ctx->data.md5);
+				break;
+		}
+
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * pg_cryptohash_update
+ *
+ * Feed "len" bytes starting at "data" to the update routine matching the
+ * context's hash type.  Returns 0 on success, and -1 when no context is
+ * given.
+ */
+int
+pg_cryptohash_update(pg_cryptohash_ctx *ctx, const uint8 *data, size_t len)
+{
+	if (ctx != NULL)
+	{
+		/* Dispatch on the type chosen at creation time. */
+		switch (ctx->type)
+		{
+			case PG_SHA512:
+				pg_sha512_update(&ctx->data.sha512, data, len);
+				break;
+			case PG_SHA384:
+				pg_sha384_update(&ctx->data.sha384, data, len);
+				break;
+			case PG_SHA256:
+				pg_sha256_update(&ctx->data.sha256, data, len);
+				break;
+			case PG_SHA224:
+				pg_sha224_update(&ctx->data.sha224, data, len);
+				break;
+			case PG_SHA1:
+				pg_sha1_update(&ctx->data.sha1, data, len);
+				break;
+			case PG_MD5:
+				pg_md5_update(&ctx->data.md5, data, len);
+				break;
+		}
+
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * pg_cryptohash_final
+ *
+ * Finalize a hash context, storing the digest in "dest", a buffer of "len"
+ * bytes.  Returns 0 on success.  Returns -1 when no context is given, or
+ * when "len" is smaller than the digest of the context's hash type; in the
+ * latter case the context records PG_CRYPTOHASH_ERROR_DEST_LEN and nothing
+ * is written to "dest".
+ */
+int
+pg_cryptohash_final(pg_cryptohash_ctx *ctx, uint8 *dest, size_t len)
+{
+	size_t		digest_len = 0;
+
+	if (ctx == NULL)
+		return -1;
+
+	/* Look up how many bytes the digest of this hash type occupies. */
+	switch (ctx->type)
+	{
+		case PG_MD5:
+			digest_len = MD5_DIGEST_LENGTH;
+			break;
+		case PG_SHA1:
+			digest_len = SHA1_DIGEST_LENGTH;
+			break;
+		case PG_SHA224:
+			digest_len = PG_SHA224_DIGEST_LENGTH;
+			break;
+		case PG_SHA256:
+			digest_len = PG_SHA256_DIGEST_LENGTH;
+			break;
+		case PG_SHA384:
+			digest_len = PG_SHA384_DIGEST_LENGTH;
+			break;
+		case PG_SHA512:
+			digest_len = PG_SHA512_DIGEST_LENGTH;
+			break;
+	}
+
+	/* Refuse to write a digest that would overrun the caller's buffer. */
+	if (len < digest_len)
+	{
+		ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN;
+		return -1;
+	}
+
+	/* The buffer is large enough, so produce the digest. */
+	switch (ctx->type)
+	{
+		case PG_MD5:
+			pg_md5_final(&ctx->data.md5, dest);
+			break;
+		case PG_SHA1:
+			pg_sha1_final(&ctx->data.sha1, dest);
+			break;
+		case PG_SHA224:
+			pg_sha224_final(&ctx->data.sha224, dest);
+			break;
+		case PG_SHA256:
+			pg_sha256_final(&ctx->data.sha256, dest);
+			break;
+		case PG_SHA384:
+			pg_sha384_final(&ctx->data.sha384, dest);
+			break;
+		case PG_SHA512:
+			pg_sha512_final(&ctx->data.sha512, dest);
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * pg_cryptohash_free
+ *
+ * Release a hash context.  Its contents are wiped with explicit_bzero()
+ * before the memory is handed back.  Passing NULL is allowed and does
+ * nothing.
+ */
+void
+pg_cryptohash_free(pg_cryptohash_ctx *ctx)
+{
+	if (ctx != NULL)
+	{
+		explicit_bzero(ctx, sizeof(pg_cryptohash_ctx));
+		FREE(ctx);
+	}
+}
+
+/*
+ * pg_cryptohash_error
+ *
+ * Map the error state recorded in a hash context to a message describing
+ * what went wrong during a computation.  The returned string is static and
+ * must not be freed.
+ */
+const char *
+pg_cryptohash_error(pg_cryptohash_ctx *ctx)
+{
+	/*
+	 * Context creation is the only step of this implementation that can run
+	 * out of memory, so a missing context is reported as such.
+	 */
+	if (ctx == NULL)
+		return _("out of memory");
+
+	if (ctx->error == PG_CRYPTOHASH_ERROR_NONE)
+		return _("success");
+
+	if (ctx->error == PG_CRYPTOHASH_ERROR_DEST_LEN)
+		return _("destination buffer too small");
+
+	/* Every known error state has been handled above. */
+	Assert(false);
+	return _("success");
+}
diff --git a/src/common/cryptohash_openssl.c b/src/common/cryptohash_openssl.c
new file mode 100644
index 0000000..a654cd4
--- /dev/null
+++ b/src/common/cryptohash_openssl.c
@@ -0,0 +1,353 @@
+/*-------------------------------------------------------------------------
+ *
+ * cryptohash_openssl.c
+ * Set of wrapper routines on top of OpenSSL to support cryptographic
+ * hash functions.
+ *
+ * This should only be used if code is compiled with OpenSSL support.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/cryptohash_openssl.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <openssl/err.h>
+#include <openssl/evp.h>
+
+#include "common/cryptohash.h"
+#include "common/md5.h"
+#include "common/sha1.h"
+#include "common/sha2.h"
+#ifndef FRONTEND
+#include "utils/memutils.h"
+#include "utils/resowner.h"
+#include "utils/resowner_private.h"
+#endif
+
+/*
+ * Allocation wrappers for hash contexts.
+ *
+ * Frontend code goes through malloc/free so that an allocation failure can
+ * be handed back to the caller as a status.  The backend allocates in
+ * TopMemoryContext, to account for the cleanup handling done through
+ * resource owners.
+ */
+#ifdef FRONTEND
+#define ALLOC(size) malloc(size)
+#define FREE(ptr) free(ptr)
+#else
+#define ALLOC(size) MemoryContextAlloc(TopMemoryContext, size)
+#define FREE(ptr) pfree(ptr)
+#endif
+
+/*
+ * Set of error states that a hash context can record; they are translated
+ * into messages by pg_cryptohash_error().
+ */
+typedef enum pg_cryptohash_errno
+{
+ PG_CRYPTOHASH_ERROR_NONE = 0, /* no error recorded */
+ PG_CRYPTOHASH_ERROR_DEST_LEN, /* result buffer shorter than the digest */
+ PG_CRYPTOHASH_ERROR_OPENSSL /* an OpenSSL EVP call reported failure */
+} pg_cryptohash_errno;
+
+/*
+ * Internal pg_cryptohash_ctx structure.
+ *
+ * This tracks the resource owner associated to each EVP context data
+ * for the backend. The resowner field exists in backend builds only.
+ */
+struct pg_cryptohash_ctx
+{
+ pg_cryptohash_type type; /* hash algorithm of this context */
+ pg_cryptohash_errno error; /* error state of the last failed call */
+ const char *errreason; /* OpenSSL's reason string for a failure, or NULL */
+
+ EVP_MD_CTX *evpctx; /* OpenSSL digest context from EVP_MD_CTX_create() */
+
+#ifndef FRONTEND
+ ResourceOwner resowner; /* owner the context was registered with at creation */
+#endif
+};
+
+/*
+ * Translate an OpenSSL error code into its reason string.
+ *
+ * Returns NULL when ecode is zero.  ERR_reason_error_string() may itself
+ * return NULL; callers then fall back to a default error message.
+ */
+static const char *
+SSLerrmessage(unsigned long ecode)
+{
+	return (ecode != 0) ? ERR_reason_error_string(ecode) : NULL;
+}
+
+/*
+ * pg_cryptohash_create
+ *
+ * Allocate a hash context for the given algorithm together with its OpenSSL
+ * digest context.  Returns NULL on failure for an OOM.  The backend issues
+ * an error, without returning, when the OpenSSL context cannot be created;
+ * on success it also registers the new context with CurrentResourceOwner.
+ */
+pg_cryptohash_ctx *
+pg_cryptohash_create(pg_cryptohash_type type)
+{
+	pg_cryptohash_ctx *newctx;
+
+#ifndef FRONTEND
+
+	/*
+	 * Reserve room in the resource owner before allocating anything: this
+	 * step can fail with "out of memory", and doing it first means there is
+	 * nothing to leak when it does.
+	 */
+	ResourceOwnerEnlargeCryptoHash(CurrentResourceOwner);
+#endif
+
+	newctx = ALLOC(sizeof(pg_cryptohash_ctx));
+	if (newctx == NULL)
+		return NULL;
+
+	memset(newctx, 0, sizeof(pg_cryptohash_ctx));
+	newctx->errreason = NULL;
+	newctx->error = PG_CRYPTOHASH_ERROR_NONE;
+	newctx->type = type;
+
+	/*
+	 * The digest type is assigned to the OpenSSL context later, at
+	 * initialization.  Drop any errors left unconsumed in the queue by
+	 * previous runs before calling into OpenSSL.
+	 */
+	ERR_clear_error();
+	newctx->evpctx = EVP_MD_CTX_create();
+
+	if (newctx->evpctx == NULL)
+	{
+		/* Undo the allocation above, wiping it first. */
+		explicit_bzero(newctx, sizeof(pg_cryptohash_ctx));
+		FREE(newctx);
+#ifndef FRONTEND
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+#else
+		return NULL;
+#endif
+	}
+
+#ifndef FRONTEND
+	newctx->resowner = CurrentResourceOwner;
+	ResourceOwnerRememberCryptoHash(CurrentResourceOwner,
+									PointerGetDatum(newctx));
+#endif
+
+	return newctx;
+}
+
+/*
+ * pg_cryptohash_init
+ *
+ * Initialize a hash context by binding its OpenSSL digest context to the
+ * digest matching the context's hash type.  Returns 0 on success, and -1 on
+ * failure; on an OpenSSL failure the context records the error state and
+ * OpenSSL's reason string.
+ */
+int
+pg_cryptohash_init(pg_cryptohash_ctx *ctx)
+{
+	int			rc = 0;
+
+	if (ctx == NULL)
+		return -1;
+
+	switch (ctx->type)
+	{
+		case PG_SHA512:
+			rc = EVP_DigestInit_ex(ctx->evpctx, EVP_sha512(), NULL);
+			break;
+		case PG_SHA384:
+			rc = EVP_DigestInit_ex(ctx->evpctx, EVP_sha384(), NULL);
+			break;
+		case PG_SHA256:
+			rc = EVP_DigestInit_ex(ctx->evpctx, EVP_sha256(), NULL);
+			break;
+		case PG_SHA224:
+			rc = EVP_DigestInit_ex(ctx->evpctx, EVP_sha224(), NULL);
+			break;
+		case PG_SHA1:
+			rc = EVP_DigestInit_ex(ctx->evpctx, EVP_sha1(), NULL);
+			break;
+		case PG_MD5:
+			rc = EVP_DigestInit_ex(ctx->evpctx, EVP_md5(), NULL);
+			break;
+	}
+
+	/* OpenSSL internals return 1 on success, 0 on failure */
+	if (rc > 0)
+		return 0;
+
+	ctx->errreason = SSLerrmessage(ERR_get_error());
+	ctx->error = PG_CRYPTOHASH_ERROR_OPENSSL;
+
+	/*
+	 * Having consumed one error, the OpenSSL queue would normally be empty
+	 * now.  FIPS-enabled OpenSSL builds can however generate two errors on
+	 * cipher initialization, so clear the queue here as well.
+	 */
+	ERR_clear_error();
+	return -1;
+}
+
+/*
+ * pg_cryptohash_update
+ *
+ * Feed "len" bytes starting at "data" into the hash context.  Returns 0 on
+ * success, and -1 on failure; on an OpenSSL failure the context records the
+ * error state and OpenSSL's reason string.
+ */
+int
+pg_cryptohash_update(pg_cryptohash_ctx *ctx, const uint8 *data, size_t len)
+{
+	int			rc;
+
+	if (ctx == NULL)
+		return -1;
+
+	rc = EVP_DigestUpdate(ctx->evpctx, data, len);
+
+	/* OpenSSL internals return 1 on success, 0 on failure */
+	if (rc > 0)
+		return 0;
+
+	ctx->errreason = SSLerrmessage(ERR_get_error());
+	ctx->error = PG_CRYPTOHASH_ERROR_OPENSSL;
+	return -1;
+}
+
+/*
+ * pg_cryptohash_final
+ *
+ * Finalize a hash context, storing the digest in "dest", a buffer of "len"
+ * bytes.  Returns 0 on success, and -1 on failure.  When "len" is smaller
+ * than the digest of the context's hash type, the context records
+ * PG_CRYPTOHASH_ERROR_DEST_LEN and OpenSSL is not called; on an OpenSSL
+ * failure it records the error state and OpenSSL's reason string.
+ */
+int
+pg_cryptohash_final(pg_cryptohash_ctx *ctx, uint8 *dest, size_t len)
+{
+	size_t		digest_len = 0;
+	int			rc;
+
+	if (ctx == NULL)
+		return -1;
+
+	/* Look up how many bytes the digest of this hash type occupies. */
+	switch (ctx->type)
+	{
+		case PG_MD5:
+			digest_len = MD5_DIGEST_LENGTH;
+			break;
+		case PG_SHA1:
+			digest_len = SHA1_DIGEST_LENGTH;
+			break;
+		case PG_SHA224:
+			digest_len = PG_SHA224_DIGEST_LENGTH;
+			break;
+		case PG_SHA256:
+			digest_len = PG_SHA256_DIGEST_LENGTH;
+			break;
+		case PG_SHA384:
+			digest_len = PG_SHA384_DIGEST_LENGTH;
+			break;
+		case PG_SHA512:
+			digest_len = PG_SHA512_DIGEST_LENGTH;
+			break;
+	}
+
+	/* Refuse to write a digest that would overrun the caller's buffer. */
+	if (len < digest_len)
+	{
+		ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN;
+		return -1;
+	}
+
+	/* No output-length pointer is passed to OpenSSL. */
+	rc = EVP_DigestFinal_ex(ctx->evpctx, dest, NULL);
+
+	/* OpenSSL internals return 1 on success, 0 on failure */
+	if (rc > 0)
+		return 0;
+
+	ctx->errreason = SSLerrmessage(ERR_get_error());
+	ctx->error = PG_CRYPTOHASH_ERROR_OPENSSL;
+	return -1;
+}
+
+/*
+ * pg_cryptohash_free
+ *
+ * Release a hash context: destroy its OpenSSL digest context, make the
+ * backend's resource owner forget it, then wipe and free the structure.
+ * Passing NULL is allowed and does nothing.
+ */
+void
+pg_cryptohash_free(pg_cryptohash_ctx *ctx)
+{
+	if (ctx != NULL)
+	{
+		EVP_MD_CTX_destroy(ctx->evpctx);
+
+#ifndef FRONTEND
+		/* Unregister from the owner recorded when the context was created. */
+		ResourceOwnerForgetCryptoHash(ctx->resowner,
+									  PointerGetDatum(ctx));
+#endif
+
+		explicit_bzero(ctx, sizeof(pg_cryptohash_ctx));
+		FREE(ctx);
+	}
+}
+
+/*
+ * pg_cryptohash_error
+ *
+ * Return a string providing details about an error that happened during a
+ * computation.  The reason string captured from OpenSSL is preferred when
+ * there is one; otherwise the message is derived from the error state
+ * recorded in the context.  The result must not be freed.
+ */
+const char *
+pg_cryptohash_error(pg_cryptohash_ctx *ctx)
+{
+	/*
+	 * Context creation is the only step of this implementation that can run
+	 * out of memory, so a missing context is reported as such.
+	 */
+	if (ctx == NULL)
+		return _("out of memory");
+
+	/* A reason provided by OpenSSL takes precedence over the error code. */
+	if (ctx->errreason != NULL)
+		return ctx->errreason;
+
+	if (ctx->error == PG_CRYPTOHASH_ERROR_NONE)
+		return _("success");
+
+	if (ctx->error == PG_CRYPTOHASH_ERROR_DEST_LEN)
+		return _("destination buffer too small");
+
+	if (ctx->error == PG_CRYPTOHASH_ERROR_OPENSSL)
+		return _("OpenSSL failure");
+
+	Assert(false);				/* cannot be reached */
+	return _("success");
+}
diff --git a/src/common/d2s.c b/src/common/d2s.c
new file mode 100644
index 0000000..614e981
--- /dev/null
+++ b/src/common/d2s.c
@@ -0,0 +1,1076 @@
+/*---------------------------------------------------------------------------
+ *
+ * Ryu floating-point output for double precision.
+ *
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/d2s.c
+ *
+ * This is a modification of code taken from github.com/ulfjack/ryu under the
+ * terms of the Boost license (not the Apache license). The original copyright
+ * notice follows:
+ *
+ * Copyright 2018 Ulf Adams
+ *
+ * The contents of this file may be used under the terms of the Apache
+ * License, Version 2.0.
+ *
+ * (See accompanying file LICENSE-Apache or copy at
+ * http://www.apache.org/licenses/LICENSE-2.0)
+ *
+ * Alternatively, the contents of this file may be used under the terms of the
+ * Boost Software License, Version 1.0.
+ *
+ * (See accompanying file LICENSE-Boost or copy at
+ * https://www.boost.org/LICENSE_1_0.txt)
+ *
+ * Unless required by applicable law or agreed to in writing, this software is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+/*
+ * Compile-time options:
+ *
+ * -DRYU_ONLY_64_BIT_OPS Avoid using uint128 or 64-bit intrinsics. Slower,
+ * depending on your compiler.
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/shortest_dec.h"
+
+/*
+ * For consistency, we use 128-bit types if and only if the rest of PG also
+ * does, even though we could use them here without worrying about the
+ * alignment concerns that apply elsewhere.
+ */
+#if !defined(HAVE_INT128) && defined(_MSC_VER) \
+ && !defined(RYU_ONLY_64_BIT_OPS) && defined(_M_X64)
+#define HAS_64_BIT_INTRINSICS
+#endif
+
+#include "ryu_common.h"
+#include "digit_table.h"
+#include "d2s_full_table.h"
+#include "d2s_intrinsics.h"
+
+#define DOUBLE_MANTISSA_BITS 52
+#define DOUBLE_EXPONENT_BITS 11
+#define DOUBLE_BIAS 1023
+
+#define DOUBLE_POW5_INV_BITCOUNT 122
+#define DOUBLE_POW5_BITCOUNT 121
+
+
+/*
+ * Counts how many times value can be divided evenly by 5, i.e. the exponent
+ * of 5 in value.  value must be nonzero.
+ */
+static inline uint32
+pow5Factor(uint64 value)
+{
+	uint32		exponent = 0;
+	bool		divisible;
+
+	do
+	{
+		uint64		quotient;
+		uint32		remainder;
+
+		Assert(value != 0);
+		quotient = div5(value);
+		remainder = (uint32) (value - 5 * quotient);
+
+		divisible = (remainder == 0);
+		if (divisible)
+		{
+			/* Strip one factor of 5 and try again. */
+			value = quotient;
+			++exponent;
+		}
+	} while (divisible);
+
+	return exponent;
+}
+
+/*
+ * Returns true if 5^p divides value, i.e. value contains at least p factors
+ * of 5.
+ */
+static inline bool
+multipleOfPowerOf5(const uint64 value, const uint32 p)
+{
+	/*
+	 * A case distinction on p was tried here, but it made no difference to
+	 * performance.
+	 */
+	const uint32 fives = pow5Factor(value);
+
+	return fives >= p;
+}
+
+/*
+ * Returns true if 2^p divides value, i.e. the low p bits of value are all
+ * zero.  p must be less than 64 for the shift below to be well defined.
+ */
+static inline bool
+multipleOfPowerOf2(const uint64 value, const uint32 p)
+{
+	/* Alternative formulation: __builtin_ctzll(value) >= p */
+	const uint64 lowBits = (UINT64CONST(1) << p) - 1;
+
+	return (value & lowBits) == 0;
+}
+
+/*
+ * We need a 64x128-bit multiplication and a subsequent 128-bit shift.
+ *
+ * Multiplication:
+ *
+ * The 64-bit factor is variable and passed in, the 128-bit factor comes
+ * from a lookup table. We know that the 64-bit factor only has 55
+ * significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
+ * factor only has 124 significant bits (i.e., the 4 topmost bits are
+ * zeros).
+ *
+ * Shift:
+ *
+ * In principle, the multiplication result requires 55 + 124 = 179 bits to
+ * represent. However, we then shift this value to the right by j, and j is
+ * at least 115, so the result is guaranteed to fit into 179 - 115 =
+ * 64 bits. This means that we only need the topmost 64 significant bits of
+ * the 64x128-bit multiplication.
+ *
+ * There are several ways to do this:
+ *
+ * 1. Best case: the compiler exposes a 128-bit type.
+ * We perform two 64x64-bit multiplications, add the higher 64 bits of the
+ * lower result to the higher result, and shift by j - 64 bits.
+ *
+ * We explicitly cast from 64-bit to 128-bit, so the compiler can tell
+ * that these are only 64-bit inputs, and can map these to the best
+ * possible sequence of assembly instructions. x86-64 machines happen to
+ * have matching assembly instructions for 64x64-bit multiplications and
+ * 128-bit shifts.
+ *
+ * 2. Second best case: the compiler exposes intrinsics for the x86-64
+ * assembly instructions mentioned in 1.
+ *
+ * 3. We only have 64x64 bit instructions that return the lower 64 bits of
+ * the result, i.e., we have to use plain C.
+ *
+ * Our inputs are less than the full width, so we have three options:
+ * a. Ignore this fact and just implement the intrinsics manually.
+ * b. Split both into 31-bit pieces, which guarantees no internal
+ * overflow, but requires extra work upfront (unless we change the
+ * lookup table).
+ * c. Split only the first factor into 31-bit pieces, which also
+ * guarantees no internal overflow, but requires extra work since the
+ * intermediate results are not perfectly aligned.
+ */
+#if defined(HAVE_INT128)
+
+/*
+ * Best case: use 128-bit type.
+ *
+ * Returns the low 64 bits of (m * mul) >> j, where mul[0] and mul[1] are the
+ * low and high 64-bit halves of the 128-bit factor.  The shift is applied as
+ * j - 64 to the upper part of the product, so j is expected to be >= 64.
+ */
+static inline uint64
+mulShift(const uint64 m, const uint64 *const mul, const int32 j)
+{
+	const uint128 wide = (uint128) m;
+	const uint128 lowProduct = wide * mul[0];
+	const uint128 highProduct = wide * mul[1];
+
+	/* Fold the upper half of the low product into the high product. */
+	const uint128 upper = (lowProduct >> 64) + highProduct;
+
+	return (uint64) (upper >> (j - 64));
+}
+
+/*
+ * Runs mulShift on the three values derived from m: 4m + 2 (result stored in
+ * *vp), 4m - 1 - mmShift (result stored in *vm) and 4m itself (result
+ * returned).
+ */
+static inline uint64
+mulShiftAll(const uint64 m, const uint64 *const mul, const int32 j,
+			uint64 *const vp, uint64 *const vm, const uint32 mmShift)
+{
+	const uint64 mv = 4 * m;
+
+	*vp = mulShift(mv + 2, mul, j);
+	*vm = mulShift(mv - 1 - mmShift, mul, j);
+
+	return mulShift(mv, mul, j);
+}
+
+#elif defined(HAS_64_BIT_INTRINSICS)
+
+/*
+ * Second-best case: no 128-bit type, but 64x64-bit multiply intrinsics.
+ *
+ * Builds the upper 128 bits of m * mul from two umul128 calls and shifts
+ * them right by j - 64.  (umul128 is used here as returning the low half of
+ * the product and storing the high half through its last argument.)
+ */
+static inline uint64
+mulShift(const uint64 m, const uint64 *const mul, const int32 j)
+{
+	uint64		high0;
+	uint64		high1;
+	uint64		middle;
+
+	/* m is maximum 55 bits */
+	const uint64 low1 = umul128(m, mul[1], &high1);
+
+	/* Of the low product, only the upper half contributes to the result. */
+	(void) umul128(m, mul[0], &high0);
+
+	middle = high0 + low1;
+	if (middle < high0)
+		++high1;				/* the addition wrapped: carry into high1 */
+
+	return shiftright128(middle, high1, j - 64);
+}
+
+/*
+ * Runs mulShift on the three values derived from m: 4m + 2 (result stored in
+ * *vp), 4m - 1 - mmShift (result stored in *vm) and 4m itself (result
+ * returned).
+ */
+static inline uint64
+mulShiftAll(const uint64 m, const uint64 *const mul, const int32 j,
+			uint64 *const vp, uint64 *const vm, const uint32 mmShift)
+{
+	const uint64 mv = 4 * m;
+
+	*vp = mulShift(mv + 2, mul, j);
+	*vm = mulShift(mv - 1 - mmShift, mul, j);
+
+	return mulShift(mv, mul, j);
+}
+
+#else /* !defined(HAVE_INT128) && !defined(HAS_64_BIT_INTRINSICS) */
+/*
+ * Plain-C variant of mulShiftAll, used when neither a 128-bit type nor the
+ * 64-bit intrinsics are selected.
+ *
+ * Writing M for the value of m on entry, this stores the shifted product for
+ * 4M + 2 in *vp, the one for 4M - 1 - mmShift in *vm, and returns the one
+ * for 4M.  Rather than multiplying three times, it multiplies 2M by mul once
+ * and derives the other products by adding or subtracting mul, propagating
+ * carries and borrows by hand across three 64-bit limbs (lo, mid, hi).
+ *
+ * NOTE(review): umul128 and shiftright128 come from d2s_intrinsics.h; they
+ * are read here as "low half returned, high half stored" and "shift the
+ * 128-bit value (lo, hi) right" respectively - confirm against that header.
+ */
+static inline uint64
+mulShiftAll(uint64 m, const uint64 *const mul, const int32 j,
+ uint64 *const vp, uint64 *const vm, const uint32 mmShift)
+{
+ m <<= 1; /* m is maximum 55 bits */
+ /*
+ * (lo, mid, hi) become the three limbs of 2M * mul, lowest limb first.
+ */
+ uint64 tmp;
+ const uint64 lo = umul128(m, mul[0], &tmp);
+ uint64 hi;
+ const uint64 mid = tmp + umul128(m, mul[1], &hi);
+
+ hi += mid < tmp; /* overflow into hi */
+ /*
+ * Adding mul once gives (2M + 1) * mul; each comparison is the carry out
+ * of the limb below.
+ */
+ const uint64 lo2 = lo + mul[0];
+ const uint64 mid2 = mid + mul[1] + (lo2 < lo);
+ const uint64 hi2 = hi + (mid2 < mid);
+
+ *vp = shiftright128(mid2, hi2, j - 64 - 1); /* (4M + 2) * mul >> j */
+
+ if (mmShift == 1)
+ {
+ const uint64 lo3 = lo - mul[0];
+ const uint64 mid3 = mid - mul[1] - (lo3 > lo);
+ const uint64 hi3 = hi - (mid3 > mid);
+
+ *vm = shiftright128(mid3, hi3, j - 64 - 1); /* (4M - 2) * mul >> j */
+ }
+ else
+ {
+ const uint64 lo3 = lo + lo;
+ const uint64 mid3 = mid + mid + (lo3 < lo);
+ const uint64 hi3 = hi + hi + (mid3 < mid);
+ const uint64 lo4 = lo3 - mul[0];
+ const uint64 mid4 = mid3 - mul[1] - (lo4 > lo3);
+ const uint64 hi4 = hi3 - (mid4 > mid3);
+
+ *vm = shiftright128(mid4, hi4, j - 64); /* (4M - 1) * mul >> j */
+ }
+
+ return shiftright128(mid, hi, j - 64 - 1); /* 4M * mul >> j */
+}
+
+#endif /* HAVE_INT128 / HAS_64_BIT_INTRINSICS */
+
+/*
+ * Returns the number of decimal digits needed to print v (1 for v == 0).
+ *
+ * Precondition: v has at most 17 digits, which is all that is needed for
+ * round-tripping.  The comparisons are unrolled, which is slightly faster
+ * than a loop, and run from long to short because the average output length
+ * is 16.38 digits.
+ */
+static inline uint32
+decimalLength(const uint64 v)
+{
+	Assert(v < 100000000000000000L);
+
+	if (v >= 10000000000000000L)
+		return 17;
+	if (v >= 1000000000000000L)
+		return 16;
+	if (v >= 100000000000000L)
+		return 15;
+	if (v >= 10000000000000L)
+		return 14;
+	if (v >= 1000000000000L)
+		return 13;
+	if (v >= 100000000000L)
+		return 12;
+	if (v >= 10000000000L)
+		return 11;
+	if (v >= 1000000000L)
+		return 10;
+	if (v >= 100000000L)
+		return 9;
+	if (v >= 10000000L)
+		return 8;
+	if (v >= 1000000L)
+		return 7;
+	if (v >= 100000L)
+		return 6;
+	if (v >= 10000L)
+		return 5;
+	if (v >= 1000L)
+		return 4;
+	if (v >= 100L)
+		return 3;
+	if (v >= 10L)
+		return 2;
+
+	return 1;
+}
+
+/* A floating decimal representing m * 10^e. */
+typedef struct floating_decimal_64
+{
+ uint64 mantissa; /* m: the decimal digits, as an integer */
+ int32 exponent; /* e: the power of ten applied to mantissa */
+} floating_decimal_64;
+/*
+ * Convert the value described by the raw IEEE-754 double fields ieeeMantissa
+ * and ieeeExponent (still biased) into a decimal mantissa and exponent.
+ *
+ * The work follows the numbered comments in the body: build the interval of
+ * decimals that are legal representations of the input (step 2), scale it
+ * to a power of ten using the precomputed 128-bit tables (step 3), then pick
+ * the shortest decimal inside that interval, rounding the last digit kept
+ * (step 4).  The sign is not handled here, and the caller in this file
+ * filters out zeros, infinities and NaNs before calling.
+ */
+static inline floating_decimal_64
+d2d(const uint64 ieeeMantissa, const uint32 ieeeExponent)
+{
+ int32 e2;
+ uint64 m2;
+ /*
+ * Unify the two encodings: an exponent field of 0 has no implicit leading
+ * bit, while every other value has the implicit bit OR'ed back in.
+ */
+ if (ieeeExponent == 0)
+ {
+ /* We subtract 2 so that the bounds computation has 2 additional bits. */
+ e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
+ m2 = ieeeMantissa;
+ }
+ else
+ {
+ e2 = ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
+ m2 = (UINT64CONST(1) << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
+ }
+
+#if STRICTLY_SHORTEST
+ const bool even = (m2 & 1) == 0;
+ const bool acceptBounds = even;
+#else
+ const bool acceptBounds = false;
+#endif
+
+ /* Step 2: Determine the interval of legal decimal representations. */
+ const uint64 mv = 4 * m2;
+
+ /* Implicit bool -> int conversion. True is 1, false is 0. */
+ const uint32 mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
+
+ /* We would compute mp and mm like this: */
+ /* uint64 mp = 4 * m2 + 2; */
+ /* uint64 mm = mv - 1 - mmShift; */
+
+ /* Step 3: Convert to a decimal power base using 128-bit arithmetic. */
+ uint64 vr,
+ vp,
+ vm;
+ int32 e10;
+ bool vmIsTrailingZeros = false;
+ bool vrIsTrailingZeros = false;
+
+ if (e2 >= 0)
+ {
+ /*
+ * I tried special-casing q == 0, but there was no effect on
+ * performance.
+ *
+ * This expr is slightly faster than max(0, log10Pow2(e2) - 1).
+ */
+ const uint32 q = log10Pow2(e2) - (e2 > 3);
+ const int32 k = DOUBLE_POW5_INV_BITCOUNT + pow5bits(q) - 1;
+ const int32 i = -e2 + q + k;
+
+ e10 = q;
+
+ vr = mulShiftAll(m2, DOUBLE_POW5_INV_SPLIT[q], i, &vp, &vm, mmShift);
+
+ if (q <= 21)
+ {
+ /*
+ * This should use q <= 22, but I think 21 is also safe. Smaller
+ * values may still be safe, but it's more difficult to reason
+ * about them.
+ *
+ * Only one of mp, mv, and mm can be a multiple of 5, if any.
+ */
+ const uint32 mvMod5 = (uint32) (mv - 5 * div5(mv));
+
+ if (mvMod5 == 0)
+ {
+ vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
+ }
+ else if (acceptBounds)
+ {
+ /*----
+ * Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q
+ * <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q
+ * <=> true && pow5Factor(mm) >= q, since e2 >= q.
+ *----
+ */
+ vmIsTrailingZeros = multipleOfPowerOf5(mv - 1 - mmShift, q);
+ }
+ else
+ {
+ /* Same as min(e2 + 1, pow5Factor(mp)) >= q. */
+ vp -= multipleOfPowerOf5(mv + 2, q);
+ }
+ }
+ }
+ else
+ {
+ /*
+ * This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
+ */
+ const uint32 q = log10Pow5(-e2) - (-e2 > 1);
+ const int32 i = -e2 - q;
+ const int32 k = pow5bits(i) - DOUBLE_POW5_BITCOUNT;
+ const int32 j = q - k;
+
+ e10 = q + e2;
+
+ vr = mulShiftAll(m2, DOUBLE_POW5_SPLIT[i], j, &vp, &vm, mmShift);
+
+ if (q <= 1)
+ {
+ /*
+ * {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q
+ * trailing 0 bits.
+ */
+ /* mv = 4 * m2, so it always has at least two trailing 0 bits. */
+ vrIsTrailingZeros = true;
+ if (acceptBounds)
+ {
+ /*
+ * mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff
+ * mmShift == 1.
+ */
+ vmIsTrailingZeros = mmShift == 1;
+ }
+ else
+ {
+ /*
+ * mp = mv + 2, so it always has at least one trailing 0 bit.
+ */
+ --vp;
+ }
+ }
+ else if (q < 63)
+ {
+ /* TODO(ulfjack):Use a tighter bound here. */
+ /*
+ * We need to compute min(ntz(mv), pow5Factor(mv) - e2) >= q - 1
+ */
+ /* <=> ntz(mv) >= q - 1 && pow5Factor(mv) - e2 >= q - 1 */
+ /* <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q) */
+ /* <=> (mv & ((1 << (q - 1)) - 1)) == 0 */
+
+ /*
+ * We also need to make sure that the left shift does not
+ * overflow.
+ */
+ vrIsTrailingZeros = multipleOfPowerOf2(mv, q - 1);
+ }
+ }
+
+ /*
+ * Step 4: Find the shortest decimal representation in the interval of
+ * legal representations.
+ */
+ uint32 removed = 0;
+ uint8 lastRemovedDigit = 0;
+ uint64 output;
+
+ /* On average, we remove ~2 digits. */
+ if (vmIsTrailingZeros || vrIsTrailingZeros)
+ {
+ /* General case, which happens rarely (~0.7%). */
+ for (;;)
+ {
+ const uint64 vpDiv10 = div10(vp);
+ const uint64 vmDiv10 = div10(vm);
+
+ if (vpDiv10 <= vmDiv10)
+ break;
+
+ const uint32 vmMod10 = (uint32) (vm - 10 * vmDiv10);
+ const uint64 vrDiv10 = div10(vr);
+ const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10);
+
+ vmIsTrailingZeros &= vmMod10 == 0;
+ vrIsTrailingZeros &= lastRemovedDigit == 0;
+ lastRemovedDigit = (uint8) vrMod10;
+ vr = vrDiv10;
+ vp = vpDiv10;
+ vm = vmDiv10;
+ ++removed;
+ }
+
+ if (vmIsTrailingZeros)
+ {
+ for (;;)
+ {
+ const uint64 vmDiv10 = div10(vm);
+ const uint32 vmMod10 = (uint32) (vm - 10 * vmDiv10);
+
+ if (vmMod10 != 0)
+ break;
+
+ const uint64 vpDiv10 = div10(vp);
+ const uint64 vrDiv10 = div10(vr);
+ const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10);
+
+ vrIsTrailingZeros &= lastRemovedDigit == 0;
+ lastRemovedDigit = (uint8) vrMod10;
+ vr = vrDiv10;
+ vp = vpDiv10;
+ vm = vmDiv10;
+ ++removed;
+ }
+ }
+
+ if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0)
+ {
+ /* Round even if the exact number is .....50..0. */
+ lastRemovedDigit = 4;
+ }
+
+ /*
+ * We need to take vr + 1 if vr is outside bounds or we need to round
+ * up.
+ */
+ output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
+ }
+ else
+ {
+ /*
+ * Specialized for the common case (~99.3%). Percentages below are
+ * relative to this.
+ */
+ bool roundUp = false;
+ const uint64 vpDiv100 = div100(vp);
+ const uint64 vmDiv100 = div100(vm);
+
+ if (vpDiv100 > vmDiv100)
+ {
+ /* Optimization:remove two digits at a time(~86.2 %). */
+ const uint64 vrDiv100 = div100(vr);
+ const uint32 vrMod100 = (uint32) (vr - 100 * vrDiv100);
+
+ roundUp = vrMod100 >= 50;
+ vr = vrDiv100;
+ vp = vpDiv100;
+ vm = vmDiv100;
+ removed += 2;
+ }
+
+ /*----
+ * Loop iterations below (approximately), without optimization
+ * above:
+ *
+ * 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%,
+ * 6+: 0.02%
+ *
+ * Loop iterations below (approximately), with optimization
+ * above:
+ *
+ * 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
+ *----
+ */
+ for (;;)
+ {
+ const uint64 vpDiv10 = div10(vp);
+ const uint64 vmDiv10 = div10(vm);
+
+ if (vpDiv10 <= vmDiv10)
+ break;
+
+ const uint64 vrDiv10 = div10(vr);
+ const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10);
+
+ roundUp = vrMod10 >= 5;
+ vr = vrDiv10;
+ vp = vpDiv10;
+ vm = vmDiv10;
+ ++removed;
+ }
+
+ /*
+ * We need to take vr + 1 if vr is outside bounds or we need to round
+ * up.
+ */
+ output = vr + (vr == vm || roundUp);
+ }
+ /*
+ * Every digit dropped in step 4 raises the decimal exponent by one.
+ */
+ const int32 exp = e10 + removed;
+
+ floating_decimal_64 fd;
+
+ fd.exponent = exp;
+ fd.mantissa = output;
+ return fd;
+}
+/*
+ * Write v in fixed-point notation (no exponent part) into result and return
+ * the number of bytes written.  olength is the number of decimal digits in
+ * v.mantissa.  No terminator is written, and the sign is left to the caller.
+ *
+ * The Asserts below encode the range in which the caller selects this
+ * format: at most three zeros after the point, and at most 16 characters
+ * for a value without a fractional part.
+ */
+static inline int
+to_chars_df(const floating_decimal_64 v, const uint32 olength, char *const result)
+{
+ /* Step 5: Print the decimal representation. */
+ int index = 0;
+
+ uint64 output = v.mantissa;
+ int32 exp = v.exponent;
+
+ /*----
+ * On entry, mantissa * 10^exp is the result to be output.
+ * Caller has already done the - sign if needed.
+ *
+ * We want to insert the point somewhere depending on the output length
+ * and exponent, which might mean adding zeros:
+ *
+ * exp | format
+ * 1+ | ddddddddd000000
+ * 0 | ddddddddd
+ * -1 .. -len+1 | dddddddd.d to d.ddddddddd
+ * -len ... | 0.ddddddddd to 0.000dddddd
+ */
+ uint32 i = 0;
+ int32 nexp = exp + olength;
+
+ if (nexp <= 0)
+ {
+ /* -nexp is number of 0s to add after '.' */
+ Assert(nexp >= -3);
+ /* 0.000ddddd */
+ index = 2 - nexp;
+ /* won't need more than this many 0s */
+ memcpy(result, "0.000000", 8);
+ }
+ else if (exp < 0)
+ {
+ /*
+ * dddd.dddd; leave space at the start and move the '.' in after
+ */
+ index = 1;
+ }
+ else
+ {
+ /*
+ * We can save some code later by pre-filling with zeros. We know that
+ * there can be no more than 16 output digits in this form, otherwise
+ * we would not choose fixed-point output.
+ */
+ Assert(exp < 16 && exp + olength <= 16);
+ memset(result, '0', 16);
+ }
+
+ /*
+ * We prefer 32-bit operations, even on 64-bit platforms. We have at most
+ * 17 digits, and uint32 can store 9 digits. If output doesn't fit into
+ * uint32, we cut off 8 digits, so the rest will fit into uint32.
+ */
+ if ((output >> 32) != 0)
+ {
+ /* Expensive 64-bit division. */
+ const uint64 q = div1e8(output);
+ uint32 output2 = (uint32) (output - 100000000 * q);
+ const uint32 c = output2 % 10000;
+
+ output = q;
+ output2 /= 10000;
+
+ const uint32 d = output2 % 10000;
+ const uint32 c0 = (c % 100) << 1;
+ const uint32 c1 = (c / 100) << 1;
+ const uint32 d0 = (d % 100) << 1;
+ const uint32 d1 = (d / 100) << 1;
+
+ memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2);
+ memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2);
+ memcpy(result + index + olength - i - 6, DIGIT_TABLE + d0, 2);
+ memcpy(result + index + olength - i - 8, DIGIT_TABLE + d1, 2);
+ i += 8;
+ }
+
+ uint32 output2 = (uint32) output;
+
+ while (output2 >= 10000)
+ {
+ const uint32 c = output2 - 10000 * (output2 / 10000);
+ const uint32 c0 = (c % 100) << 1;
+ const uint32 c1 = (c / 100) << 1;
+
+ output2 /= 10000;
+ memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2);
+ memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2);
+ i += 4;
+ }
+ if (output2 >= 100)
+ {
+ const uint32 c = (output2 % 100) << 1;
+
+ output2 /= 100;
+ memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2);
+ i += 2;
+ }
+ if (output2 >= 10)
+ {
+ const uint32 c = output2 << 1;
+
+ memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2);
+ }
+ else
+ {
+ result[index] = (char) ('0' + output2);
+ }
+
+ if (index == 1)
+ {
+ /*
+ * nexp is 1..15 here, representing the number of digits before the
+ * point. A value of 16 is not possible because we switch to
+ * scientific notation when the display exponent reaches 15.
+ */
+ Assert(nexp < 16);
+ /* gcc only seems to want to optimize memmove for small 2^n */
+ if (nexp & 8)
+ {
+ memmove(result + index - 1, result + index, 8);
+ index += 8;
+ }
+ if (nexp & 4)
+ {
+ memmove(result + index - 1, result + index, 4);
+ index += 4;
+ }
+ if (nexp & 2)
+ {
+ memmove(result + index - 1, result + index, 2);
+ index += 2;
+ }
+ if (nexp & 1)
+ {
+ result[index - 1] = result[index];
+ }
+ result[nexp] = '.';
+ index = olength + 1; /* all digits plus the '.' */
+ }
+ else if (exp >= 0)
+ {
+ /* we supplied the trailing zeros earlier, now just set the length. */
+ index = olength + exp;
+ }
+ else
+ {
+ index = olength + (2 - nexp); /* leading 0, '.', -nexp zeros, digits */
+ }
+
+ return index;
+}
+/*
+ * Write the decimal v into result, preceded by '-' when sign is set, and
+ * return the number of bytes written.  No terminator is written.
+ *
+ * When the display exponent (the power of ten of the first digit) lies in
+ * [-4, 15) the digits are emitted in fixed-point form by to_chars_df.
+ * Otherwise the output is scientific notation: the first digit, a '.' and
+ * the remaining digits if there are any, then 'e', an explicit '+' or '-',
+ * and the exponent.
+ */
+static inline int
+to_chars(floating_decimal_64 v, const bool sign, char *const result)
+{
+ /* Step 5: Print the decimal representation. */
+ int index = 0;
+
+ uint64 output = v.mantissa;
+ uint32 olength = decimalLength(output);
+ int32 exp = v.exponent + olength - 1;
+
+ if (sign)
+ {
+ result[index++] = '-';
+ }
+
+ /*
+ * The thresholds for fixed-point output are chosen to match printf
+ * defaults. Beware that both the code of to_chars_df and the value of
+ * DOUBLE_SHORTEST_DECIMAL_LEN are sensitive to these thresholds.
+ */
+ if (exp >= -4 && exp < 15)
+ return to_chars_df(v, olength, result + index) + sign; /* sign counts the '-' */
+
+ /*
+ * If v.exponent is exactly 0, we might have reached here via the small
+ * integer fast path, in which case v.mantissa might contain trailing
+ * (decimal) zeros. For scientific notation we need to move these zeros
+ * into the exponent. (For fixed point this doesn't matter, which is why
+ * we do this here rather than above.)
+ *
+ * Since we already calculated the display exponent (exp) above based on
+ * the old decimal length, that value does not change here. Instead, we
+ * just reduce the display length for each digit removed.
+ *
+ * If we didn't get here via the fast path, the raw exponent will not
+ * usually be 0, and there will be no trailing zeros, so we pay no more
+ * than one div10/multiply extra cost. We claw back half of that by
+ * checking for divisibility by 2 before dividing by 10.
+ */
+ if (v.exponent == 0)
+ {
+ while ((output & 1) == 0)
+ {
+ const uint64 q = div10(output);
+ const uint32 r = (uint32) (output - 10 * q);
+
+ if (r != 0)
+ break;
+ output = q;
+ --olength;
+ }
+ }
+
+ /*----
+ * Print the decimal digits.
+ *
+ * The following code is equivalent to:
+ *
+ * for (uint32 i = 0; i < olength - 1; ++i) {
+ * const uint32 c = output % 10; output /= 10;
+ * result[index + olength - i] = (char) ('0' + c);
+ * }
+ * result[index] = '0' + output % 10;
+ *----
+ */
+
+ uint32 i = 0;
+
+ /*
+ * We prefer 32-bit operations, even on 64-bit platforms. We have at most
+ * 17 digits, and uint32 can store 9 digits. If output doesn't fit into
+ * uint32, we cut off 8 digits, so the rest will fit into uint32.
+ */
+ if ((output >> 32) != 0)
+ {
+ /* Expensive 64-bit division. */
+ const uint64 q = div1e8(output);
+ uint32 output2 = (uint32) (output - 100000000 * q);
+
+ output = q;
+
+ const uint32 c = output2 % 10000;
+
+ output2 /= 10000;
+
+ const uint32 d = output2 % 10000;
+ const uint32 c0 = (c % 100) << 1;
+ const uint32 c1 = (c / 100) << 1;
+ const uint32 d0 = (d % 100) << 1;
+ const uint32 d1 = (d / 100) << 1;
+
+ memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2);
+ memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2);
+ memcpy(result + index + olength - i - 5, DIGIT_TABLE + d0, 2);
+ memcpy(result + index + olength - i - 7, DIGIT_TABLE + d1, 2);
+ i += 8;
+ }
+
+ uint32 output2 = (uint32) output;
+
+ while (output2 >= 10000)
+ {
+ const uint32 c = output2 - 10000 * (output2 / 10000);
+
+ output2 /= 10000;
+
+ const uint32 c0 = (c % 100) << 1;
+ const uint32 c1 = (c / 100) << 1;
+
+ memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2);
+ memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2);
+ i += 4;
+ }
+ if (output2 >= 100)
+ {
+ const uint32 c = (output2 % 100) << 1;
+
+ output2 /= 100;
+ memcpy(result + index + olength - i - 1, DIGIT_TABLE + c, 2);
+ i += 2;
+ }
+ if (output2 >= 10)
+ {
+ const uint32 c = output2 << 1;
+
+ /*
+ * We can't use memcpy here: the decimal dot goes between these two
+ * digits.
+ */
+ result[index + olength - i] = DIGIT_TABLE[c + 1];
+ result[index] = DIGIT_TABLE[c];
+ }
+ else
+ {
+ result[index] = (char) ('0' + output2);
+ }
+
+ /* Print decimal point if needed. */
+ if (olength > 1)
+ {
+ result[index + 1] = '.';
+ index += olength + 1; /* all digits plus the '.' */
+ }
+ else
+ {
+ ++index;
+ }
+
+ /* Print the exponent. */
+ result[index++] = 'e';
+ if (exp < 0)
+ {
+ result[index++] = '-';
+ exp = -exp;
+ }
+ else
+ result[index++] = '+';
+
+ if (exp >= 100)
+ {
+ const int32 c = exp % 10;
+
+ memcpy(result + index, DIGIT_TABLE + 2 * (exp / 10), 2);
+ result[index + 2] = (char) ('0' + c);
+ index += 3;
+ }
+ else
+ {
+ memcpy(result + index, DIGIT_TABLE + 2 * exp, 2);
+ index += 2;
+ }
+
+ return index;
+}
+
+/*
+ * Fast path for doubles that are exact integers in the range [1, 2^53).
+ *
+ * If the value described by ieeeMantissa/ieeeExponent is such an integer,
+ * store it in *v with a decimal exponent of 0 and return true.  Otherwise
+ * return false without touching *v.
+ */
+static inline bool
+d2d_small_int(const uint64 ieeeMantissa,
+			  const uint32 ieeeExponent,
+			  floating_decimal_64 *v)
+{
+	const int32 e2 = (int32) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;
+	bool		isSmallInt = false;
+
+	/*
+	 * Keep a single failure exit: several "return false;" statements tend to
+	 * provoke the compiler into inlining multiple copies of d2d, which is
+	 * undesirable.
+	 */
+	if (e2 >= -DOUBLE_MANTISSA_BITS && e2 <= 0)
+	{
+		/*----
+		 * Since 2^52 <= m2 < 2^53 and 0 <= -e2 <= 52:
+		 *	 1 <= f = m2 / 2^-e2 < 2^53.
+		 *
+		 * The value is an integer exactly when its lowest -e2 significand
+		 * bits are zero.  Testing ieeeMantissa is enough: the implied
+		 * leading 1 could only be a fraction bit if e2 were below
+		 * -DOUBLE_MANTISSA_BITS, which the range check above excludes
+		 * (e.g. 0.5 gives ieeeMantissa == 0 and e2 == -53).
+		 *----
+		 */
+		const int32 fractionBits = -e2;
+		const uint64 fractionMask = (UINT64CONST(1) << fractionBits) - 1;
+
+		if ((ieeeMantissa & fractionMask) == 0)
+		{
+			/*----
+			 * f is an integer in the range [1, 2^53).
+			 * Note: mantissa might contain trailing (decimal) 0's.
+			 * Note: since 2^53 < 10^16, there is no need to adjust
+			 * decimalLength().
+			 *----
+			 */
+			const uint64 m2 = (UINT64CONST(1) << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
+
+			v->mantissa = m2 >> fractionBits;
+			v->exponent = 0;
+			isSmallInt = true;
+		}
+	}
+
+	return isSmallInt;
+}
+
+/*
+ * Write the shortest decimal representation of f into the caller's buffer
+ * WITHOUT a terminating NUL.  The buffer must have room for at least
+ * DOUBLE_SHORTEST_DECIMAL_LEN-1 bytes.
+ *
+ * Returns the number of bytes stored.
+ */
+int
+double_to_shortest_decimal_bufn(double f, char *result)
+{
+	const uint32 exponentMask = (1u << DOUBLE_EXPONENT_BITS) - 1u;
+	const uint64 mantissaMask = (UINT64CONST(1) << DOUBLE_MANTISSA_BITS) - 1;
+
+	/*
+	 * Step 1: Decode the floating-point number into its sign, mantissa and
+	 * exponent fields.
+	 */
+	const uint64 bits = double_to_bits(f);
+	const bool	ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;
+	const uint64 ieeeMantissa = bits & mantissaMask;
+	const uint32 ieeeExponent = (uint32) ((bits >> DOUBLE_MANTISSA_BITS) & exponentMask);
+
+	const bool	allOnesExponent = (ieeeExponent == exponentMask);
+	const bool	isZero = (ieeeExponent == 0 && ieeeMantissa == 0);
+
+	floating_decimal_64 v;
+
+	/* Easy cases first: all-ones exponent field, or a zero. */
+	if (allOnesExponent || isZero)
+		return copy_special_str(result, ieeeSign, (ieeeExponent != 0), (ieeeMantissa != 0));
+
+	/* Try the small-integer fast path; fall back to the full conversion. */
+	if (!d2d_small_int(ieeeMantissa, ieeeExponent, &v))
+		v = d2d(ieeeMantissa, ieeeExponent);
+
+	return to_chars(v, ieeeSign, result);
+}
+
+/*
+ * Write the shortest decimal representation of f into the caller's buffer as
+ * a null-terminated string.  The buffer must have room for at least
+ * DOUBLE_SHORTEST_DECIMAL_LEN bytes.
+ *
+ * Returns the string length, not counting the terminator.
+ */
+int
+double_to_shortest_decimal_buf(double f, char *result)
+{
+	const int	len = double_to_shortest_decimal_bufn(f, result);
+
+	/* The unterminated form must leave room for the terminator. */
+	Assert(len < DOUBLE_SHORTEST_DECIMAL_LEN);
+	result[len] = '\0';
+
+	return len;
+}
+
+/*
+ * Return the shortest decimal representation of f as a freshly palloc'd,
+ * null-terminated string (outside the backend, malloc() is used instead).
+ *
+ * The caller owns the result and is responsible for freeing it.
+ */
+char *
+double_to_shortest_decimal(double f)
+{
+	char	   *const buf = (char *) palloc(DOUBLE_SHORTEST_DECIMAL_LEN);
+
+	(void) double_to_shortest_decimal_buf(f, buf);
+
+	return buf;
+}
diff --git a/src/common/d2s_full_table.h b/src/common/d2s_full_table.h
new file mode 100644
index 0000000..23f5e9a
--- /dev/null
+++ b/src/common/d2s_full_table.h
@@ -0,0 +1,358 @@
+/*---------------------------------------------------------------------------
+ *
+ * Ryu floating-point output for double precision.
+ *
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/d2s_full_table.h
+ *
+ * This is a modification of code taken from github.com/ulfjack/ryu under the
+ * terms of the Boost license (not the Apache license). The original copyright
+ * notice follows:
+ *
+ * Copyright 2018 Ulf Adams
+ *
+ * The contents of this file may be used under the terms of the Apache
+ * License, Version 2.0.
+ *
+ * (See accompanying file LICENSE-Apache or copy at
+ * http://www.apache.org/licenses/LICENSE-2.0)
+ *
+ * Alternatively, the contents of this file may be used under the terms of the
+ * Boost Software License, Version 1.0.
+ *
+ * (See accompanying file LICENSE-Boost or copy at
+ * https://www.boost.org/LICENSE_1_0.txt)
+ *
+ * Unless required by applicable law or agreed to in writing, this software is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+#ifndef RYU_D2S_FULL_TABLE_H
+#define RYU_D2S_FULL_TABLE_H
+
+/*
+ * These tables are generated (by the upstream) using PrintDoubleLookupTable
+ * from the upstream sources at github.com/ulfjack/ryu, and then modified (by
+ * us) by adding UINT64CONST.
+ *
+ * DOUBLE_POW5_INV_SPLIT has 292 rows of two uint64 words each.  Judging by
+ * the first rows (row 0 is {1, 2^58}), each row looks like a single 128-bit
+ * value stored low word first -- NOTE(review): confirm against the code in
+ * d2s.c that indexes this table.
+ */
+static const uint64 DOUBLE_POW5_INV_SPLIT[292][2] = {
+ {UINT64CONST(1), UINT64CONST(288230376151711744)}, {UINT64CONST(3689348814741910324), UINT64CONST(230584300921369395)},
+ {UINT64CONST(2951479051793528259), UINT64CONST(184467440737095516)}, {UINT64CONST(17118578500402463900), UINT64CONST(147573952589676412)},
+ {UINT64CONST(12632330341676300947), UINT64CONST(236118324143482260)}, {UINT64CONST(10105864273341040758), UINT64CONST(188894659314785808)},
+ {UINT64CONST(15463389048156653253), UINT64CONST(151115727451828646)}, {UINT64CONST(17362724847566824558), UINT64CONST(241785163922925834)},
+ {UINT64CONST(17579528692795369969), UINT64CONST(193428131138340667)}, {UINT64CONST(6684925324752475329), UINT64CONST(154742504910672534)},
+ {UINT64CONST(18074578149087781173), UINT64CONST(247588007857076054)}, {UINT64CONST(18149011334012135262), UINT64CONST(198070406285660843)},
+ {UINT64CONST(3451162622983977240), UINT64CONST(158456325028528675)}, {UINT64CONST(5521860196774363583), UINT64CONST(253530120045645880)},
+ {UINT64CONST(4417488157419490867), UINT64CONST(202824096036516704)}, {UINT64CONST(7223339340677503017), UINT64CONST(162259276829213363)},
+ {UINT64CONST(7867994130342094503), UINT64CONST(259614842926741381)}, {UINT64CONST(2605046489531765280), UINT64CONST(207691874341393105)},
+ {UINT64CONST(2084037191625412224), UINT64CONST(166153499473114484)}, {UINT64CONST(10713157136084480204), UINT64CONST(265845599156983174)},
+ {UINT64CONST(12259874523609494487), UINT64CONST(212676479325586539)}, {UINT64CONST(13497248433629505913), UINT64CONST(170141183460469231)},
+ {UINT64CONST(14216899864323388813), UINT64CONST(272225893536750770)}, {UINT64CONST(11373519891458711051), UINT64CONST(217780714829400616)},
+ {UINT64CONST(5409467098425058518), UINT64CONST(174224571863520493)}, {UINT64CONST(4965798542738183305), UINT64CONST(278759314981632789)},
+ {UINT64CONST(7661987648932456967), UINT64CONST(223007451985306231)}, {UINT64CONST(2440241304404055250), UINT64CONST(178405961588244985)},
+ {UINT64CONST(3904386087046488400), UINT64CONST(285449538541191976)}, {UINT64CONST(17880904128604832013), UINT64CONST(228359630832953580)},
+ {UINT64CONST(14304723302883865611), UINT64CONST(182687704666362864)}, {UINT64CONST(15133127457049002812), UINT64CONST(146150163733090291)},
+ {UINT64CONST(16834306301794583852), UINT64CONST(233840261972944466)}, {UINT64CONST(9778096226693756759), UINT64CONST(187072209578355573)},
+ {UINT64CONST(15201174610838826053), UINT64CONST(149657767662684458)}, {UINT64CONST(2185786488890659746), UINT64CONST(239452428260295134)},
+ {UINT64CONST(5437978005854438120), UINT64CONST(191561942608236107)}, {UINT64CONST(15418428848909281466), UINT64CONST(153249554086588885)},
+ {UINT64CONST(6222742084545298729), UINT64CONST(245199286538542217)}, {UINT64CONST(16046240111861969953), UINT64CONST(196159429230833773)},
+ {UINT64CONST(1768945645263844993), UINT64CONST(156927543384667019)}, {UINT64CONST(10209010661905972635), UINT64CONST(251084069415467230)},
+ {UINT64CONST(8167208529524778108), UINT64CONST(200867255532373784)}, {UINT64CONST(10223115638361732810), UINT64CONST(160693804425899027)},
+ {UINT64CONST(1599589762411131202), UINT64CONST(257110087081438444)}, {UINT64CONST(4969020624670815285), UINT64CONST(205688069665150755)},
+ {UINT64CONST(3975216499736652228), UINT64CONST(164550455732120604)}, {UINT64CONST(13739044029062464211), UINT64CONST(263280729171392966)},
+ {UINT64CONST(7301886408508061046), UINT64CONST(210624583337114373)}, {UINT64CONST(13220206756290269483), UINT64CONST(168499666669691498)},
+ {UINT64CONST(17462981995322520850), UINT64CONST(269599466671506397)}, {UINT64CONST(6591687966774196033), UINT64CONST(215679573337205118)},
+ {UINT64CONST(12652048002903177473), UINT64CONST(172543658669764094)}, {UINT64CONST(9175230360419352987), UINT64CONST(276069853871622551)},
+ {UINT64CONST(3650835473593572067), UINT64CONST(220855883097298041)}, {UINT64CONST(17678063637842498946), UINT64CONST(176684706477838432)},
+ {UINT64CONST(13527506561580357021), UINT64CONST(282695530364541492)}, {UINT64CONST(3443307619780464970), UINT64CONST(226156424291633194)},
+ {UINT64CONST(6443994910566282300), UINT64CONST(180925139433306555)}, {UINT64CONST(5155195928453025840), UINT64CONST(144740111546645244)},
+ {UINT64CONST(15627011115008661990), UINT64CONST(231584178474632390)}, {UINT64CONST(12501608892006929592), UINT64CONST(185267342779705912)},
+ {UINT64CONST(2622589484121723027), UINT64CONST(148213874223764730)}, {UINT64CONST(4196143174594756843), UINT64CONST(237142198758023568)},
+ {UINT64CONST(10735612169159626121), UINT64CONST(189713759006418854)}, {UINT64CONST(12277838550069611220), UINT64CONST(151771007205135083)},
+ {UINT64CONST(15955192865369467629), UINT64CONST(242833611528216133)}, {UINT64CONST(1696107848069843133), UINT64CONST(194266889222572907)},
+ {UINT64CONST(12424932722681605476), UINT64CONST(155413511378058325)}, {UINT64CONST(1433148282581017146), UINT64CONST(248661618204893321)},
+ {UINT64CONST(15903913885032455010), UINT64CONST(198929294563914656)}, {UINT64CONST(9033782293284053685), UINT64CONST(159143435651131725)},
+ {UINT64CONST(14454051669254485895), UINT64CONST(254629497041810760)}, {UINT64CONST(11563241335403588716), UINT64CONST(203703597633448608)},
+ {UINT64CONST(16629290697806691620), UINT64CONST(162962878106758886)}, {UINT64CONST(781423413297334329), UINT64CONST(260740604970814219)},
+ {UINT64CONST(4314487545379777786), UINT64CONST(208592483976651375)}, {UINT64CONST(3451590036303822229), UINT64CONST(166873987181321100)},
+ {UINT64CONST(5522544058086115566), UINT64CONST(266998379490113760)}, {UINT64CONST(4418035246468892453), UINT64CONST(213598703592091008)},
+ {UINT64CONST(10913125826658934609), UINT64CONST(170878962873672806)}, {UINT64CONST(10082303693170474728), UINT64CONST(273406340597876490)},
+ {UINT64CONST(8065842954536379782), UINT64CONST(218725072478301192)}, {UINT64CONST(17520720807854834795), UINT64CONST(174980057982640953)},
+ {UINT64CONST(5897060404116273733), UINT64CONST(279968092772225526)}, {UINT64CONST(1028299508551108663), UINT64CONST(223974474217780421)},
+ {UINT64CONST(15580034865808528224), UINT64CONST(179179579374224336)}, {UINT64CONST(17549358155809824511), UINT64CONST(286687326998758938)},
+ {UINT64CONST(2971440080422128639), UINT64CONST(229349861599007151)}, {UINT64CONST(17134547323305344204), UINT64CONST(183479889279205720)},
+ {UINT64CONST(13707637858644275364), UINT64CONST(146783911423364576)}, {UINT64CONST(14553522944347019935), UINT64CONST(234854258277383322)},
+ {UINT64CONST(4264120725993795302), UINT64CONST(187883406621906658)}, {UINT64CONST(10789994210278856888), UINT64CONST(150306725297525326)},
+ {UINT64CONST(9885293106962350374), UINT64CONST(240490760476040522)}, {UINT64CONST(529536856086059653), UINT64CONST(192392608380832418)},
+ {UINT64CONST(7802327114352668369), UINT64CONST(153914086704665934)}, {UINT64CONST(1415676938738538420), UINT64CONST(246262538727465495)},
+ {UINT64CONST(1132541550990830736), UINT64CONST(197010030981972396)}, {UINT64CONST(15663428499760305882), UINT64CONST(157608024785577916)},
+ {UINT64CONST(17682787970132668764), UINT64CONST(252172839656924666)}, {UINT64CONST(10456881561364224688), UINT64CONST(201738271725539733)},
+ {UINT64CONST(15744202878575200397), UINT64CONST(161390617380431786)}, {UINT64CONST(17812026976236499989), UINT64CONST(258224987808690858)},
+ {UINT64CONST(3181575136763469022), UINT64CONST(206579990246952687)}, {UINT64CONST(13613306553636506187), UINT64CONST(165263992197562149)},
+ {UINT64CONST(10713244041592678929), UINT64CONST(264422387516099439)}, {UINT64CONST(12259944048016053467), UINT64CONST(211537910012879551)},
+ {UINT64CONST(6118606423670932450), UINT64CONST(169230328010303641)}, {UINT64CONST(2411072648389671274), UINT64CONST(270768524816485826)},
+ {UINT64CONST(16686253377679378312), UINT64CONST(216614819853188660)}, {UINT64CONST(13349002702143502650), UINT64CONST(173291855882550928)},
+ {UINT64CONST(17669055508687693916), UINT64CONST(277266969412081485)}, {UINT64CONST(14135244406950155133), UINT64CONST(221813575529665188)},
+ {UINT64CONST(240149081334393137), UINT64CONST(177450860423732151)}, {UINT64CONST(11452284974360759988), UINT64CONST(283921376677971441)},
+ {UINT64CONST(5472479164746697667), UINT64CONST(227137101342377153)}, {UINT64CONST(11756680961281178780), UINT64CONST(181709681073901722)},
+ {UINT64CONST(2026647139541122378), UINT64CONST(145367744859121378)}, {UINT64CONST(18000030682233437097), UINT64CONST(232588391774594204)},
+ {UINT64CONST(18089373360528660001), UINT64CONST(186070713419675363)}, {UINT64CONST(3403452244197197031), UINT64CONST(148856570735740291)},
+ {UINT64CONST(16513570034941246220), UINT64CONST(238170513177184465)}, {UINT64CONST(13210856027952996976), UINT64CONST(190536410541747572)},
+ {UINT64CONST(3189987192878576934), UINT64CONST(152429128433398058)}, {UINT64CONST(1414630693863812771), UINT64CONST(243886605493436893)},
+ {UINT64CONST(8510402184574870864), UINT64CONST(195109284394749514)}, {UINT64CONST(10497670562401807014), UINT64CONST(156087427515799611)},
+ {UINT64CONST(9417575270359070576), UINT64CONST(249739884025279378)}, {UINT64CONST(14912757845771077107), UINT64CONST(199791907220223502)},
+ {UINT64CONST(4551508647133041040), UINT64CONST(159833525776178802)}, {UINT64CONST(10971762650154775986), UINT64CONST(255733641241886083)},
+ {UINT64CONST(16156107749607641435), UINT64CONST(204586912993508866)}, {UINT64CONST(9235537384944202825), UINT64CONST(163669530394807093)},
+ {UINT64CONST(11087511001168814197), UINT64CONST(261871248631691349)}, {UINT64CONST(12559357615676961681), UINT64CONST(209496998905353079)},
+ {UINT64CONST(13736834907283479668), UINT64CONST(167597599124282463)}, {UINT64CONST(18289587036911657145), UINT64CONST(268156158598851941)},
+ {UINT64CONST(10942320814787415393), UINT64CONST(214524926879081553)}, {UINT64CONST(16132554281313752961), UINT64CONST(171619941503265242)},
+ {UINT64CONST(11054691591134363444), UINT64CONST(274591906405224388)}, {UINT64CONST(16222450902391311402), UINT64CONST(219673525124179510)},
+ {UINT64CONST(12977960721913049122), UINT64CONST(175738820099343608)}, {UINT64CONST(17075388340318968271), UINT64CONST(281182112158949773)},
+ {UINT64CONST(2592264228029443648), UINT64CONST(224945689727159819)}, {UINT64CONST(5763160197165465241), UINT64CONST(179956551781727855)},
+ {UINT64CONST(9221056315464744386), UINT64CONST(287930482850764568)}, {UINT64CONST(14755542681855616155), UINT64CONST(230344386280611654)},
+ {UINT64CONST(15493782960226403247), UINT64CONST(184275509024489323)}, {UINT64CONST(1326979923955391628), UINT64CONST(147420407219591459)},
+ {UINT64CONST(9501865507812447252), UINT64CONST(235872651551346334)}, {UINT64CONST(11290841220991868125), UINT64CONST(188698121241077067)},
+ {UINT64CONST(1653975347309673853), UINT64CONST(150958496992861654)}, {UINT64CONST(10025058185179298811), UINT64CONST(241533595188578646)},
+ {UINT64CONST(4330697733401528726), UINT64CONST(193226876150862917)}, {UINT64CONST(14532604630946953951), UINT64CONST(154581500920690333)},
+ {UINT64CONST(1116074521063664381), UINT64CONST(247330401473104534)}, {UINT64CONST(4582208431592841828), UINT64CONST(197864321178483627)},
+ {UINT64CONST(14733813189500004432), UINT64CONST(158291456942786901)}, {UINT64CONST(16195403473716186445), UINT64CONST(253266331108459042)},
+ {UINT64CONST(5577625149489128510), UINT64CONST(202613064886767234)}, {UINT64CONST(8151448934333213131), UINT64CONST(162090451909413787)},
+ {UINT64CONST(16731667109675051333), UINT64CONST(259344723055062059)}, {UINT64CONST(17074682502481951390), UINT64CONST(207475778444049647)},
+ {UINT64CONST(6281048372501740465), UINT64CONST(165980622755239718)}, {UINT64CONST(6360328581260874421), UINT64CONST(265568996408383549)},
+ {UINT64CONST(8777611679750609860), UINT64CONST(212455197126706839)}, {UINT64CONST(10711438158542398211), UINT64CONST(169964157701365471)},
+ {UINT64CONST(9759603424184016492), UINT64CONST(271942652322184754)}, {UINT64CONST(11497031554089123517), UINT64CONST(217554121857747803)},
+ {UINT64CONST(16576322872755119460), UINT64CONST(174043297486198242)}, {UINT64CONST(11764721337440549842), UINT64CONST(278469275977917188)},
+ {UINT64CONST(16790474699436260520), UINT64CONST(222775420782333750)}, {UINT64CONST(13432379759549008416), UINT64CONST(178220336625867000)},
+ {UINT64CONST(3045063541568861850), UINT64CONST(285152538601387201)}, {UINT64CONST(17193446092222730773), UINT64CONST(228122030881109760)},
+ {UINT64CONST(13754756873778184618), UINT64CONST(182497624704887808)}, {UINT64CONST(18382503128506368341), UINT64CONST(145998099763910246)},
+ {UINT64CONST(3586563302416817083), UINT64CONST(233596959622256395)}, {UINT64CONST(2869250641933453667), UINT64CONST(186877567697805116)},
+ {UINT64CONST(17052795772514404226), UINT64CONST(149502054158244092)}, {UINT64CONST(12527077977055405469), UINT64CONST(239203286653190548)},
+ {UINT64CONST(17400360011128145022), UINT64CONST(191362629322552438)}, {UINT64CONST(2852241564676785048), UINT64CONST(153090103458041951)},
+ {UINT64CONST(15631632947708587046), UINT64CONST(244944165532867121)}, {UINT64CONST(8815957543424959314), UINT64CONST(195955332426293697)},
+ {UINT64CONST(18120812478965698421), UINT64CONST(156764265941034957)}, {UINT64CONST(14235904707377476180), UINT64CONST(250822825505655932)},
+ {UINT64CONST(4010026136418160298), UINT64CONST(200658260404524746)}, {UINT64CONST(17965416168102169531), UINT64CONST(160526608323619796)},
+ {UINT64CONST(2919224165770098987), UINT64CONST(256842573317791675)}, {UINT64CONST(2335379332616079190), UINT64CONST(205474058654233340)},
+ {UINT64CONST(1868303466092863352), UINT64CONST(164379246923386672)}, {UINT64CONST(6678634360490491686), UINT64CONST(263006795077418675)},
+ {UINT64CONST(5342907488392393349), UINT64CONST(210405436061934940)}, {UINT64CONST(4274325990713914679), UINT64CONST(168324348849547952)},
+ {UINT64CONST(10528270399884173809), UINT64CONST(269318958159276723)}, {UINT64CONST(15801313949391159694), UINT64CONST(215455166527421378)},
+ {UINT64CONST(1573004715287196786), UINT64CONST(172364133221937103)}, {UINT64CONST(17274202803427156150), UINT64CONST(275782613155099364)},
+ {UINT64CONST(17508711057483635243), UINT64CONST(220626090524079491)}, {UINT64CONST(10317620031244997871), UINT64CONST(176500872419263593)},
+ {UINT64CONST(12818843235250086271), UINT64CONST(282401395870821749)}, {UINT64CONST(13944423402941979340), UINT64CONST(225921116696657399)},
+ {UINT64CONST(14844887537095493795), UINT64CONST(180736893357325919)}, {UINT64CONST(15565258844418305359), UINT64CONST(144589514685860735)},
+ {UINT64CONST(6457670077359736959), UINT64CONST(231343223497377177)}, {UINT64CONST(16234182506113520537), UINT64CONST(185074578797901741)},
+ {UINT64CONST(9297997190148906106), UINT64CONST(148059663038321393)}, {UINT64CONST(11187446689496339446), UINT64CONST(236895460861314229)},
+ {UINT64CONST(12639306166338981880), UINT64CONST(189516368689051383)}, {UINT64CONST(17490142562555006151), UINT64CONST(151613094951241106)},
+ {UINT64CONST(2158786396894637579), UINT64CONST(242580951921985771)}, {UINT64CONST(16484424376483351356), UINT64CONST(194064761537588616)},
+ {UINT64CONST(9498190686444770762), UINT64CONST(155251809230070893)}, {UINT64CONST(11507756283569722895), UINT64CONST(248402894768113429)},
+ {UINT64CONST(12895553841597688639), UINT64CONST(198722315814490743)}, {UINT64CONST(17695140702761971558), UINT64CONST(158977852651592594)},
+ {UINT64CONST(17244178680193423523), UINT64CONST(254364564242548151)}, {UINT64CONST(10105994129412828495), UINT64CONST(203491651394038521)},
+ {UINT64CONST(4395446488788352473), UINT64CONST(162793321115230817)}, {UINT64CONST(10722063196803274280), UINT64CONST(260469313784369307)},
+ {UINT64CONST(1198952927958798777), UINT64CONST(208375451027495446)}, {UINT64CONST(15716557601334680315), UINT64CONST(166700360821996356)},
+ {UINT64CONST(17767794532651667857), UINT64CONST(266720577315194170)}, {UINT64CONST(14214235626121334286), UINT64CONST(213376461852155336)},
+ {UINT64CONST(7682039686155157106), UINT64CONST(170701169481724269)}, {UINT64CONST(1223217053622520399), UINT64CONST(273121871170758831)},
+ {UINT64CONST(15735968901865657612), UINT64CONST(218497496936607064)}, {UINT64CONST(16278123936234436413), UINT64CONST(174797997549285651)},
+ {UINT64CONST(219556594781725998), UINT64CONST(279676796078857043)}, {UINT64CONST(7554342905309201445), UINT64CONST(223741436863085634)},
+ {UINT64CONST(9732823138989271479), UINT64CONST(178993149490468507)}, {UINT64CONST(815121763415193074), UINT64CONST(286389039184749612)},
+ {UINT64CONST(11720143854957885429), UINT64CONST(229111231347799689)}, {UINT64CONST(13065463898708218666), UINT64CONST(183288985078239751)},
+ {UINT64CONST(6763022304224664610), UINT64CONST(146631188062591801)}, {UINT64CONST(3442138057275642729), UINT64CONST(234609900900146882)},
+ {UINT64CONST(13821756890046245153), UINT64CONST(187687920720117505)}, {UINT64CONST(11057405512036996122), UINT64CONST(150150336576094004)},
+ {UINT64CONST(6623802375033462826), UINT64CONST(240240538521750407)}, {UINT64CONST(16367088344252501231), UINT64CONST(192192430817400325)},
+ {UINT64CONST(13093670675402000985), UINT64CONST(153753944653920260)}, {UINT64CONST(2503129006933649959), UINT64CONST(246006311446272417)},
+ {UINT64CONST(13070549649772650937), UINT64CONST(196805049157017933)}, {UINT64CONST(17835137349301941396), UINT64CONST(157444039325614346)},
+ {UINT64CONST(2710778055689733971), UINT64CONST(251910462920982955)}, {UINT64CONST(2168622444551787177), UINT64CONST(201528370336786364)},
+ {UINT64CONST(5424246770383340065), UINT64CONST(161222696269429091)}, {UINT64CONST(1300097203129523457), UINT64CONST(257956314031086546)},
+ {UINT64CONST(15797473021471260058), UINT64CONST(206365051224869236)}, {UINT64CONST(8948629602435097724), UINT64CONST(165092040979895389)},
+ {UINT64CONST(3249760919670425388), UINT64CONST(264147265567832623)}, {UINT64CONST(9978506365220160957), UINT64CONST(211317812454266098)},
+ {UINT64CONST(15361502721659949412), UINT64CONST(169054249963412878)}, {UINT64CONST(2442311466204457120), UINT64CONST(270486799941460606)},
+ {UINT64CONST(16711244431931206989), UINT64CONST(216389439953168484)}, {UINT64CONST(17058344360286875914), UINT64CONST(173111551962534787)},
+ {UINT64CONST(12535955717491360170), UINT64CONST(276978483140055660)}, {UINT64CONST(10028764573993088136), UINT64CONST(221582786512044528)},
+ {UINT64CONST(15401709288678291155), UINT64CONST(177266229209635622)}, {UINT64CONST(9885339602917624555), UINT64CONST(283625966735416996)},
+ {UINT64CONST(4218922867592189321), UINT64CONST(226900773388333597)}, {UINT64CONST(14443184738299482427), UINT64CONST(181520618710666877)},
+ {UINT64CONST(4175850161155765295), UINT64CONST(145216494968533502)}, {UINT64CONST(10370709072591134795), UINT64CONST(232346391949653603)},
+ {UINT64CONST(15675264887556728482), UINT64CONST(185877113559722882)}, {UINT64CONST(5161514280561562140), UINT64CONST(148701690847778306)},
+ {UINT64CONST(879725219414678777), UINT64CONST(237922705356445290)}, {UINT64CONST(703780175531743021), UINT64CONST(190338164285156232)},
+ {UINT64CONST(11631070584651125387), UINT64CONST(152270531428124985)}, {UINT64CONST(162968861732249003), UINT64CONST(243632850284999977)},
+ {UINT64CONST(11198421533611530172), UINT64CONST(194906280227999981)}, {UINT64CONST(5269388412147313814), UINT64CONST(155925024182399985)},
+ {UINT64CONST(8431021459435702103), UINT64CONST(249480038691839976)}, {UINT64CONST(3055468352806651359), UINT64CONST(199584030953471981)},
+ {UINT64CONST(17201769941212962380), UINT64CONST(159667224762777584)}, {UINT64CONST(16454785461715008838), UINT64CONST(255467559620444135)},
+ {UINT64CONST(13163828369372007071), UINT64CONST(204374047696355308)}, {UINT64CONST(17909760324981426303), UINT64CONST(163499238157084246)},
+ {UINT64CONST(2830174816776909822), UINT64CONST(261598781051334795)}, {UINT64CONST(2264139853421527858), UINT64CONST(209279024841067836)},
+ {UINT64CONST(16568707141704863579), UINT64CONST(167423219872854268)}, {UINT64CONST(4373838538276319787), UINT64CONST(267877151796566830)},
+ {UINT64CONST(3499070830621055830), UINT64CONST(214301721437253464)}, {UINT64CONST(6488605479238754987), UINT64CONST(171441377149802771)},
+ {UINT64CONST(3003071137298187333), UINT64CONST(274306203439684434)}, {UINT64CONST(6091805724580460189), UINT64CONST(219444962751747547)},
+ {UINT64CONST(15941491023890099121), UINT64CONST(175555970201398037)}, {UINT64CONST(10748990379256517301), UINT64CONST(280889552322236860)},
+ {UINT64CONST(8599192303405213841), UINT64CONST(224711641857789488)}, {UINT64CONST(14258051472207991719), UINT64CONST(179769313486231590)}
+};
+
+static const uint64 DOUBLE_POW5_SPLIT[326][2] = {
+ {UINT64CONST(0), UINT64CONST(72057594037927936)}, {UINT64CONST(0), UINT64CONST(90071992547409920)},
+ {UINT64CONST(0), UINT64CONST(112589990684262400)}, {UINT64CONST(0), UINT64CONST(140737488355328000)},
+ {UINT64CONST(0), UINT64CONST(87960930222080000)}, {UINT64CONST(0), UINT64CONST(109951162777600000)},
+ {UINT64CONST(0), UINT64CONST(137438953472000000)}, {UINT64CONST(0), UINT64CONST(85899345920000000)},
+ {UINT64CONST(0), UINT64CONST(107374182400000000)}, {UINT64CONST(0), UINT64CONST(134217728000000000)},
+ {UINT64CONST(0), UINT64CONST(83886080000000000)}, {UINT64CONST(0), UINT64CONST(104857600000000000)},
+ {UINT64CONST(0), UINT64CONST(131072000000000000)}, {UINT64CONST(0), UINT64CONST(81920000000000000)},
+ {UINT64CONST(0), UINT64CONST(102400000000000000)}, {UINT64CONST(0), UINT64CONST(128000000000000000)},
+ {UINT64CONST(0), UINT64CONST(80000000000000000)}, {UINT64CONST(0), UINT64CONST(100000000000000000)},
+ {UINT64CONST(0), UINT64CONST(125000000000000000)}, {UINT64CONST(0), UINT64CONST(78125000000000000)},
+ {UINT64CONST(0), UINT64CONST(97656250000000000)}, {UINT64CONST(0), UINT64CONST(122070312500000000)},
+ {UINT64CONST(0), UINT64CONST(76293945312500000)}, {UINT64CONST(0), UINT64CONST(95367431640625000)},
+ {UINT64CONST(0), UINT64CONST(119209289550781250)}, {UINT64CONST(4611686018427387904), UINT64CONST(74505805969238281)},
+ {UINT64CONST(10376293541461622784), UINT64CONST(93132257461547851)}, {UINT64CONST(8358680908399640576), UINT64CONST(116415321826934814)},
+ {UINT64CONST(612489549322387456), UINT64CONST(72759576141834259)}, {UINT64CONST(14600669991935148032), UINT64CONST(90949470177292823)},
+ {UINT64CONST(13639151471491547136), UINT64CONST(113686837721616029)}, {UINT64CONST(3213881284082270208), UINT64CONST(142108547152020037)},
+ {UINT64CONST(4314518811765112832), UINT64CONST(88817841970012523)}, {UINT64CONST(781462496279003136), UINT64CONST(111022302462515654)},
+ {UINT64CONST(10200200157203529728), UINT64CONST(138777878078144567)}, {UINT64CONST(13292654125893287936), UINT64CONST(86736173798840354)},
+ {UINT64CONST(7392445620511834112), UINT64CONST(108420217248550443)}, {UINT64CONST(4628871007212404736), UINT64CONST(135525271560688054)},
+ {UINT64CONST(16728102434789916672), UINT64CONST(84703294725430033)}, {UINT64CONST(7075069988205232128), UINT64CONST(105879118406787542)},
+ {UINT64CONST(18067209522111315968), UINT64CONST(132348898008484427)}, {UINT64CONST(8986162942105878528), UINT64CONST(82718061255302767)},
+ {UINT64CONST(6621017659204960256), UINT64CONST(103397576569128459)}, {UINT64CONST(3664586055578812416), UINT64CONST(129246970711410574)},
+ {UINT64CONST(16125424340018921472), UINT64CONST(80779356694631608)}, {UINT64CONST(1710036351314100224), UINT64CONST(100974195868289511)},
+ {UINT64CONST(15972603494424788992), UINT64CONST(126217744835361888)}, {UINT64CONST(9982877184015493120), UINT64CONST(78886090522101180)},
+ {UINT64CONST(12478596480019366400), UINT64CONST(98607613152626475)}, {UINT64CONST(10986559581596820096), UINT64CONST(123259516440783094)},
+ {UINT64CONST(2254913720070624656), UINT64CONST(77037197775489434)}, {UINT64CONST(12042014186943056628), UINT64CONST(96296497219361792)},
+ {UINT64CONST(15052517733678820785), UINT64CONST(120370621524202240)}, {UINT64CONST(9407823583549262990), UINT64CONST(75231638452626400)},
+ {UINT64CONST(11759779479436578738), UINT64CONST(94039548065783000)}, {UINT64CONST(14699724349295723422), UINT64CONST(117549435082228750)},
+ {UINT64CONST(4575641699882439235), UINT64CONST(73468396926392969)}, {UINT64CONST(10331238143280436948), UINT64CONST(91835496157991211)},
+ {UINT64CONST(8302361660673158281), UINT64CONST(114794370197489014)}, {UINT64CONST(1154580038986672043), UINT64CONST(143492962746861268)},
+ {UINT64CONST(9944984561221445835), UINT64CONST(89683101716788292)}, {UINT64CONST(12431230701526807293), UINT64CONST(112103877145985365)},
+ {UINT64CONST(1703980321626345405), UINT64CONST(140129846432481707)}, {UINT64CONST(17205888765512323542), UINT64CONST(87581154020301066)},
+ {UINT64CONST(12283988920035628619), UINT64CONST(109476442525376333)}, {UINT64CONST(1519928094762372062), UINT64CONST(136845553156720417)},
+ {UINT64CONST(12479170105294952299), UINT64CONST(85528470722950260)}, {UINT64CONST(15598962631618690374), UINT64CONST(106910588403687825)},
+ {UINT64CONST(5663645234241199255), UINT64CONST(133638235504609782)}, {UINT64CONST(17374836326682913246), UINT64CONST(83523897190381113)},
+ {UINT64CONST(7883487353071477846), UINT64CONST(104404871487976392)}, {UINT64CONST(9854359191339347308), UINT64CONST(130506089359970490)},
+ {UINT64CONST(10770660513014479971), UINT64CONST(81566305849981556)}, {UINT64CONST(13463325641268099964), UINT64CONST(101957882312476945)},
+ {UINT64CONST(2994098996302961243), UINT64CONST(127447352890596182)}, {UINT64CONST(15706369927971514489), UINT64CONST(79654595556622613)},
+ {UINT64CONST(5797904354682229399), UINT64CONST(99568244445778267)}, {UINT64CONST(2635694424925398845), UINT64CONST(124460305557222834)},
+ {UINT64CONST(6258995034005762182), UINT64CONST(77787690973264271)}, {UINT64CONST(3212057774079814824), UINT64CONST(97234613716580339)},
+ {UINT64CONST(17850130272881932242), UINT64CONST(121543267145725423)}, {UINT64CONST(18073860448192289507), UINT64CONST(75964541966078389)},
+ {UINT64CONST(8757267504958198172), UINT64CONST(94955677457597987)}, {UINT64CONST(6334898362770359811), UINT64CONST(118694596821997484)},
+ {UINT64CONST(13182683513586250689), UINT64CONST(74184123013748427)}, {UINT64CONST(11866668373555425458), UINT64CONST(92730153767185534)},
+ {UINT64CONST(5609963430089506015), UINT64CONST(115912692208981918)}, {UINT64CONST(17341285199088104971), UINT64CONST(72445432630613698)},
+ {UINT64CONST(12453234462005355406), UINT64CONST(90556790788267123)}, {UINT64CONST(10954857059079306353), UINT64CONST(113195988485333904)},
+ {UINT64CONST(13693571323849132942), UINT64CONST(141494985606667380)}, {UINT64CONST(17781854114260483896), UINT64CONST(88434366004167112)},
+ {UINT64CONST(3780573569116053255), UINT64CONST(110542957505208891)}, {UINT64CONST(114030942967678664), UINT64CONST(138178696881511114)},
+ {UINT64CONST(4682955357782187069), UINT64CONST(86361685550944446)}, {UINT64CONST(15077066234082509644), UINT64CONST(107952106938680557)},
+ {UINT64CONST(5011274737320973344), UINT64CONST(134940133673350697)}, {UINT64CONST(14661261756894078100), UINT64CONST(84337583545844185)},
+ {UINT64CONST(4491519140835433913), UINT64CONST(105421979432305232)}, {UINT64CONST(5614398926044292391), UINT64CONST(131777474290381540)},
+ {UINT64CONST(12732371365632458552), UINT64CONST(82360921431488462)}, {UINT64CONST(6692092170185797382), UINT64CONST(102951151789360578)},
+ {UINT64CONST(17588487249587022536), UINT64CONST(128688939736700722)}, {UINT64CONST(15604490549419276989), UINT64CONST(80430587335437951)},
+ {UINT64CONST(14893927168346708332), UINT64CONST(100538234169297439)}, {UINT64CONST(14005722942005997511), UINT64CONST(125672792711621799)},
+ {UINT64CONST(15671105866394830300), UINT64CONST(78545495444763624)}, {UINT64CONST(1142138259283986260), UINT64CONST(98181869305954531)},
+ {UINT64CONST(15262730879387146537), UINT64CONST(122727336632443163)}, {UINT64CONST(7233363790403272633), UINT64CONST(76704585395276977)},
+ {UINT64CONST(13653390756431478696), UINT64CONST(95880731744096221)}, {UINT64CONST(3231680390257184658), UINT64CONST(119850914680120277)},
+ {UINT64CONST(4325643253124434363), UINT64CONST(74906821675075173)}, {UINT64CONST(10018740084832930858), UINT64CONST(93633527093843966)},
+ {UINT64CONST(3300053069186387764), UINT64CONST(117041908867304958)}, {UINT64CONST(15897591223523656064), UINT64CONST(73151193042065598)},
+ {UINT64CONST(10648616992549794273), UINT64CONST(91438991302581998)}, {UINT64CONST(4087399203832467033), UINT64CONST(114298739128227498)},
+ {UINT64CONST(14332621041645359599), UINT64CONST(142873423910284372)}, {UINT64CONST(18181260187883125557), UINT64CONST(89295889943927732)},
+ {UINT64CONST(4279831161144355331), UINT64CONST(111619862429909666)}, {UINT64CONST(14573160988285219972), UINT64CONST(139524828037387082)},
+ {UINT64CONST(13719911636105650386), UINT64CONST(87203017523366926)}, {UINT64CONST(7926517508277287175), UINT64CONST(109003771904208658)},
+ {UINT64CONST(684774848491833161), UINT64CONST(136254714880260823)}, {UINT64CONST(7345513307948477581), UINT64CONST(85159196800163014)},
+ {UINT64CONST(18405263671790372785), UINT64CONST(106448996000203767)}, {UINT64CONST(18394893571310578077), UINT64CONST(133061245000254709)},
+ {UINT64CONST(13802651491282805250), UINT64CONST(83163278125159193)}, {UINT64CONST(3418256308821342851), UINT64CONST(103954097656448992)},
+ {UINT64CONST(4272820386026678563), UINT64CONST(129942622070561240)}, {UINT64CONST(2670512741266674102), UINT64CONST(81214138794100775)},
+ {UINT64CONST(17173198981865506339), UINT64CONST(101517673492625968)}, {UINT64CONST(3019754653622331308), UINT64CONST(126897091865782461)},
+ {UINT64CONST(4193189667727651020), UINT64CONST(79310682416114038)}, {UINT64CONST(14464859121514339583), UINT64CONST(99138353020142547)},
+ {UINT64CONST(13469387883465536574), UINT64CONST(123922941275178184)}, {UINT64CONST(8418367427165960359), UINT64CONST(77451838296986365)},
+ {UINT64CONST(15134645302384838353), UINT64CONST(96814797871232956)}, {UINT64CONST(471562554271496325), UINT64CONST(121018497339041196)},
+ {UINT64CONST(9518098633274461011), UINT64CONST(75636560836900747)}, {UINT64CONST(7285937273165688360), UINT64CONST(94545701046125934)},
+ {UINT64CONST(18330793628311886258), UINT64CONST(118182126307657417)}, {UINT64CONST(4539216990053847055), UINT64CONST(73863828942285886)},
+ {UINT64CONST(14897393274422084627), UINT64CONST(92329786177857357)}, {UINT64CONST(4786683537745442072), UINT64CONST(115412232722321697)},
+ {UINT64CONST(14520892257159371055), UINT64CONST(72132645451451060)}, {UINT64CONST(18151115321449213818), UINT64CONST(90165806814313825)},
+ {UINT64CONST(8853836096529353561), UINT64CONST(112707258517892282)}, {UINT64CONST(1843923083806916143), UINT64CONST(140884073147365353)},
+ {UINT64CONST(12681666973447792349), UINT64CONST(88052545717103345)}, {UINT64CONST(2017025661527576725), UINT64CONST(110065682146379182)},
+ {UINT64CONST(11744654113764246714), UINT64CONST(137582102682973977)}, {UINT64CONST(422879793461572340), UINT64CONST(85988814176858736)},
+ {UINT64CONST(528599741826965425), UINT64CONST(107486017721073420)}, {UINT64CONST(660749677283706782), UINT64CONST(134357522151341775)},
+ {UINT64CONST(7330497575943398595), UINT64CONST(83973451344588609)}, {UINT64CONST(13774807988356636147), UINT64CONST(104966814180735761)},
+ {UINT64CONST(3383451930163631472), UINT64CONST(131208517725919702)}, {UINT64CONST(15949715511634433382), UINT64CONST(82005323578699813)},
+ {UINT64CONST(6102086334260878016), UINT64CONST(102506654473374767)}, {UINT64CONST(3015921899398709616), UINT64CONST(128133318091718459)},
+ {UINT64CONST(18025852251620051174), UINT64CONST(80083323807324036)}, {UINT64CONST(4085571240815512351), UINT64CONST(100104154759155046)},
+ {UINT64CONST(14330336087874166247), UINT64CONST(125130193448943807)}, {UINT64CONST(15873989082562435760), UINT64CONST(78206370905589879)},
+ {UINT64CONST(15230800334775656796), UINT64CONST(97757963631987349)}, {UINT64CONST(5203442363187407284), UINT64CONST(122197454539984187)},
+ {UINT64CONST(946308467778435600), UINT64CONST(76373409087490117)}, {UINT64CONST(5794571603150432404), UINT64CONST(95466761359362646)},
+ {UINT64CONST(16466586540792816313), UINT64CONST(119333451699203307)}, {UINT64CONST(7985773578781816244), UINT64CONST(74583407312002067)},
+ {UINT64CONST(5370530955049882401), UINT64CONST(93229259140002584)}, {UINT64CONST(6713163693812353001), UINT64CONST(116536573925003230)},
+ {UINT64CONST(18030785363914884337), UINT64CONST(72835358703127018)}, {UINT64CONST(13315109668038829614), UINT64CONST(91044198378908773)},
+ {UINT64CONST(2808829029766373305), UINT64CONST(113805247973635967)}, {UINT64CONST(17346094342490130344), UINT64CONST(142256559967044958)},
+ {UINT64CONST(6229622945628943561), UINT64CONST(88910349979403099)}, {UINT64CONST(3175342663608791547), UINT64CONST(111137937474253874)},
+ {UINT64CONST(13192550366365765242), UINT64CONST(138922421842817342)}, {UINT64CONST(3633657960551215372), UINT64CONST(86826513651760839)},
+ {UINT64CONST(18377130505971182927), UINT64CONST(108533142064701048)}, {UINT64CONST(4524669058754427043), UINT64CONST(135666427580876311)},
+ {UINT64CONST(9745447189362598758), UINT64CONST(84791517238047694)}, {UINT64CONST(2958436949848472639), UINT64CONST(105989396547559618)},
+ {UINT64CONST(12921418224165366607), UINT64CONST(132486745684449522)}, {UINT64CONST(12687572408530742033), UINT64CONST(82804216052780951)},
+ {UINT64CONST(11247779492236039638), UINT64CONST(103505270065976189)}, {UINT64CONST(224666310012885835), UINT64CONST(129381587582470237)},
+ {UINT64CONST(2446259452971747599), UINT64CONST(80863492239043898)}, {UINT64CONST(12281196353069460307), UINT64CONST(101079365298804872)},
+ {UINT64CONST(15351495441336825384), UINT64CONST(126349206623506090)}, {UINT64CONST(14206370669262903769), UINT64CONST(78968254139691306)},
+ {UINT64CONST(8534591299723853903), UINT64CONST(98710317674614133)}, {UINT64CONST(15279925143082205283), UINT64CONST(123387897093267666)},
+ {UINT64CONST(14161639232853766206), UINT64CONST(77117435683292291)}, {UINT64CONST(13090363022639819853), UINT64CONST(96396794604115364)},
+ {UINT64CONST(16362953778299774816), UINT64CONST(120495993255144205)}, {UINT64CONST(12532689120651053212), UINT64CONST(75309995784465128)},
+ {UINT64CONST(15665861400813816515), UINT64CONST(94137494730581410)}, {UINT64CONST(10358954714162494836), UINT64CONST(117671868413226763)},
+ {UINT64CONST(4168503687137865320), UINT64CONST(73544917758266727)}, {UINT64CONST(598943590494943747), UINT64CONST(91931147197833409)},
+ {UINT64CONST(5360365506546067587), UINT64CONST(114913933997291761)}, {UINT64CONST(11312142901609972388), UINT64CONST(143642417496614701)},
+ {UINT64CONST(9375932322719926695), UINT64CONST(89776510935384188)}, {UINT64CONST(11719915403399908368), UINT64CONST(112220638669230235)},
+ {UINT64CONST(10038208235822497557), UINT64CONST(140275798336537794)}, {UINT64CONST(10885566165816448877), UINT64CONST(87672373960336121)},
+ {UINT64CONST(18218643725697949000), UINT64CONST(109590467450420151)}, {UINT64CONST(18161618638695048346), UINT64CONST(136988084313025189)},
+ {UINT64CONST(13656854658398099168), UINT64CONST(85617552695640743)}, {UINT64CONST(12459382304570236056), UINT64CONST(107021940869550929)},
+ {UINT64CONST(1739169825430631358), UINT64CONST(133777426086938662)}, {UINT64CONST(14922039196176308311), UINT64CONST(83610891304336663)},
+ {UINT64CONST(14040862976792997485), UINT64CONST(104513614130420829)}, {UINT64CONST(3716020665709083144), UINT64CONST(130642017663026037)},
+ {UINT64CONST(4628355925281870917), UINT64CONST(81651261039391273)}, {UINT64CONST(10397130925029726550), UINT64CONST(102064076299239091)},
+ {UINT64CONST(8384727637859770284), UINT64CONST(127580095374048864)}, {UINT64CONST(5240454773662356427), UINT64CONST(79737559608780540)},
+ {UINT64CONST(6550568467077945534), UINT64CONST(99671949510975675)}, {UINT64CONST(3576524565420044014), UINT64CONST(124589936888719594)},
+ {UINT64CONST(6847013871814915412), UINT64CONST(77868710555449746)}, {UINT64CONST(17782139376623420074), UINT64CONST(97335888194312182)},
+ {UINT64CONST(13004302183924499284), UINT64CONST(121669860242890228)}, {UINT64CONST(17351060901807587860), UINT64CONST(76043662651806392)},
+ {UINT64CONST(3242082053549933210), UINT64CONST(95054578314757991)}, {UINT64CONST(17887660622219580224), UINT64CONST(118818222893447488)},
+ {UINT64CONST(11179787888887237640), UINT64CONST(74261389308404680)}, {UINT64CONST(13974734861109047050), UINT64CONST(92826736635505850)},
+ {UINT64CONST(8245046539531533005), UINT64CONST(116033420794382313)}, {UINT64CONST(16682369133275677888), UINT64CONST(72520887996488945)},
+ {UINT64CONST(7017903361312433648), UINT64CONST(90651109995611182)}, {UINT64CONST(17995751238495317868), UINT64CONST(113313887494513977)},
+ {UINT64CONST(8659630992836983623), UINT64CONST(141642359368142472)}, {UINT64CONST(5412269370523114764), UINT64CONST(88526474605089045)},
+ {UINT64CONST(11377022731581281359), UINT64CONST(110658093256361306)}, {UINT64CONST(4997906377621825891), UINT64CONST(138322616570451633)},
+ {UINT64CONST(14652906532082110942), UINT64CONST(86451635356532270)}, {UINT64CONST(9092761128247862869), UINT64CONST(108064544195665338)},
+ {UINT64CONST(2142579373455052779), UINT64CONST(135080680244581673)}, {UINT64CONST(12868327154477877747), UINT64CONST(84425425152863545)},
+ {UINT64CONST(2250350887815183471), UINT64CONST(105531781441079432)}, {UINT64CONST(2812938609768979339), UINT64CONST(131914726801349290)},
+ {UINT64CONST(6369772649532999991), UINT64CONST(82446704250843306)}, {UINT64CONST(17185587848771025797), UINT64CONST(103058380313554132)},
+ {UINT64CONST(3035240737254230630), UINT64CONST(128822975391942666)}, {UINT64CONST(6508711479211282048), UINT64CONST(80514359619964166)},
+ {UINT64CONST(17359261385868878368), UINT64CONST(100642949524955207)}, {UINT64CONST(17087390713908710056), UINT64CONST(125803686906194009)},
+ {UINT64CONST(3762090168551861929), UINT64CONST(78627304316371256)}, {UINT64CONST(4702612710689827411), UINT64CONST(98284130395464070)},
+ {UINT64CONST(15101637925217060072), UINT64CONST(122855162994330087)}, {UINT64CONST(16356052730901744401), UINT64CONST(76784476871456304)},
+ {UINT64CONST(1998321839917628885), UINT64CONST(95980596089320381)}, {UINT64CONST(7109588318324424010), UINT64CONST(119975745111650476)},
+ {UINT64CONST(13666864735807540814), UINT64CONST(74984840694781547)}, {UINT64CONST(12471894901332038114), UINT64CONST(93731050868476934)},
+ {UINT64CONST(6366496589810271835), UINT64CONST(117163813585596168)}, {UINT64CONST(3979060368631419896), UINT64CONST(73227383490997605)},
+ {UINT64CONST(9585511479216662775), UINT64CONST(91534229363747006)}, {UINT64CONST(2758517312166052660), UINT64CONST(114417786704683758)},
+ {UINT64CONST(12671518677062341634), UINT64CONST(143022233380854697)}, {UINT64CONST(1002170145522881665), UINT64CONST(89388895863034186)},
+ {UINT64CONST(10476084718758377889), UINT64CONST(111736119828792732)}, {UINT64CONST(13095105898447972362), UINT64CONST(139670149785990915)},
+ {UINT64CONST(5878598177316288774), UINT64CONST(87293843616244322)}, {UINT64CONST(16571619758500136775), UINT64CONST(109117304520305402)},
+ {UINT64CONST(11491152661270395161), UINT64CONST(136396630650381753)}, {UINT64CONST(264441385652915120), UINT64CONST(85247894156488596)},
+ {UINT64CONST(330551732066143900), UINT64CONST(106559867695610745)}, {UINT64CONST(5024875683510067779), UINT64CONST(133199834619513431)},
+ {UINT64CONST(10058076329834874218), UINT64CONST(83249896637195894)}, {UINT64CONST(3349223375438816964), UINT64CONST(104062370796494868)},
+ {UINT64CONST(4186529219298521205), UINT64CONST(130077963495618585)}, {UINT64CONST(14145795808130045513), UINT64CONST(81298727184761615)},
+ {UINT64CONST(13070558741735168987), UINT64CONST(101623408980952019)}, {UINT64CONST(11726512408741573330), UINT64CONST(127029261226190024)},
+ {UINT64CONST(7329070255463483331), UINT64CONST(79393288266368765)}, {UINT64CONST(13773023837756742068), UINT64CONST(99241610332960956)},
+ {UINT64CONST(17216279797195927585), UINT64CONST(124052012916201195)}, {UINT64CONST(8454331864033760789), UINT64CONST(77532508072625747)},
+ {UINT64CONST(5956228811614813082), UINT64CONST(96915635090782184)}, {UINT64CONST(7445286014518516353), UINT64CONST(121144543863477730)},
+ {UINT64CONST(9264989777501460624), UINT64CONST(75715339914673581)}, {UINT64CONST(16192923240304213684), UINT64CONST(94644174893341976)},
+ {UINT64CONST(1794409976670715490), UINT64CONST(118305218616677471)}, {UINT64CONST(8039035263060279037), UINT64CONST(73940761635423419)},
+ {UINT64CONST(5437108060397960892), UINT64CONST(92425952044279274)}, {UINT64CONST(16019757112352226923), UINT64CONST(115532440055349092)},
+ {UINT64CONST(788976158365366019), UINT64CONST(72207775034593183)}, {UINT64CONST(14821278253238871236), UINT64CONST(90259718793241478)},
+ {UINT64CONST(9303225779693813237), UINT64CONST(112824648491551848)}, {UINT64CONST(11629032224617266546), UINT64CONST(141030810614439810)},
+ {UINT64CONST(11879831158813179495), UINT64CONST(88144256634024881)}, {UINT64CONST(1014730893234310657), UINT64CONST(110180320792531102)},
+ {UINT64CONST(10491785653397664129), UINT64CONST(137725400990663877)}, {UINT64CONST(8863209042587234033), UINT64CONST(86078375619164923)},
+ {UINT64CONST(6467325284806654637), UINT64CONST(107597969523956154)}, {UINT64CONST(17307528642863094104), UINT64CONST(134497461904945192)},
+ {UINT64CONST(10817205401789433815), UINT64CONST(84060913690590745)}, {UINT64CONST(18133192770664180173), UINT64CONST(105076142113238431)},
+ {UINT64CONST(18054804944902837312), UINT64CONST(131345177641548039)}, {UINT64CONST(18201782118205355176), UINT64CONST(82090736025967524)},
+ {UINT64CONST(4305483574047142354), UINT64CONST(102613420032459406)}, {UINT64CONST(14605226504413703751), UINT64CONST(128266775040574257)},
+ {UINT64CONST(2210737537617482988), UINT64CONST(80166734400358911)}, {UINT64CONST(16598479977304017447), UINT64CONST(100208418000448638)},
+ {UINT64CONST(11524727934775246001), UINT64CONST(125260522500560798)}, {UINT64CONST(2591268940807140847), UINT64CONST(78287826562850499)},
+ {UINT64CONST(17074144231291089770), UINT64CONST(97859783203563123)}, {UINT64CONST(16730994270686474309), UINT64CONST(122324729004453904)},
+ {UINT64CONST(10456871419179046443), UINT64CONST(76452955627783690)}, {UINT64CONST(3847717237119032246), UINT64CONST(95566194534729613)},
+ {UINT64CONST(9421332564826178211), UINT64CONST(119457743168412016)}, {UINT64CONST(5888332853016361382), UINT64CONST(74661089480257510)},
+ {UINT64CONST(16583788103125227536), UINT64CONST(93326361850321887)}, {UINT64CONST(16118049110479146516), UINT64CONST(116657952312902359)},
+ {UINT64CONST(16991309721690548428), UINT64CONST(72911220195563974)}, {UINT64CONST(12015765115258409727), UINT64CONST(91139025244454968)},
+ {UINT64CONST(15019706394073012159), UINT64CONST(113923781555568710)}, {UINT64CONST(9551260955736489391), UINT64CONST(142404726944460888)},
+ {UINT64CONST(5969538097335305869), UINT64CONST(89002954340288055)}, {UINT64CONST(2850236603241744433), UINT64CONST(111253692925360069)}
+};
+
+#endif /* RYU_D2S_FULL_TABLE_H */
diff --git a/src/common/d2s_intrinsics.h b/src/common/d2s_intrinsics.h
new file mode 100644
index 0000000..ae0f28d
--- /dev/null
+++ b/src/common/d2s_intrinsics.h
@@ -0,0 +1,202 @@
+/*---------------------------------------------------------------------------
+ *
+ * Ryu floating-point output for double precision.
+ *
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/d2s_intrinsics.h
+ *
+ * This is a modification of code taken from github.com/ulfjack/ryu under the
+ * terms of the Boost license (not the Apache license). The original copyright
+ * notice follows:
+ *
+ * Copyright 2018 Ulf Adams
+ *
+ * The contents of this file may be used under the terms of the Apache
+ * License, Version 2.0.
+ *
+ * (See accompanying file LICENSE-Apache or copy at
+ * http://www.apache.org/licenses/LICENSE-2.0)
+ *
+ * Alternatively, the contents of this file may be used under the terms of the
+ * Boost Software License, Version 1.0.
+ *
+ * (See accompanying file LICENSE-Boost or copy at
+ * https://www.boost.org/LICENSE_1_0.txt)
+ *
+ * Unless required by applicable law or agreed to in writing, this software is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
+ *
+ *---------------------------------------------------------------------------
+ */
+#ifndef RYU_D2S_INTRINSICS_H
+#define RYU_D2S_INTRINSICS_H
+
+#if defined(HAS_64_BIT_INTRINSICS)
+
+#include <intrin.h>
+
+static inline uint64
+umul128(const uint64 a, const uint64 b, uint64 *const productHi)
+{
+ return _umul128(a, b, productHi);
+}
+
+static inline uint64
+shiftright128(const uint64 lo, const uint64 hi, const uint32 dist)
+{
+ /*
+ * For the __shiftright128 intrinsic, the shift value is always modulo 64.
+ * In the current implementation of the double-precision version of Ryu,
+ * the shift value is always < 64. (In the case RYU_OPTIMIZE_SIZE == 0,
+ * the shift value is in the range [49, 58]. Otherwise in the range [2,
+ * 59].) Check this here in case a future change requires larger shift
+ * values. In this case this function needs to be adjusted.
+ */
+ Assert(dist < 64);
+ return __shiftright128(lo, hi, (unsigned char) dist);
+}
+
+#else /* defined(HAS_64_BIT_INTRINSICS) */
+
+static inline uint64
+umul128(const uint64 a, const uint64 b, uint64 *const productHi)
+{
+ /*
+ * The casts here help MSVC to avoid calls to the __allmul library
+ * function.
+ */
+ const uint32 aLo = (uint32) a;
+ const uint32 aHi = (uint32) (a >> 32);
+ const uint32 bLo = (uint32) b;
+ const uint32 bHi = (uint32) (b >> 32);
+
+ const uint64 b00 = (uint64) aLo * bLo;
+ const uint64 b01 = (uint64) aLo * bHi;
+ const uint64 b10 = (uint64) aHi * bLo;
+ const uint64 b11 = (uint64) aHi * bHi;
+
+ const uint32 b00Lo = (uint32) b00;
+ const uint32 b00Hi = (uint32) (b00 >> 32);
+
+ const uint64 mid1 = b10 + b00Hi;
+ const uint32 mid1Lo = (uint32) (mid1);
+ const uint32 mid1Hi = (uint32) (mid1 >> 32);
+
+ const uint64 mid2 = b01 + mid1Lo;
+ const uint32 mid2Lo = (uint32) (mid2);
+ const uint32 mid2Hi = (uint32) (mid2 >> 32);
+
+ const uint64 pHi = b11 + mid1Hi + mid2Hi;
+ const uint64 pLo = ((uint64) mid2Lo << 32) + b00Lo;
+
+ *productHi = pHi;
+ return pLo;
+}
+
+static inline uint64
+shiftright128(const uint64 lo, const uint64 hi, const uint32 dist)
+{
+ /* We don't need to handle the case dist >= 64 here (see above). */
+ Assert(dist < 64);
+#if !defined(RYU_32_BIT_PLATFORM)
+ Assert(dist > 0);
+ return (hi << (64 - dist)) | (lo >> dist);
+#else
+ /* Avoid a 64-bit shift by taking advantage of the range of shift values. */
+ Assert(dist >= 32);
+ return (hi << (64 - dist)) | ((uint32) (lo >> 32) >> (dist - 32));
+#endif
+}
+
+#endif /* defined(HAS_64_BIT_INTRINSICS) */
+
+#ifdef RYU_32_BIT_PLATFORM
+
+/* Returns the high 64 bits of the 128-bit product of a and b. */
+static inline uint64
+umulh(const uint64 a, const uint64 b)
+{
+ /*
+ * Reuse the umul128 implementation. Optimizers will likely eliminate the
+ * instructions used to compute the low part of the product.
+ */
+ uint64 hi;
+
+ umul128(a, b, &hi);
+ return hi;
+}
+
+/*----
+ * On 32-bit platforms, compilers typically generate calls to library
+ * functions for 64-bit divisions, even if the divisor is a constant.
+ *
+ * E.g.:
+ * https://bugs.llvm.org/show_bug.cgi?id=37932
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=17958
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37443
+ *
+ * The functions here perform division-by-constant using multiplications
+ * in the same way as 64-bit compilers would do.
+ *
+ * NB:
+ * The multipliers and shift values are the ones generated by clang x64
+ * for expressions like x/5, x/10, etc.
+ *----
+ */
+
+static inline uint64
+div5(const uint64 x)
+{
+ return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 2;
+}
+
+static inline uint64
+div10(const uint64 x)
+{
+ return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 3;
+}
+
+static inline uint64
+div100(const uint64 x)
+{
+ return umulh(x >> 2, UINT64CONST(0x28F5C28F5C28F5C3)) >> 2;
+}
+
+static inline uint64
+div1e8(const uint64 x)
+{
+ return umulh(x, UINT64CONST(0xABCC77118461CEFD)) >> 26;
+}
+
+#else /* RYU_32_BIT_PLATFORM */
+
+static inline uint64
+div5(const uint64 x)
+{
+ return x / 5;
+}
+
+static inline uint64
+div10(const uint64 x)
+{
+ return x / 10;
+}
+
+static inline uint64
+div100(const uint64 x)
+{
+ return x / 100;
+}
+
+static inline uint64
+div1e8(const uint64 x)
+{
+ return x / 100000000;
+}
+
+#endif /* RYU_32_BIT_PLATFORM */
+
+#endif /* RYU_D2S_INTRINSICS_H */
diff --git a/src/common/digit_table.h b/src/common/digit_table.h
new file mode 100644
index 0000000..483aa17
--- /dev/null
+++ b/src/common/digit_table.h
@@ -0,0 +1,21 @@
+#ifndef RYU_DIGIT_TABLE_H
+#define RYU_DIGIT_TABLE_H
+
+/*
+ * A table of all two-digit numbers. This is used to speed up decimal digit
+ * generation by copying pairs of digits into the final output.
+ */
+static const char DIGIT_TABLE[200] = {
+ '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
+ '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
+ '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9',
+ '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9',
+ '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9',
+ '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
+ '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
+ '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9',
+ '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9',
+ '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'
+};
+
+#endif /* RYU_DIGIT_TABLE_H */
diff --git a/src/common/encnames.c b/src/common/encnames.c
new file mode 100644
index 0000000..0412a82
--- /dev/null
+++ b/src/common/encnames.c
@@ -0,0 +1,598 @@
+/*-------------------------------------------------------------------------
+ *
+ * encnames.c
+ * Encoding names and routines for working with them.
+ *
+ * Portions Copyright (c) 2001-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/encnames.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include <ctype.h>
+#include <unistd.h>
+
+#include "mb/pg_wchar.h"
+
+
+/* ----------
+ * All encoding names, sorted: *** A L P H A B E T I C ***
+ *
+ * All names must be without irrelevant chars; search routines use
+ * isalnum() chars only. This means ISO-8859-1, iso_8859-1 and Iso8859_1
+ * are always converted to 'iso88591'. All must be lower case.
+ *
+ * The table doesn't contain 'cs' aliases (like csISOLatin1). Are they needed?
+ *
+ * Karel Zak, Aug 2001
+ * ----------
+ */
+typedef struct pg_encname
+{
+ const char *name;
+ pg_enc encoding;
+} pg_encname;
+
+static const pg_encname pg_encname_tbl[] =
+{
+ {
+ "abc", PG_WIN1258
+ }, /* alias for WIN1258 */
+ {
+ "alt", PG_WIN866
+ }, /* IBM866 */
+ {
+ "big5", PG_BIG5
+ }, /* Big5; Chinese for Taiwan multibyte set */
+ {
+ "euccn", PG_EUC_CN
+ }, /* EUC-CN; Extended Unix Code for simplified
+ * Chinese */
+ {
+ "eucjis2004", PG_EUC_JIS_2004
+ }, /* EUC-JIS-2004; Extended UNIX Code fixed
+ * Width for Japanese, standard JIS X 0213 */
+ {
+ "eucjp", PG_EUC_JP
+ }, /* EUC-JP; Extended UNIX Code fixed Width for
+ * Japanese, standard OSF */
+ {
+ "euckr", PG_EUC_KR
+ }, /* EUC-KR; Extended Unix Code for Korean, KS
+ * X 1001 standard */
+ {
+ "euctw", PG_EUC_TW
+ }, /* EUC-TW; Extended Unix Code for
+ *
+ * traditional Chinese */
+ {
+ "gb18030", PG_GB18030
+ }, /* GB18030;GB18030 */
+ {
+ "gbk", PG_GBK
+ }, /* GBK; Chinese Windows CodePage 936
+ * simplified Chinese */
+ {
+ "iso88591", PG_LATIN1
+ }, /* ISO-8859-1; RFC1345,KXS2 */
+ {
+ "iso885910", PG_LATIN6
+ }, /* ISO-8859-10; RFC1345,KXS2 */
+ {
+ "iso885913", PG_LATIN7
+ }, /* ISO-8859-13; RFC1345,KXS2 */
+ {
+ "iso885914", PG_LATIN8
+ }, /* ISO-8859-14; RFC1345,KXS2 */
+ {
+ "iso885915", PG_LATIN9
+ }, /* ISO-8859-15; RFC1345,KXS2 */
+ {
+ "iso885916", PG_LATIN10
+ }, /* ISO-8859-16; RFC1345,KXS2 */
+ {
+ "iso88592", PG_LATIN2
+ }, /* ISO-8859-2; RFC1345,KXS2 */
+ {
+ "iso88593", PG_LATIN3
+ }, /* ISO-8859-3; RFC1345,KXS2 */
+ {
+ "iso88594", PG_LATIN4
+ }, /* ISO-8859-4; RFC1345,KXS2 */
+ {
+ "iso88595", PG_ISO_8859_5
+ }, /* ISO-8859-5; RFC1345,KXS2 */
+ {
+ "iso88596", PG_ISO_8859_6
+ }, /* ISO-8859-6; RFC1345,KXS2 */
+ {
+ "iso88597", PG_ISO_8859_7
+ }, /* ISO-8859-7; RFC1345,KXS2 */
+ {
+ "iso88598", PG_ISO_8859_8
+ }, /* ISO-8859-8; RFC1345,KXS2 */
+ {
+ "iso88599", PG_LATIN5
+ }, /* ISO-8859-9; RFC1345,KXS2 */
+ {
+ "johab", PG_JOHAB
+ }, /* JOHAB; Korean (Hangul) character
+ * encoding */
+ {
+ "koi8", PG_KOI8R
+ }, /* _dirty_ alias for KOI8-R (backward
+ * compatibility) */
+ {
+ "koi8r", PG_KOI8R
+ }, /* KOI8-R; RFC1489 */
+ {
+ "koi8u", PG_KOI8U
+ }, /* KOI8-U; RFC2319 */
+ {
+ "latin1", PG_LATIN1
+ }, /* alias for ISO-8859-1 */
+ {
+ "latin10", PG_LATIN10
+ }, /* alias for ISO-8859-16 */
+ {
+ "latin2", PG_LATIN2
+ }, /* alias for ISO-8859-2 */
+ {
+ "latin3", PG_LATIN3
+ }, /* alias for ISO-8859-3 */
+ {
+ "latin4", PG_LATIN4
+ }, /* alias for ISO-8859-4 */
+ {
+ "latin5", PG_LATIN5
+ }, /* alias for ISO-8859-9 */
+ {
+ "latin6", PG_LATIN6
+ }, /* alias for ISO-8859-10 */
+ {
+ "latin7", PG_LATIN7
+ }, /* alias for ISO-8859-13 */
+ {
+ "latin8", PG_LATIN8
+ }, /* alias for ISO-8859-14 */
+ {
+ "latin9", PG_LATIN9
+ }, /* alias for ISO-8859-15 */
+ {
+ "mskanji", PG_SJIS
+ }, /* alias for Shift_JIS */
+ {
+ "muleinternal", PG_MULE_INTERNAL
+ },
+ {
+ "shiftjis", PG_SJIS
+ }, /* Shift_JIS; JIS X 0202-1991 */
+
+ {
+ "shiftjis2004", PG_SHIFT_JIS_2004
+ }, /* SHIFT-JIS-2004; Shift JIS for Japanese,
+ * standard JIS X 0213 */
+ {
+ "sjis", PG_SJIS
+ }, /* alias for Shift_JIS */
+ {
+ "sqlascii", PG_SQL_ASCII
+ },
+ {
+ "tcvn", PG_WIN1258
+ }, /* alias for WIN1258 */
+ {
+ "tcvn5712", PG_WIN1258
+ }, /* alias for WIN1258 */
+ {
+ "uhc", PG_UHC
+ }, /* UHC; Korean Windows CodePage 949 */
+ {
+ "unicode", PG_UTF8
+ }, /* alias for UTF8 */
+ {
+ "utf8", PG_UTF8
+ }, /* alias for UTF8 */
+ {
+ "vscii", PG_WIN1258
+ }, /* alias for WIN1258 */
+ {
+ "win", PG_WIN1251
+ }, /* _dirty_ alias for windows-1251 (backward
+ * compatibility) */
+ {
+ "win1250", PG_WIN1250
+ }, /* alias for Windows-1250 */
+ {
+ "win1251", PG_WIN1251
+ }, /* alias for Windows-1251 */
+ {
+ "win1252", PG_WIN1252
+ }, /* alias for Windows-1252 */
+ {
+ "win1253", PG_WIN1253
+ }, /* alias for Windows-1253 */
+ {
+ "win1254", PG_WIN1254
+ }, /* alias for Windows-1254 */
+ {
+ "win1255", PG_WIN1255
+ }, /* alias for Windows-1255 */
+ {
+ "win1256", PG_WIN1256
+ }, /* alias for Windows-1256 */
+ {
+ "win1257", PG_WIN1257
+ }, /* alias for Windows-1257 */
+ {
+ "win1258", PG_WIN1258
+ }, /* alias for Windows-1258 */
+ {
+ "win866", PG_WIN866
+ }, /* IBM866 */
+ {
+ "win874", PG_WIN874
+ }, /* alias for Windows-874 */
+ {
+ "win932", PG_SJIS
+ }, /* alias for Shift_JIS */
+ {
+ "win936", PG_GBK
+ }, /* alias for GBK */
+ {
+ "win949", PG_UHC
+ }, /* alias for UHC */
+ {
+ "win950", PG_BIG5
+ }, /* alias for BIG5 */
+ {
+ "windows1250", PG_WIN1250
+ }, /* Windows-1250; Microsoft */
+ {
+ "windows1251", PG_WIN1251
+ }, /* Windows-1251; Microsoft */
+ {
+ "windows1252", PG_WIN1252
+ }, /* Windows-1252; Microsoft */
+ {
+ "windows1253", PG_WIN1253
+ }, /* Windows-1253; Microsoft */
+ {
+ "windows1254", PG_WIN1254
+ }, /* Windows-1254; Microsoft */
+ {
+ "windows1255", PG_WIN1255
+ }, /* Windows-1255; Microsoft */
+ {
+ "windows1256", PG_WIN1256
+ }, /* Windows-1256; Microsoft */
+ {
+ "windows1257", PG_WIN1257
+ }, /* Windows-1257; Microsoft */
+ {
+ "windows1258", PG_WIN1258
+ }, /* Windows-1258; Microsoft */
+ {
+ "windows866", PG_WIN866
+ }, /* IBM866 */
+ {
+ "windows874", PG_WIN874
+ }, /* Windows-874; Microsoft */
+ {
+ "windows932", PG_SJIS
+ }, /* alias for Shift_JIS */
+ {
+ "windows936", PG_GBK
+ }, /* alias for GBK */
+ {
+ "windows949", PG_UHC
+ }, /* alias for UHC */
+ {
+ "windows950", PG_BIG5
+ } /* alias for BIG5 */
+};
+
+/* ----------
+ * These are "official" encoding names.
+ * XXX must be kept in the same order as enum pg_enc (in mb/pg_wchar.h)
+ * ----------
+ */
+#ifndef WIN32
+#define DEF_ENC2NAME(name, codepage) { #name, PG_##name }
+#else
+#define DEF_ENC2NAME(name, codepage) { #name, PG_##name, codepage }
+#endif
+
+const pg_enc2name pg_enc2name_tbl[] =
+{
+ DEF_ENC2NAME(SQL_ASCII, 0),
+ DEF_ENC2NAME(EUC_JP, 20932),
+ DEF_ENC2NAME(EUC_CN, 20936),
+ DEF_ENC2NAME(EUC_KR, 51949),
+ DEF_ENC2NAME(EUC_TW, 0),
+ DEF_ENC2NAME(EUC_JIS_2004, 20932),
+ DEF_ENC2NAME(UTF8, 65001),
+ DEF_ENC2NAME(MULE_INTERNAL, 0),
+ DEF_ENC2NAME(LATIN1, 28591),
+ DEF_ENC2NAME(LATIN2, 28592),
+ DEF_ENC2NAME(LATIN3, 28593),
+ DEF_ENC2NAME(LATIN4, 28594),
+ DEF_ENC2NAME(LATIN5, 28599),
+ DEF_ENC2NAME(LATIN6, 0),
+ DEF_ENC2NAME(LATIN7, 0),
+ DEF_ENC2NAME(LATIN8, 0),
+ DEF_ENC2NAME(LATIN9, 28605),
+ DEF_ENC2NAME(LATIN10, 0),
+ DEF_ENC2NAME(WIN1256, 1256),
+ DEF_ENC2NAME(WIN1258, 1258),
+ DEF_ENC2NAME(WIN866, 866),
+ DEF_ENC2NAME(WIN874, 874),
+ DEF_ENC2NAME(KOI8R, 20866),
+ DEF_ENC2NAME(WIN1251, 1251),
+ DEF_ENC2NAME(WIN1252, 1252),
+ DEF_ENC2NAME(ISO_8859_5, 28595),
+ DEF_ENC2NAME(ISO_8859_6, 28596),
+ DEF_ENC2NAME(ISO_8859_7, 28597),
+ DEF_ENC2NAME(ISO_8859_8, 28598),
+ DEF_ENC2NAME(WIN1250, 1250),
+ DEF_ENC2NAME(WIN1253, 1253),
+ DEF_ENC2NAME(WIN1254, 1254),
+ DEF_ENC2NAME(WIN1255, 1255),
+ DEF_ENC2NAME(WIN1257, 1257),
+ DEF_ENC2NAME(KOI8U, 21866),
+ DEF_ENC2NAME(SJIS, 932),
+ DEF_ENC2NAME(BIG5, 950),
+ DEF_ENC2NAME(GBK, 936),
+ DEF_ENC2NAME(UHC, 949),
+ DEF_ENC2NAME(GB18030, 54936),
+ DEF_ENC2NAME(JOHAB, 0),
+ DEF_ENC2NAME(SHIFT_JIS_2004, 932)
+};
+
+/* ----------
+ * These are encoding names for gettext.
+ *
+ * This covers all encodings except MULE_INTERNAL, which is alien to gettext.
+ * ----------
+ */
+const pg_enc2gettext pg_enc2gettext_tbl[] =
+{
+ {PG_SQL_ASCII, "US-ASCII"},
+ {PG_UTF8, "UTF-8"},
+ {PG_LATIN1, "LATIN1"},
+ {PG_LATIN2, "LATIN2"},
+ {PG_LATIN3, "LATIN3"},
+ {PG_LATIN4, "LATIN4"},
+ {PG_ISO_8859_5, "ISO-8859-5"},
+ {PG_ISO_8859_6, "ISO_8859-6"},
+ {PG_ISO_8859_7, "ISO-8859-7"},
+ {PG_ISO_8859_8, "ISO-8859-8"},
+ {PG_LATIN5, "LATIN5"},
+ {PG_LATIN6, "LATIN6"},
+ {PG_LATIN7, "LATIN7"},
+ {PG_LATIN8, "LATIN8"},
+ {PG_LATIN9, "LATIN-9"},
+ {PG_LATIN10, "LATIN10"},
+ {PG_KOI8R, "KOI8-R"},
+ {PG_KOI8U, "KOI8-U"},
+ {PG_WIN1250, "CP1250"},
+ {PG_WIN1251, "CP1251"},
+ {PG_WIN1252, "CP1252"},
+ {PG_WIN1253, "CP1253"},
+ {PG_WIN1254, "CP1254"},
+ {PG_WIN1255, "CP1255"},
+ {PG_WIN1256, "CP1256"},
+ {PG_WIN1257, "CP1257"},
+ {PG_WIN1258, "CP1258"},
+ {PG_WIN866, "CP866"},
+ {PG_WIN874, "CP874"},
+ {PG_EUC_CN, "EUC-CN"},
+ {PG_EUC_JP, "EUC-JP"},
+ {PG_EUC_KR, "EUC-KR"},
+ {PG_EUC_TW, "EUC-TW"},
+ {PG_EUC_JIS_2004, "EUC-JP"},
+ {PG_SJIS, "SHIFT-JIS"},
+ {PG_BIG5, "BIG5"},
+ {PG_GBK, "GBK"},
+ {PG_UHC, "UHC"},
+ {PG_GB18030, "GB18030"},
+ {PG_JOHAB, "JOHAB"},
+ {PG_SHIFT_JIS_2004, "SHIFT_JISX0213"},
+ {0, NULL}
+};
+
+
+/*
+ * Table of encoding names for ICU (currently covers backend encodings only)
+ *
+ * Reference: <https://ssl.icu-project.org/icu-bin/convexp>
+ *
+ * NULL entries are not supported by ICU, or their mapping is unclear.
+ */
+static const char *const pg_enc2icu_tbl[] =
+{
+ NULL, /* PG_SQL_ASCII */
+ "EUC-JP", /* PG_EUC_JP */
+ "EUC-CN", /* PG_EUC_CN */
+ "EUC-KR", /* PG_EUC_KR */
+ "EUC-TW", /* PG_EUC_TW */
+ NULL, /* PG_EUC_JIS_2004 */
+ "UTF-8", /* PG_UTF8 */
+ NULL, /* PG_MULE_INTERNAL */
+ "ISO-8859-1", /* PG_LATIN1 */
+ "ISO-8859-2", /* PG_LATIN2 */
+ "ISO-8859-3", /* PG_LATIN3 */
+ "ISO-8859-4", /* PG_LATIN4 */
+ "ISO-8859-9", /* PG_LATIN5 */
+ "ISO-8859-10", /* PG_LATIN6 */
+ "ISO-8859-13", /* PG_LATIN7 */
+ "ISO-8859-14", /* PG_LATIN8 */
+ "ISO-8859-15", /* PG_LATIN9 */
+ NULL, /* PG_LATIN10 */
+ "CP1256", /* PG_WIN1256 */
+ "CP1258", /* PG_WIN1258 */
+ "CP866", /* PG_WIN866 */
+ NULL, /* PG_WIN874 */
+ "KOI8-R", /* PG_KOI8R */
+ "CP1251", /* PG_WIN1251 */
+ "CP1252", /* PG_WIN1252 */
+ "ISO-8859-5", /* PG_ISO_8859_5 */
+ "ISO-8859-6", /* PG_ISO_8859_6 */
+ "ISO-8859-7", /* PG_ISO_8859_7 */
+ "ISO-8859-8", /* PG_ISO_8859_8 */
+ "CP1250", /* PG_WIN1250 */
+ "CP1253", /* PG_WIN1253 */
+ "CP1254", /* PG_WIN1254 */
+ "CP1255", /* PG_WIN1255 */
+ "CP1257", /* PG_WIN1257 */
+ "KOI8-U", /* PG_KOI8U */
+};
+
+StaticAssertDecl(lengthof(pg_enc2icu_tbl) == PG_ENCODING_BE_LAST + 1,
+ "pg_enc2icu_tbl incomplete");
+
+
+/*
+ * Is this encoding supported by ICU?
+ */
+bool
+is_encoding_supported_by_icu(int encoding)
+{
+ if (!PG_VALID_BE_ENCODING(encoding))
+ return false;
+ return (pg_enc2icu_tbl[encoding] != NULL);
+}
+
+/*
+ * Returns ICU's name for encoding, or NULL if not supported
+ */
+const char *
+get_encoding_name_for_icu(int encoding)
+{
+ if (!PG_VALID_BE_ENCODING(encoding))
+ return NULL;
+ return pg_enc2icu_tbl[encoding];
+}
+
+
+/* ----------
+ * Encoding checks: return -1 on error, else the encoding id
+ * ----------
+ */
+int
+pg_valid_client_encoding(const char *name)
+{
+ int enc;
+
+ if ((enc = pg_char_to_encoding(name)) < 0)
+ return -1;
+
+ if (!PG_VALID_FE_ENCODING(enc))
+ return -1;
+
+ return enc;
+}
+
+int
+pg_valid_server_encoding(const char *name)
+{
+ int enc;
+
+ if ((enc = pg_char_to_encoding(name)) < 0)
+ return -1;
+
+ if (!PG_VALID_BE_ENCODING(enc))
+ return -1;
+
+ return enc;
+}
+
+int
+pg_valid_server_encoding_id(int encoding)
+{
+ return PG_VALID_BE_ENCODING(encoding);
+}
+
+/*
+ * Remove irrelevant chars from encoding name, store at *newkey
+ *
+ * (Caller's responsibility to provide a large enough buffer)
+ */
+static char *
+clean_encoding_name(const char *key, char *newkey)
+{
+ const char *p;
+ char *np;
+
+ for (p = key, np = newkey; *p != '\0'; p++)
+ {
+ if (isalnum((unsigned char) *p))
+ {
+ if (*p >= 'A' && *p <= 'Z')
+ *np++ = *p + 'a' - 'A';
+ else
+ *np++ = *p;
+ }
+ }
+ *np = '\0';
+ return newkey;
+}
+
+/*
+ * Search encoding by encoding name
+ *
+ * Returns encoding ID, or -1 if not recognized
+ */
+int
+pg_char_to_encoding(const char *name)
+{
+ unsigned int nel = lengthof(pg_encname_tbl);
+ const pg_encname *base = pg_encname_tbl,
+ *last = base + nel - 1,
+ *position;
+ int result;
+ char buff[NAMEDATALEN],
+ *key;
+
+ if (name == NULL || *name == '\0')
+ return -1;
+
+ if (strlen(name) >= NAMEDATALEN)
+ return -1; /* it's certainly not in the table */
+
+ key = clean_encoding_name(name, buff);
+
+ while (last >= base)
+ {
+ position = base + ((last - base) >> 1);
+ result = key[0] - position->name[0];
+
+ if (result == 0)
+ {
+ result = strcmp(key, position->name);
+ if (result == 0)
+ return position->encoding;
+ }
+ if (result < 0)
+ last = position - 1;
+ else
+ base = position + 1;
+ }
+ return -1;
+}
+
+const char *
+pg_encoding_to_char(int encoding)
+{
+ if (PG_VALID_ENCODING(encoding))
+ {
+ const pg_enc2name *p = &pg_enc2name_tbl[encoding];
+
+ Assert(encoding == p->encoding);
+ return p->name;
+ }
+ return "";
+}
diff --git a/src/common/exec.c b/src/common/exec.c
new file mode 100644
index 0000000..f209b93
--- /dev/null
+++ b/src/common/exec.c
@@ -0,0 +1,719 @@
+/*-------------------------------------------------------------------------
+ *
+ * exec.c
+ * Functions for finding and validating executable files
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/exec.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * On macOS, "man realpath" avers:
+ * Defining _DARWIN_C_SOURCE or _DARWIN_BETTER_REALPATH before including
+ * stdlib.h will cause the provided implementation of realpath() to use
+ * F_GETPATH from fcntl(2) to discover the path.
+ * This should be harmless everywhere else.
+ */
+#define _DARWIN_BETTER_REALPATH
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <signal.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#ifdef EXEC_BACKEND
+#if defined(HAVE_SYS_PERSONALITY_H)
+#include <sys/personality.h>
+#elif defined(HAVE_SYS_PROCCTL_H)
+#include <sys/procctl.h>
+#endif
+#endif
+
+/* Inhibit mingw CRT's auto-globbing of command line arguments */
+#if defined(WIN32) && !defined(_MSC_VER)
+extern int _CRT_glob = 0; /* 0 turns off globbing; 1 turns it on */
+#endif
+
+/*
+ * Hacky solution to allow expressing both frontend and backend error reports
+ * in one macro call. First argument of log_error is an errcode() call of
+ * some sort (ignored if FRONTEND); the rest are errmsg_internal() arguments,
+ * i.e. message string and any parameters for it.
+ *
+ * Caller must provide the gettext wrapper around the message string, if
+ * appropriate, so that it gets translated in the FRONTEND case; this
+ * motivates using errmsg_internal() not errmsg(). We handle appending a
+ * newline, if needed, inside the macro, so that there's only one translatable
+ * string per call not two.
+ *
+ * Expansion summary:
+ * backend: ereport(LOG, ...) carrying errcodefn and the formatted message.
+ * frontend: the message goes to stderr through fprintf(), followed by a
+ * newline written with fputc(). errcodefn does not appear in this
+ * expansion at all, so it is never evaluated in frontend code.
+ */
+#ifndef FRONTEND
+#define log_error(errcodefn, ...) \
+ ereport(LOG, (errcodefn, errmsg_internal(__VA_ARGS__)))
+#else
+#define log_error(errcodefn, ...) \
+ (fprintf(stderr, __VA_ARGS__), fputc('\n', stderr))
+#endif
+
+static int normalize_exec_path(char *path);
+static char *pg_realpath(const char *fname);
+
+#ifdef WIN32
+static BOOL GetTokenUser(HANDLE hToken, PTOKEN_USER *ppTokenUser);
+#endif
+
+/*
+ * validate_exec -- validate "path" as an executable file
+ *
+ * path is the file name to check. On WIN32, ".exe" is appended to a local
+ * copy first unless the name already ends in it (compared case-insensitively).
+ *
+ * returns 0 if the file is found and no error is encountered.
+ * -1 if the regular file "path" does not exist or cannot be executed.
+ * -2 if the file is otherwise valid but cannot be read.
+ * in the failure cases, errno is set appropriately
+ *
+ * A file that is not a regular file yields -1 with errno set to EISDIR
+ * (directory) or EPERM (anything else). On WIN32 the permission test uses
+ * the st_mode bits returned by stat() instead of access(), and errno is
+ * set to EACCES unconditionally before returning.
+ */
+int
+validate_exec(const char *path)
+{
+ struct stat buf;
+ int is_r;
+ int is_x;
+
+#ifdef WIN32
+ char path_exe[MAXPGPATH + sizeof(".exe") - 1]; /* path plus suffix */
+
+ /* Win32 requires a .exe suffix for stat() */
+ if (strlen(path) < strlen(".exe") ||
+ pg_strcasecmp(path + strlen(path) - strlen(".exe"), ".exe") != 0)
+ {
+ strlcpy(path_exe, path, sizeof(path_exe) - 4); /* keep 4 bytes for ".exe" */
+ strcat(path_exe, ".exe");
+ path = path_exe;
+ }
+#endif
+
+ /*
+ * Ensure that the file exists and is a regular file.
+ *
+ * XXX if you have a broken system where stat() looks at the symlink
+ * instead of the underlying file, you lose.
+ */
+ if (stat(path, &buf) < 0)
+ return -1;
+
+ if (!S_ISREG(buf.st_mode))
+ {
+ /*
+ * POSIX offers no errno code that's simply "not a regular file". If
+ * it's a directory we can use EISDIR. Otherwise, it's most likely a
+ * device special file, and EPERM (Operation not permitted) isn't too
+ * horribly off base.
+ */
+ errno = S_ISDIR(buf.st_mode) ? EISDIR : EPERM;
+ return -1;
+ }
+
+ /*
+ * Ensure that the file is both executable and readable (required for
+ * dynamic loading).
+ */
+#ifndef WIN32
+ is_r = (access(path, R_OK) == 0);
+ is_x = (access(path, X_OK) == 0);
+ /* access() will set errno if it returns -1 */
+#else
+ is_r = buf.st_mode & S_IRUSR;
+ is_x = buf.st_mode & S_IXUSR;
+ errno = EACCES; /* appropriate thing if we return nonzero */
+#endif
+ return is_x ? (is_r ? 0 : -2) : -1; /* a missing execute bit takes precedence over a missing read bit */
+}
+
+
+/*
+ * find_my_exec -- find an absolute path to this program's executable
+ *
+ * argv0 is the name passed on the command line
+ * retpath is the output area (must be of size MAXPGPATH)
+ * Returns 0 if OK, -1 if error.
+ *
+ * The reason we have to work so hard to find an absolute path is that
+ * on some platforms we can't do dynamic loading unless we know the
+ * executable's location. Also, we need an absolute path not a relative
+ * path because we may later change working directory. Finally, we want
+ * a true path not a symlink location, so that we can locate other files
+ * that are part of our installation relative to the executable.
+ *
+ * Lookup order: if argv0 contains a directory separator it is validated
+ * as given and nothing else is tried. Otherwise the current directory is
+ * tried first (WIN32 only), followed by each entry of the PATH environment
+ * variable in turn. The first candidate accepted by validate_exec() is
+ * handed to normalize_exec_path(), whose result becomes our result.
+ * Failures are reported through log_error().
+ */
+int
+find_my_exec(const char *argv0, char *retpath)
+{
+ char *path;
+
+ /*
+ * If argv0 contains a separator, then PATH wasn't used.
+ */
+ strlcpy(retpath, argv0, MAXPGPATH);
+ if (first_dir_separator(retpath) != NULL)
+ {
+ if (validate_exec(retpath) == 0)
+ return normalize_exec_path(retpath);
+
+ log_error(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ _("invalid binary \"%s\": %m"), retpath);
+ return -1;
+ }
+
+#ifdef WIN32
+ /* Win32 checks the current directory first for names without slashes */
+ if (validate_exec(retpath) == 0)
+ return normalize_exec_path(retpath);
+#endif
+
+ /*
+ * Since no explicit path was supplied, the user must have been relying on
+ * PATH. We'll search the same PATH.
+ */
+ if ((path = getenv("PATH")) && *path)
+ {
+ char *startp = NULL,
+ *endp = NULL;
+
+ do
+ {
+ if (!startp)
+ startp = path; /* first iteration: begin at the start of PATH */
+ else
+ startp = endp + 1; /* later iterations: step past the separator */
+
+ endp = first_path_var_separator(startp);
+ if (!endp)
+ endp = startp + strlen(startp); /* point to end */
+
+ strlcpy(retpath, startp, Min(endp - startp + 1, MAXPGPATH)); /* copy just this PATH entry */
+
+ join_path_components(retpath, retpath, argv0);
+ canonicalize_path(retpath);
+
+ switch (validate_exec(retpath))
+ {
+ case 0: /* found ok */
+ return normalize_exec_path(retpath);
+ case -1: /* wasn't even a candidate, keep looking */
+ break;
+ case -2: /* found but disqualified */
+ log_error(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ _("could not read binary \"%s\": %m"),
+ retpath);
+ break;
+ }
+ } while (*endp); /* stop once the entry ended at the terminating NUL */
+ }
+
+ log_error(errcode(ERRCODE_UNDEFINED_FILE),
+ _("could not find a \"%s\" to execute"), argv0);
+ return -1;
+}
+
+
+/*
+ * normalize_exec_path - turn an executable's path into its canonical form
+ *
+ * "path" names an executable, possibly through symlinks and possibly as a
+ * relative path.  The symlink-free absolute equivalent is obtained from
+ * pg_realpath() and copied back into "path", which is assumed to have room
+ * for MAXPGPATH bytes.
+ *
+ * Returns 0 on success.  If the path cannot be resolved, the failure is
+ * logged, "path" is left unmodified, and -1 is returned.
+ */
+static int
+normalize_exec_path(char *path)
+{
+	/* realpath(3), wrapped by pg_realpath(), does all the real work. */
+	char	   *resolved = pg_realpath(path);
+
+	if (resolved != NULL)
+	{
+		strlcpy(path, resolved, MAXPGPATH);
+		free(resolved);
+
+#ifdef WIN32
+		/* On Windows, be sure to convert '\' to '/' */
+		canonicalize_path(path);
+#endif
+
+		return 0;
+	}
+
+	log_error(errcode_for_file_access(),
+			  _("could not resolve path \"%s\" to absolute form: %m"),
+			  path);
+	return -1;
+}
+
+
+/*
+ * pg_realpath() - realpath(3) with POSIX.1-2008 semantics
+ *
+ * This is equivalent to realpath(fname, NULL), in that it returns a
+ * malloc'd buffer containing the absolute path equivalent to fname.
+ * On error, returns NULL with errno set.
+ *
+ * fname is the path to resolve. The caller owns the returned buffer and
+ * must free() it.
+ *
+ * On Windows, what you get is spelled per platform conventions,
+ * so you probably want to apply canonicalize_path() to the result.
+ *
+ * For now, this is needed only here so mark it static. If you choose to
+ * move it into its own file, move the _DARWIN_BETTER_REALPATH #define too!
+ */
+static char *
+pg_realpath(const char *fname)
+{
+ char *path;
+
+#ifndef WIN32
+ path = realpath(fname, NULL);
+ if (path == NULL && errno == EINVAL)
+ {
+ /*
+ * Cope with old-POSIX systems that require a user-provided buffer.
+ * Assume MAXPGPATH is enough room on all such systems.
+ */
+ char *buf = malloc(MAXPGPATH);
+
+ if (buf == NULL)
+ return NULL; /* assume errno is set */
+ path = realpath(fname, buf); /* presumably returns buf on success, handing it to our caller */
+ if (path == NULL) /* don't leak memory */
+ {
+ int save_errno = errno; /* free() might clobber errno */
+
+ free(buf);
+ errno = save_errno;
+ }
+ }
+#else /* WIN32 */
+
+ /*
+ * Microsoft is resolutely non-POSIX, but _fullpath() does the same thing.
+ * The documentation claims it reports errors by setting errno, which is a
+ * bit surprising for Microsoft, but we'll believe that until it's proven
+ * wrong. Clear errno first, though, so we can at least tell if a failure
+ * occurs and doesn't set it.
+ */
+ errno = 0;
+ path = _fullpath(NULL, fname, 0);
+#endif
+
+ return path;
+}
+
+
+/*
+ * Find another program in our binary's directory,
+ * then make sure it is the proper version.
+ *
+ * argv0 is passed to find_my_exec() to locate our own executable.
+ * target is the other program's name; EXE is appended to it.
+ * versionstr is compared, using strcmp(), with the first line of output
+ * obtained by running the other program with "-V".
+ * retpath receives the other program's path; it is used as find_my_exec()'s
+ * output area and is therefore expected to hold MAXPGPATH bytes.
+ *
+ * Returns 0 if OK, -1 if the program could not be located, validated or
+ * run, and -2 if it ran but its first output line differs from versionstr.
+ */
+int
+find_other_exec(const char *argv0, const char *target,
+ const char *versionstr, char *retpath)
+{
+ char cmd[MAXPGPATH];
+ char line[MAXPGPATH];
+
+ if (find_my_exec(argv0, retpath) < 0)
+ return -1;
+
+ /* Trim off program name and keep just directory */
+ *last_dir_separator(retpath) = '\0'; /* NOTE(review): assumes retpath contains a separator, i.e. is absolute */
+ canonicalize_path(retpath);
+
+ /* Now append the other program's name */
+ snprintf(retpath + strlen(retpath), MAXPGPATH - strlen(retpath),
+ "/%s%s", target, EXE);
+
+ if (validate_exec(retpath) != 0)
+ return -1;
+
+ snprintf(cmd, sizeof(cmd), "\"%s\" -V", retpath);
+
+ if (!pipe_read_line(cmd, line, sizeof(line)))
+ return -1;
+
+ if (strcmp(line, versionstr) != 0) /* exact match; line is whatever fgets() stored */
+ return -2;
+
+ return 0;
+}
+
+
+/*
+ * Execute a command in a pipe and read the first line from it.
+ *
+ * cmd is handed to popen() in "r" mode. line and maxsize describe the
+ * caller's buffer, which is filled by a single fgets() call.
+ *
+ * Returns line on success. Returns NULL if popen() fails, if fgets() reads
+ * nothing (a message is printed to stderr in both cases), or if
+ * pclose_check() returns nonzero for the command.
+ */
+char *
+pipe_read_line(char *cmd, char *line, int maxsize)
+{
+ FILE *pgver;
+
+ fflush(NULL); /* flush all open output streams before starting the command */
+
+ errno = 0;
+ if ((pgver = popen(cmd, "r")) == NULL)
+ {
+ perror("popen failure");
+ return NULL;
+ }
+
+ errno = 0;
+ if (fgets(line, maxsize, pgver) == NULL)
+ {
+ if (feof(pgver))
+ fprintf(stderr, "no data was returned by command \"%s\"\n", cmd);
+ else
+ perror("fgets failure");
+ pclose(pgver); /* no error checking */
+ return NULL;
+ }
+
+ if (pclose_check(pgver)) /* nonzero means the command or pclose() failed */
+ return NULL;
+
+ return line;
+}
+
+
+/*
+ * pclose() plus useful error reporting
+ *
+ * Closes a stream obtained from popen() and returns pclose()'s result
+ * unchanged.  A zero result is returned silently.  For -1 the failure of
+ * pclose() itself is logged; for any other value the text produced by
+ * wait_result_to_str() is logged.
+ */
+int
+pclose_check(FILE *stream)
+{
+	int			status = pclose(stream);
+
+	if (status == 0)
+		return 0;				/* all is well */
+
+	if (status != -1)
+	{
+		/* the child ran but did not exit cleanly; describe how it ended */
+		char	   *why = wait_result_to_str(status);
+
+		log_error(errcode(ERRCODE_SYSTEM_ERROR),
+				  "%s", why);
+		pfree(why);
+	}
+	else
+	{
+		/* pclose() itself failed, and hopefully set errno */
+		log_error(errcode(ERRCODE_SYSTEM_ERROR),
+				  _("%s() failed: %m"), "pclose");
+	}
+
+	return status;
+}
+
+/*
+ * set_pglocale_pgservice
+ *
+ * Set application-specific locale and service directory
+ *
+ * This function takes the value of argv[0] rather than a full path.
+ *
+ * argv0 is used to locate our executable through find_my_exec(); if that
+ * fails, the function returns after the setlocale() step without touching
+ * the environment. app is the text domain name; it is compared with
+ * PG_TEXTDOMAIN("postgres") and, when ENABLE_NLS is defined, passed to
+ * bindtextdomain() and textdomain().
+ *
+ * PGLOCALEDIR (ENABLE_NLS only) and PGSYSCONFDIR are set in the environment
+ * only when they do not already have a value.
+ *
+ * (You may be wondering why this is in exec.c. It requires this module's
+ * services and doesn't introduce any new dependencies, so this seems as
+ * good as anyplace.)
+ */
+void
+set_pglocale_pgservice(const char *argv0, const char *app)
+{
+ char path[MAXPGPATH];
+ char my_exec_path[MAXPGPATH];
+
+ /* don't set LC_ALL in the backend */
+ if (strcmp(app, PG_TEXTDOMAIN("postgres")) != 0)
+ {
+ setlocale(LC_ALL, "");
+
+ /*
+ * One could make a case for reproducing here PostmasterMain()'s test
+ * for whether the process is multithreaded. Unlike the postmaster,
+ * no frontend program calls sigprocmask() or otherwise provides for
+ * mutual exclusion between signal handlers. While frontends using
+ * fork(), if multithreaded, are formally exposed to undefined
+ * behavior, we have not witnessed a concrete bug. Therefore,
+ * complaining about multithreading here may be mere pedantry.
+ */
+ }
+
+ if (find_my_exec(argv0, my_exec_path) < 0)
+ return;
+
+#ifdef ENABLE_NLS
+ get_locale_path(my_exec_path, path);
+ bindtextdomain(app, path);
+ textdomain(app);
+ /* set for libpq to use, but don't override existing setting */
+ setenv("PGLOCALEDIR", path, 0);
+#endif
+
+ if (getenv("PGSYSCONFDIR") == NULL)
+ {
+ get_etc_path(my_exec_path, path);
+ /* set for libpq to use */
+ setenv("PGSYSCONFDIR", path, 0);
+ }
+}
+
+#ifdef EXEC_BACKEND
+/*
+ * For the benefit of PostgreSQL developers testing EXEC_BACKEND on Unix
+ * systems (code paths normally exercised only on Windows), provide a way to
+ * disable address space layout randomization, if we know how on this platform.
+ * Otherwise, backends may fail to attach to shared memory at the fixed address
+ * chosen by the postmaster. (See also the macOS-specific hack in
+ * sysv_shmem.c.)
+ *
+ * Returns whatever personality() or procctl() returns where one of them is
+ * available. On any other platform, sets errno to ENOSYS and returns -1.
+ */
+int
+pg_disable_aslr(void)
+{
+#if defined(HAVE_SYS_PERSONALITY_H)
+ return personality(ADDR_NO_RANDOMIZE);
+#elif defined(HAVE_SYS_PROCCTL_H) && defined(PROC_ASLR_FORCE_DISABLE)
+ int data = PROC_ASLR_FORCE_DISABLE;
+
+ return procctl(P_PID, 0, PROC_ASLR_CTL, &data); /* id 0: presumably the calling process -- confirm */
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+#endif
+
+#ifdef WIN32
+
+/*
+ * AddUserToTokenDacl(HANDLE hToken)
+ *
+ * This function adds the current user account to the restricted
+ * token used when we create a restricted process.
+ *
+ * This is required because of some security changes in Windows
+ * that appeared in patches to XP/2K3 and in Vista/2008.
+ *
+ * On these machines, the Administrator account is not included in
+ * the default DACL - you just get Administrators + System. For
+ * regular users you get User + System. Because we strip Administrators
+ * when we create the restricted token, we are left with only System
+ * in the DACL which leads to access denied errors for later CreatePipe()
+ * and CreateProcess() calls when running as Administrator.
+ *
+ * This function fixes this problem by modifying the DACL of the
+ * token the process will use, and explicitly re-adding the current
+ * user account. This is still secure because the Administrator account
+ * inherits its privileges from the Administrators group - it doesn't
+ * have any of its own.
+ *
+ * Returns TRUE on success. On any failure a message is emitted through
+ * log_error() (by this function or by GetTokenUser()) and FALSE is
+ * returned. The three temporary allocations made here are released with
+ * LocalFree() on every exit path.
+ *
+ * NOTE(review): if the initial size-probing GetTokenInformation() call
+ * were ever to succeed, ptdd would still be NULL when ptdd->DefaultDacl is
+ * read below -- confirm that the zero-length probe always fails.
+ */
+BOOL
+AddUserToTokenDacl(HANDLE hToken)
+{
+ int i;
+ ACL_SIZE_INFORMATION asi;
+ ACCESS_ALLOWED_ACE *pace;
+ DWORD dwNewAclSize;
+ DWORD dwSize = 0;
+ DWORD dwTokenInfoLength = 0;
+ PACL pacl = NULL;
+ PTOKEN_USER pTokenUser = NULL;
+ TOKEN_DEFAULT_DACL tddNew;
+ TOKEN_DEFAULT_DACL *ptdd = NULL;
+ TOKEN_INFORMATION_CLASS tic = TokenDefaultDacl;
+ BOOL ret = FALSE;
+
+ /* Figure out the buffer size for the DACL info */
+ if (!GetTokenInformation(hToken, tic, (LPVOID) NULL, dwTokenInfoLength, &dwSize))
+ {
+ if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
+ {
+ ptdd = (TOKEN_DEFAULT_DACL *) LocalAlloc(LPTR, dwSize);
+ if (ptdd == NULL)
+ {
+ log_error(errcode(ERRCODE_OUT_OF_MEMORY),
+ _("out of memory"));
+ goto cleanup;
+ }
+
+ if (!GetTokenInformation(hToken, tic, (LPVOID) ptdd, dwSize, &dwSize))
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not get token information: error code %lu",
+ GetLastError());
+ goto cleanup;
+ }
+ }
+ else
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not get token information buffer size: error code %lu",
+ GetLastError());
+ goto cleanup;
+ }
+ }
+
+ /* Get the ACL info */
+ if (!GetAclInformation(ptdd->DefaultDacl, (LPVOID) &asi,
+ (DWORD) sizeof(ACL_SIZE_INFORMATION),
+ AclSizeInformation))
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not get ACL information: error code %lu",
+ GetLastError());
+ goto cleanup;
+ }
+
+ /* Get the current user SID */
+ if (!GetTokenUser(hToken, &pTokenUser))
+ goto cleanup; /* callee printed a message */
+
+ /* Figure out the size of the new ACL */
+ dwNewAclSize = asi.AclBytesInUse + sizeof(ACCESS_ALLOWED_ACE) +
+ GetLengthSid(pTokenUser->User.Sid) - sizeof(DWORD); /* presumably discounts the SID placeholder inside ACCESS_ALLOWED_ACE -- confirm */
+
+ /* Allocate the ACL buffer & initialize it */
+ pacl = (PACL) LocalAlloc(LPTR, dwNewAclSize);
+ if (pacl == NULL)
+ {
+ log_error(errcode(ERRCODE_OUT_OF_MEMORY),
+ _("out of memory"));
+ goto cleanup;
+ }
+
+ if (!InitializeAcl(pacl, dwNewAclSize, ACL_REVISION))
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not initialize ACL: error code %lu", GetLastError());
+ goto cleanup;
+ }
+
+ /* Loop through the existing ACEs, and build the new ACL */
+ for (i = 0; i < (int) asi.AceCount; i++)
+ {
+ if (!GetAce(ptdd->DefaultDacl, i, (LPVOID *) &pace))
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not get ACE: error code %lu", GetLastError());
+ goto cleanup;
+ }
+
+ if (!AddAce(pacl, ACL_REVISION, MAXDWORD, pace, ((PACE_HEADER) pace)->AceSize))
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not add ACE: error code %lu", GetLastError());
+ goto cleanup;
+ }
+ }
+
+ /* Add the new ACE for the current user */
+ if (!AddAccessAllowedAceEx(pacl, ACL_REVISION, OBJECT_INHERIT_ACE, GENERIC_ALL, pTokenUser->User.Sid))
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not add access allowed ACE: error code %lu",
+ GetLastError());
+ goto cleanup;
+ }
+
+ /* Set the new DACL in the token */
+ tddNew.DefaultDacl = pacl;
+
+ if (!SetTokenInformation(hToken, tic, (LPVOID) &tddNew, dwNewAclSize))
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not set token information: error code %lu",
+ GetLastError());
+ goto cleanup;
+ }
+
+ ret = TRUE;
+
+cleanup:
+ if (pTokenUser)
+ LocalFree((HLOCAL) pTokenUser);
+
+ if (pacl)
+ LocalFree((HLOCAL) pacl);
+
+ if (ptdd)
+ LocalFree((HLOCAL) ptdd);
+
+ return ret;
+}
+
+/*
+ * GetTokenUser(HANDLE hToken, PTOKEN_USER *ppTokenUser)
+ *
+ * Get the user's token information from a process token.
+ *
+ * hToken is the token to query. On success *ppTokenUser points to a
+ * LocalAlloc()'d TOKEN_USER and TRUE is returned. On failure a message is
+ * emitted through log_error(), *ppTokenUser is NULL, and FALSE is returned.
+ *
+ * The caller of this function is responsible for calling LocalFree() on the
+ * returned TOKEN_USER memory.
+ */
+static BOOL
+GetTokenUser(HANDLE hToken, PTOKEN_USER *ppTokenUser)
+{
+ DWORD dwLength;
+
+ *ppTokenUser = NULL;
+
+ if (!GetTokenInformation(hToken,
+ TokenUser,
+ NULL,
+ 0,
+ &dwLength)) /* size probe: no buffer supplied, so failure is the expected outcome */
+ {
+ if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
+ {
+ *ppTokenUser = (PTOKEN_USER) LocalAlloc(LPTR, dwLength);
+
+ if (*ppTokenUser == NULL)
+ {
+ log_error(errcode(ERRCODE_OUT_OF_MEMORY),
+ _("out of memory"));
+ return FALSE;
+ }
+ }
+ else
+ {
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not get token information buffer size: error code %lu",
+ GetLastError());
+ return FALSE;
+ }
+ }
+
+ if (!GetTokenInformation(hToken,
+ TokenUser,
+ *ppTokenUser,
+ dwLength,
+ &dwLength))
+ {
+ LocalFree(*ppTokenUser);
+ *ppTokenUser = NULL;
+
+ log_error(errcode(ERRCODE_SYSTEM_ERROR),
+ "could not get token information: error code %lu",
+ GetLastError()); /* NOTE(review): evaluated after LocalFree(); confirm the error code is still the one of interest */
+ return FALSE;
+ }
+
+ /* Memory in *ppTokenUser is LocalFree():d by the caller */
+ return TRUE;
+}
+
+#endif
diff --git a/src/common/f2s.c b/src/common/f2s.c
new file mode 100644
index 0000000..ba08dcb
--- /dev/null
+++ b/src/common/f2s.c
@@ -0,0 +1,803 @@
+/*---------------------------------------------------------------------------
+ *
+ * Ryu floating-point output for single precision.
+ *
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/f2s.c
+ *
+ * This is a modification of code taken from github.com/ulfjack/ryu under the
+ * terms of the Boost license (not the Apache license). The original copyright
+ * notice follows:
+ *
+ * Copyright 2018 Ulf Adams
+ *
+ * The contents of this file may be used under the terms of the Apache
+ * License, Version 2.0.
+ *
+ * (See accompanying file LICENSE-Apache or copy at
+ * http://www.apache.org/licenses/LICENSE-2.0)
+ *
+ * Alternatively, the contents of this file may be used under the terms of the
+ * Boost Software License, Version 1.0.
+ *
+ * (See accompanying file LICENSE-Boost or copy at
+ * https://www.boost.org/LICENSE_1_0.txt)
+ *
+ * Unless required by applicable law or agreed to in writing, this software is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/shortest_dec.h"
+#include "digit_table.h"
+#include "ryu_common.h"
+
+#define FLOAT_MANTISSA_BITS 23
+#define FLOAT_EXPONENT_BITS 8
+#define FLOAT_BIAS 127
+
+/*
+ * This table is generated (by the upstream) by PrintFloatLookupTable,
+ * and modified (by us) to add UINT64CONST.
+ */
+#define FLOAT_POW5_INV_BITCOUNT 59
+static const uint64 FLOAT_POW5_INV_SPLIT[31] = {
+ UINT64CONST(576460752303423489), UINT64CONST(461168601842738791), UINT64CONST(368934881474191033), UINT64CONST(295147905179352826),
+ UINT64CONST(472236648286964522), UINT64CONST(377789318629571618), UINT64CONST(302231454903657294), UINT64CONST(483570327845851670),
+ UINT64CONST(386856262276681336), UINT64CONST(309485009821345069), UINT64CONST(495176015714152110), UINT64CONST(396140812571321688),
+ UINT64CONST(316912650057057351), UINT64CONST(507060240091291761), UINT64CONST(405648192073033409), UINT64CONST(324518553658426727),
+ UINT64CONST(519229685853482763), UINT64CONST(415383748682786211), UINT64CONST(332306998946228969), UINT64CONST(531691198313966350),
+ UINT64CONST(425352958651173080), UINT64CONST(340282366920938464), UINT64CONST(544451787073501542), UINT64CONST(435561429658801234),
+ UINT64CONST(348449143727040987), UINT64CONST(557518629963265579), UINT64CONST(446014903970612463), UINT64CONST(356811923176489971),
+ UINT64CONST(570899077082383953), UINT64CONST(456719261665907162), UINT64CONST(365375409332725730)
+};
+#define FLOAT_POW5_BITCOUNT 61
+static const uint64 FLOAT_POW5_SPLIT[47] = {
+ UINT64CONST(1152921504606846976), UINT64CONST(1441151880758558720), UINT64CONST(1801439850948198400), UINT64CONST(2251799813685248000),
+ UINT64CONST(1407374883553280000), UINT64CONST(1759218604441600000), UINT64CONST(2199023255552000000), UINT64CONST(1374389534720000000),
+ UINT64CONST(1717986918400000000), UINT64CONST(2147483648000000000), UINT64CONST(1342177280000000000), UINT64CONST(1677721600000000000),
+ UINT64CONST(2097152000000000000), UINT64CONST(1310720000000000000), UINT64CONST(1638400000000000000), UINT64CONST(2048000000000000000),
+ UINT64CONST(1280000000000000000), UINT64CONST(1600000000000000000), UINT64CONST(2000000000000000000), UINT64CONST(1250000000000000000),
+ UINT64CONST(1562500000000000000), UINT64CONST(1953125000000000000), UINT64CONST(1220703125000000000), UINT64CONST(1525878906250000000),
+ UINT64CONST(1907348632812500000), UINT64CONST(1192092895507812500), UINT64CONST(1490116119384765625), UINT64CONST(1862645149230957031),
+ UINT64CONST(1164153218269348144), UINT64CONST(1455191522836685180), UINT64CONST(1818989403545856475), UINT64CONST(2273736754432320594),
+ UINT64CONST(1421085471520200371), UINT64CONST(1776356839400250464), UINT64CONST(2220446049250313080), UINT64CONST(1387778780781445675),
+ UINT64CONST(1734723475976807094), UINT64CONST(2168404344971008868), UINT64CONST(1355252715606880542), UINT64CONST(1694065894508600678),
+ UINT64CONST(2117582368135750847), UINT64CONST(1323488980084844279), UINT64CONST(1654361225106055349), UINT64CONST(2067951531382569187),
+ UINT64CONST(1292469707114105741), UINT64CONST(1615587133892632177), UINT64CONST(2019483917365790221)
+};
+
+/*
+ * Returns the exponent of the largest power of 5 that divides value.
+ *
+ * value must be nonzero (asserted).
+ */
+static inline uint32
+pow5Factor(uint32 value)
+{
+	uint32		exponent = 0;
+
+	Assert(value != 0);
+	while (value % 5 == 0)
+	{
+		/* strip one factor of 5 and count it */
+		value /= 5;
+		++exponent;
+		Assert(value != 0);
+	}
+	return exponent;
+}
+
+/*
+ * Returns true if value is divisible by 5^p, i.e. if value contains at
+ * least p factors of 5.
+ */
+static inline bool
+multipleOfPowerOf5(const uint32 value, const uint32 p)
+{
+	const uint32 factors_of_5 = pow5Factor(value);
+
+	return factors_of_5 >= p;
+}
+
+/*
+ * Returns true if value is divisible by 2^p, i.e. if its p low-order bits
+ * are all zero.  (Equivalent to __builtin_ctz(value) >= p.)
+ */
+static inline bool
+multipleOfPowerOf2(const uint32 value, const uint32 p)
+{
+	const uint32 low_bits = (1u << p) - 1;
+
+	return (value & low_bits) == 0;
+}
+
+/*
+ * It seems to be slightly faster to avoid uint128_t here, although the
+ * generated code for uint128_t looks slightly nicer.
+ *
+ * Computes (m * factor) >> shift. The 96-bit product is formed from two
+ * 32x32->64 bit partial products, m * low half of factor and m * high half
+ * of factor, and only the bits that survive the shift are kept.
+ *
+ * shift must be greater than 32 (asserted). The result is expected to fit
+ * in 32 bits; the 64-bit code path asserts this.
+ */
+static inline uint32
+mulShift(const uint32 m, const uint64 factor, const int32 shift)
+{
+ /*
+ * The casts here help MSVC to avoid calls to the __allmul library
+ * function.
+ */
+ const uint32 factorLo = (uint32) (factor);
+ const uint32 factorHi = (uint32) (factor >> 32);
+ const uint64 bits0 = (uint64) m * factorLo;
+ const uint64 bits1 = (uint64) m * factorHi;
+
+ Assert(shift > 32);
+
+#ifdef RYU_32_BIT_PLATFORM
+
+ /*
+ * On 32-bit platforms we can avoid a 64-bit shift-right since we only
+ * need the upper 32 bits of the result and the shift value is > 32.
+ */
+ const uint32 bits0Hi = (uint32) (bits0 >> 32);
+ uint32 bits1Lo = (uint32) (bits1);
+ uint32 bits1Hi = (uint32) (bits1 >> 32);
+
+ bits1Lo += bits0Hi;
+ bits1Hi += (bits1Lo < bits0Hi); /* propagate the carry out of the low word */
+
+ const int32 s = shift - 32;
+
+ return (bits1Hi << (32 - s)) | (bits1Lo >> s);
+
+#else /* RYU_32_BIT_PLATFORM */
+
+ const uint64 sum = (bits0 >> 32) + bits1; /* (m * factor) >> 32 */
+ const uint64 shiftedSum = sum >> (shift - 32);
+
+ Assert(shiftedSum <= PG_UINT32_MAX);
+ return (uint32) shiftedSum;
+
+#endif /* RYU_32_BIT_PLATFORM */
+}
+
+/*
+ * Multiply m by entry q of FLOAT_POW5_INV_SPLIT and shift the product right
+ * by j bits; see mulShift() for the arithmetic.
+ */
+static inline uint32
+mulPow5InvDivPow2(const uint32 m, const uint32 q, const int32 j)
+{
+	const uint64 inv_pow5 = FLOAT_POW5_INV_SPLIT[q];
+
+	return mulShift(m, inv_pow5, j);
+}
+
+/*
+ * Multiply m by entry i of FLOAT_POW5_SPLIT and shift the product right by
+ * j bits; see mulShift() for the arithmetic.
+ */
+static inline uint32
+mulPow5divPow2(const uint32 m, const uint32 i, const int32 j)
+{
+	const uint64 pow5 = FLOAT_POW5_SPLIT[i];
+
+	return mulShift(m, pow5, j);
+}
+
+/*
+ * Returns the number of decimal digits in v, from 1 to 9.
+ *
+ * Function precondition: v is not a 10-digit number (asserted).  9 digits
+ * are sufficient for round-tripping.
+ */
+static inline uint32
+decimalLength(const uint32 v)
+{
+	Assert(v < 1000000000);
+
+	/* compare against successive powers of ten, smallest first */
+	if (v < 10)
+		return 1;
+	if (v < 100)
+		return 2;
+	if (v < 1000)
+		return 3;
+	if (v < 10000)
+		return 4;
+	if (v < 100000)
+		return 5;
+	if (v < 1000000)
+		return 6;
+	if (v < 10000000)
+		return 7;
+	if (v < 100000000)
+		return 8;
+	return 9;
+}
+
+/* A floating decimal representing m * 10^e. */
+typedef struct floating_decimal_32
+{
+ uint32 mantissa; /* m: the decimal digits, as an integer */
+ int32 exponent; /* e: the power of ten applied to mantissa */
+} floating_decimal_32;
+/*
+ * f2d -- convert the fields of an IEEE single into a floating_decimal_32
+ *
+ * ieeeMantissa and ieeeExponent are the raw mantissa and biased exponent
+ * fields of the float; the sign is not handled here. The steps below
+ * compute the interval of acceptable decimal values around the input
+ * (step 2), scale its bounds to a power of ten (step 3) and then drop as
+ * many trailing decimal digits as the interval permits (step 4).
+ *
+ * Returns the chosen mantissa together with the matching decimal exponent.
+ *
+ * NOTE(review): pow5Factor() asserts a nonzero argument, so callers
+ * presumably filter out zero (and other special values) before calling
+ * this function -- confirm.
+ */
+static inline floating_decimal_32
+f2d(const uint32 ieeeMantissa, const uint32 ieeeExponent)
+{
+ int32 e2;
+ uint32 m2;
+
+ if (ieeeExponent == 0)
+ {
+ /* We subtract 2 so that the bounds computation has 2 additional bits. */
+ e2 = 1 - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
+ m2 = ieeeMantissa;
+ }
+ else
+ {
+ e2 = ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
+ m2 = (1u << FLOAT_MANTISSA_BITS) | ieeeMantissa; /* add the implicit leading 1 bit */
+ }
+
+#if STRICTLY_SHORTEST
+ const bool even = (m2 & 1) == 0;
+ const bool acceptBounds = even;
+#else
+ const bool acceptBounds = false;
+#endif
+
+ /* Step 2: Determine the interval of legal decimal representations. */
+ const uint32 mv = 4 * m2;
+ const uint32 mp = 4 * m2 + 2;
+
+ /* Implicit bool -> int conversion. True is 1, false is 0. */
+ const uint32 mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
+ const uint32 mm = 4 * m2 - 1 - mmShift;
+
+ /* Step 3: Convert to a decimal power base using 64-bit arithmetic. */
+ uint32 vr,
+ vp,
+ vm;
+ int32 e10;
+ bool vmIsTrailingZeros = false;
+ bool vrIsTrailingZeros = false;
+ uint8 lastRemovedDigit = 0;
+
+ if (e2 >= 0)
+ {
+ const uint32 q = log10Pow2(e2);
+
+ e10 = q;
+
+ const int32 k = FLOAT_POW5_INV_BITCOUNT + pow5bits(q) - 1;
+ const int32 i = -e2 + q + k;
+
+ vr = mulPow5InvDivPow2(mv, q, i);
+ vp = mulPow5InvDivPow2(mp, q, i);
+ vm = mulPow5InvDivPow2(mm, q, i);
+
+ if (q != 0 && (vp - 1) / 10 <= vm / 10)
+ {
+ /*
+ * We need to know one removed digit even if we are not going to
+ * loop below. We could use q = X - 1 above, except that would
+ * require 33 bits for the result, and we've found that 32-bit
+ * arithmetic is faster even on 64-bit machines.
+ */
+ const int32 l = FLOAT_POW5_INV_BITCOUNT + pow5bits(q - 1) - 1;
+
+ lastRemovedDigit = (uint8) (mulPow5InvDivPow2(mv, q - 1, -e2 + q - 1 + l) % 10);
+ }
+ if (q <= 9)
+ {
+ /*
+ * The largest power of 5 that fits in 24 bits is 5^10, but q <= 9
+ * seems to be safe as well.
+ *
+ * Only one of mp, mv, and mm can be a multiple of 5, if any.
+ */
+ if (mv % 5 == 0)
+ {
+ vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
+ }
+ else if (acceptBounds)
+ {
+ vmIsTrailingZeros = multipleOfPowerOf5(mm, q);
+ }
+ else
+ {
+ vp -= multipleOfPowerOf5(mp, q);
+ }
+ }
+ }
+ else
+ {
+ const uint32 q = log10Pow5(-e2);
+
+ e10 = q + e2;
+
+ const int32 i = -e2 - q;
+ const int32 k = pow5bits(i) - FLOAT_POW5_BITCOUNT;
+ int32 j = q - k;
+
+ vr = mulPow5divPow2(mv, i, j);
+ vp = mulPow5divPow2(mp, i, j);
+ vm = mulPow5divPow2(mm, i, j);
+
+ if (q != 0 && (vp - 1) / 10 <= vm / 10)
+ {
+ j = q - 1 - (pow5bits(i + 1) - FLOAT_POW5_BITCOUNT);
+ lastRemovedDigit = (uint8) (mulPow5divPow2(mv, i + 1, j) % 10);
+ }
+ if (q <= 1)
+ {
+ /*
+ * {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q
+ * trailing 0 bits.
+ */
+ /* mv = 4 * m2, so it always has at least two trailing 0 bits. */
+ vrIsTrailingZeros = true;
+ if (acceptBounds)
+ {
+ /*
+ * mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff
+ * mmShift == 1.
+ */
+ vmIsTrailingZeros = mmShift == 1;
+ }
+ else
+ {
+ /*
+ * mp = mv + 2, so it always has at least one trailing 0 bit.
+ */
+ --vp;
+ }
+ }
+ else if (q < 31)
+ {
+ /* TODO(ulfjack):Use a tighter bound here. */
+ vrIsTrailingZeros = multipleOfPowerOf2(mv, q - 1);
+ }
+ }
+
+ /*
+ * Step 4: Find the shortest decimal representation in the interval of
+ * legal representations.
+ */
+ uint32 removed = 0; /* number of decimal digits dropped below */
+ uint32 output;
+
+ if (vmIsTrailingZeros || vrIsTrailingZeros)
+ {
+ /* General case, which happens rarely (~4.0%). */
+ while (vp / 10 > vm / 10)
+ {
+ vmIsTrailingZeros &= vm - (vm / 10) * 10 == 0;
+ vrIsTrailingZeros &= lastRemovedDigit == 0;
+ lastRemovedDigit = (uint8) (vr % 10);
+ vr /= 10;
+ vp /= 10;
+ vm /= 10;
+ ++removed;
+ }
+ if (vmIsTrailingZeros)
+ {
+ while (vm % 10 == 0)
+ {
+ vrIsTrailingZeros &= lastRemovedDigit == 0;
+ lastRemovedDigit = (uint8) (vr % 10);
+ vr /= 10;
+ vp /= 10;
+ vm /= 10;
+ ++removed;
+ }
+ }
+
+ if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0)
+ {
+ /* Round even if the exact number is .....50..0. */
+ lastRemovedDigit = 4;
+ }
+
+ /*
+ * We need to take vr + 1 if vr is outside bounds or we need to round
+ * up.
+ */
+ output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
+ }
+ else
+ {
+ /*
+ * Specialized for the common case (~96.0%). Percentages below are
+ * relative to this.
+ *
+ * Loop iterations below (approximately): 0: 13.6%, 1: 70.7%, 2:
+ * 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
+ */
+ while (vp / 10 > vm / 10)
+ {
+ lastRemovedDigit = (uint8) (vr % 10);
+ vr /= 10;
+ vp /= 10;
+ vm /= 10;
+ ++removed;
+ }
+
+ /*
+ * We need to take vr + 1 if vr is outside bounds or we need to round
+ * up.
+ */
+ output = vr + (vr == vm || lastRemovedDigit >= 5);
+ }
+
+ const int32 exp = e10 + removed; /* each dropped digit raises the decimal exponent by one */
+
+ floating_decimal_32 fd;
+
+ fd.exponent = exp;
+ fd.mantissa = output;
+ return fd;
+}
+/*
+ * to_chars_f -- write a floating_decimal_32 in fixed-point notation
+ *
+ * v is the decimal value to print, olength the number of decimal digits in
+ * v.mantissa, and result the output buffer. Returns the number of
+ * characters produced; no terminating NUL is stored by this function.
+ *
+ * NOTE(review): the memcpy()/memset() pre-fills below always write 8 bytes,
+ * whatever the final length, so result presumably has slack beyond the
+ * returned count -- confirm against the caller's buffer size.
+ */
+static inline int
+to_chars_f(const floating_decimal_32 v, const uint32 olength, char *const result)
+{
+ /* Step 5: Print the decimal representation. */
+ int index = 0;
+
+ uint32 output = v.mantissa;
+ int32 exp = v.exponent;
+
+ /*----
+ * On entry, mantissa * 10^exp is the result to be output.
+ * Caller has already done the - sign if needed.
+ *
+ * We want to insert the point somewhere depending on the output length
+ * and exponent, which might mean adding zeros:
+ *
+ * exp | format
+ * 1+ | ddddddddd000000
+ * 0 | ddddddddd
+ * -1 .. -len+1 | dddddddd.d to d.ddddddddd
+ * -len ... | 0.ddddddddd to 0.000dddddd
+ */
+ uint32 i = 0; /* digits stored so far, counted from the right */
+ int32 nexp = exp + olength; /* when positive: digits before the decimal point */
+
+ if (nexp <= 0)
+ {
+ /* -nexp is number of 0s to add after '.' */
+ Assert(nexp >= -3);
+ /* 0.000ddddd */
+ index = 2 - nexp;
+ /* copy 8 bytes rather than 5 to let compiler optimize */
+ memcpy(result, "0.000000", 8);
+ }
+ else if (exp < 0)
+ {
+ /*
+ * dddd.dddd; leave space at the start and move the '.' in after
+ */
+ index = 1;
+ }
+ else
+ {
+ /*
+ * We can save some code later by pre-filling with zeros. We know that
+ * there can be no more than 6 output digits in this form, otherwise
+ * we would not choose fixed-point output. memset 8 rather than 6
+ * bytes to let the compiler optimize it.
+ */
+ Assert(exp < 6 && exp + olength <= 6);
+ memset(result, '0', 8);
+ }
+
+ while (output >= 10000) /* emit four digits per iteration, right to left */
+ {
+ const uint32 c = output - 10000 * (output / 10000);
+ const uint32 c0 = (c % 100) << 1;
+ const uint32 c1 = (c / 100) << 1;
+
+ output /= 10000;
+
+ memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2);
+ memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2);
+ i += 4;
+ }
+ if (output >= 100)
+ {
+ const uint32 c = (output % 100) << 1;
+
+ output /= 100;
+ memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2);
+ i += 2;
+ }
+ if (output >= 10)
+ {
+ const uint32 c = output << 1;
+
+ memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2);
+ }
+ else
+ {
+ result[index] = (char) ('0' + output);
+ }
+
+ if (index == 1)
+ {
+ /*
+ * nexp is 1..6 here, representing the number of digits before the
+ * point. A value of 7+ is not possible because we switch to
+ * scientific notation when the display exponent reaches 6.
+ */
+ Assert(nexp < 7);
+ /* gcc only seems to want to optimize memmove for small 2^n */
+ if (nexp & 4)
+ {
+ memmove(result + index - 1, result + index, 4);
+ index += 4;
+ }
+ if (nexp & 2)
+ {
+ memmove(result + index - 1, result + index, 2);
+ index += 2;
+ }
+ if (nexp & 1)
+ {
+ result[index - 1] = result[index];
+ }
+ result[nexp] = '.';
+ index = olength + 1; /* all digits plus the decimal point */
+ }
+ else if (exp >= 0)
+ {
+ /* we supplied the trailing zeros earlier, now just set the length. */
+ index = olength + exp;
+ }
+ else
+ {
+ index = olength + (2 - nexp); /* "0." plus -nexp zeros, then the digits */
+ }
+
+ return index;
+}
+
+/*
+ * to_chars
+ *
+ * Format v into result, preceded by '-' when sign is set, and return the
+ * number of bytes written (no terminator is stored). Values whose display
+ * exponent lies in [-4, 6) are handed to to_chars_f() for fixed-point
+ * output; everything else is printed here in scientific notation, with a
+ * sign character and two digits after the 'e'.
+ */
+static inline int
+to_chars(const floating_decimal_32 v, const bool sign, char *const result)
+{
+ /* Step 5: Print the decimal representation. */
+ int index = 0;
+
+ uint32 output = v.mantissa;
+ uint32 olength = decimalLength(output);
+ /* display exponent: power of ten of the leading digit */
+ int32 exp = v.exponent + olength - 1;
+
+ if (sign)
+ result[index++] = '-';
+
+ /*
+ * The thresholds for fixed-point output are chosen to match printf
+ * defaults. Beware that both the code of to_chars_f and the value of
+ * FLOAT_SHORTEST_DECIMAL_LEN are sensitive to these thresholds.
+ */
+ if (exp >= -4 && exp < 6)
+ return to_chars_f(v, olength, result + index) + sign;
+
+ /*
+ * If v.exponent is exactly 0, we might have reached here via the small
+ * integer fast path, in which case v.mantissa might contain trailing
+ * (decimal) zeros. For scientific notation we need to move these zeros
+ * into the exponent. (For fixed point this doesn't matter, which is why
+ * we do this here rather than above.)
+ *
+ * Since we already calculated the display exponent (exp) above based on
+ * the old decimal length, that value does not change here. Instead, we
+ * just reduce the display length for each digit removed.
+ *
+ * If we didn't get here via the fast path, the raw exponent will not
+ * usually be 0, and there will be no trailing zeros, so we pay no more
+ * than one div10/multiply extra cost. We claw back half of that by
+ * checking for divisibility by 2 before dividing by 10.
+ */
+ if (v.exponent == 0)
+ {
+ while ((output & 1) == 0)
+ {
+ const uint32 q = output / 10;
+ const uint32 r = output - 10 * q;
+
+ if (r != 0)
+ break;
+ output = q;
+ --olength;
+ }
+ }
+
+ /*----
+ * Print the decimal digits.
+ * The following code is equivalent to:
+ *
+ * for (uint32 i = 0; i < olength - 1; ++i) {
+ * const uint32 c = output % 10; output /= 10;
+ * result[index + olength - i] = (char) ('0' + c);
+ * }
+ * result[index] = '0' + output % 10;
+ */
+ /*
+ * Every digit except the first is stored one byte further right than its
+ * digit position, which leaves result[index + 1] free for the decimal
+ * point. i counts the digits emitted so far, from the right.
+ */
+ uint32 i = 0;
+
+ while (output >= 10000)
+ {
+ const uint32 c = output - 10000 * (output / 10000);
+ const uint32 c0 = (c % 100) << 1;
+ const uint32 c1 = (c / 100) << 1;
+
+ output /= 10000;
+
+ memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2);
+ memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2);
+ i += 4;
+ }
+ if (output >= 100)
+ {
+ const uint32 c = (output % 100) << 1;
+
+ output /= 100;
+ memcpy(result + index + olength - i - 1, DIGIT_TABLE + c, 2);
+ i += 2;
+ }
+ if (output >= 10)
+ {
+ const uint32 c = output << 1;
+
+ /*
+ * We can't use memcpy here: the decimal dot goes between these two
+ * digits.
+ */
+ result[index + olength - i] = DIGIT_TABLE[c + 1];
+ result[index] = DIGIT_TABLE[c];
+ }
+ else
+ {
+ result[index] = (char) ('0' + output);
+ }
+
+ /* Print decimal point if needed. */
+ if (olength > 1)
+ {
+ result[index + 1] = '.';
+ /* skip past all the digits and the point */
+ index += olength + 1;
+ }
+ else
+ {
+ ++index;
+ }
+
+ /* Print the exponent. */
+ result[index++] = 'e';
+ if (exp < 0)
+ {
+ result[index++] = '-';
+ exp = -exp;
+ }
+ else
+ result[index++] = '+';
+
+ /*
+ * Exactly two exponent digits are copied from DIGIT_TABLE, so this relies
+ * on exp being below 100 (presumably guaranteed by the range of float).
+ */
+ memcpy(result + index, DIGIT_TABLE + 2 * exp, 2);
+ index += 2;
+
+ return index;
+}
+
+/*
+ * f2d_small_int
+ *
+ * Fast path for floats that hold an exact integer in [1, 2^24). If the
+ * value described by ieeeMantissa/ieeeExponent is such an integer, store it
+ * in *v (with a decimal exponent of 0) and return true; otherwise leave *v
+ * untouched and return false.
+ *
+ * The stored mantissa may still carry trailing decimal zeros. Since
+ * 2^24 < 10^9, decimalLength() needs no adjustment for it.
+ */
+static inline bool
+f2d_small_int(const uint32 ieeeMantissa,
+              const uint32 ieeeExponent,
+              floating_decimal_32 *v)
+{
+    const int32 binexp = (int32) ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS;
+    bool        is_integer = false;
+
+    /*
+     * Keep a single exit point: multiple "return false;" statements tend to
+     * provoke the compiler into inlining several copies of f2d.
+     */
+    if (binexp <= 0 && binexp >= -FLOAT_MANTISSA_BITS)
+    {
+        /*----
+         * Here 2^23 <= m2 < 2^24 and 0 <= shift <= 23, hence
+         *     1 <= m2 / 2^shift < 2^24.
+         *
+         * The value is an integer exactly when its low "shift" significand
+         * bits are all zero. Testing ieeeMantissa is enough: the implied
+         * leading 1 bit could only fall inside the fraction if
+         * binexp < -FLOAT_MANTISSA_BITS, which was excluded above.
+         */
+        const int32 shift = -binexp;
+        const uint32 fraction_bits = ieeeMantissa & ((1U << shift) - 1);
+
+        if (fraction_bits == 0)
+        {
+            const uint32 significand = (1U << FLOAT_MANTISSA_BITS) | ieeeMantissa;
+
+            v->mantissa = significand >> shift;
+            v->exponent = 0;
+            is_integer = true;
+        }
+    }
+
+    return is_integer;
+}
+
+/*
+ * float_to_shortest_decimal_bufn
+ *
+ * Write the shortest decimal representation of f into result WITHOUT a
+ * terminating null, and return the number of bytes written. The caller's
+ * buffer must hold at least FLOAT_SHORTEST_DECIMAL_LEN-1 bytes.
+ */
+int
+float_to_shortest_decimal_bufn(float f, char *result)
+{
+    /*
+     * Step 1: Decode the floating-point number into sign, mantissa and
+     * exponent fields, unifying normalized and subnormal cases.
+     */
+    const uint32 raw = float_to_bits(f);
+    const uint32 mantissa_mask = (1u << FLOAT_MANTISSA_BITS) - 1;
+    const uint32 exponent_mask = (1u << FLOAT_EXPONENT_BITS) - 1u;
+    const uint32 ieeeMantissa = raw & mantissa_mask;
+    const uint32 ieeeExponent = (raw >> FLOAT_MANTISSA_BITS) & exponent_mask;
+    const bool  negative = ((raw >> (FLOAT_MANTISSA_BITS + FLOAT_EXPONENT_BITS)) & 1) != 0;
+    floating_decimal_32 decimal;
+
+    /*
+     * Easy cases first: an all-ones exponent field, or exponent and mantissa
+     * both zero, are delegated to copy_special_str().
+     */
+    if (ieeeExponent == exponent_mask || (ieeeExponent == 0 && ieeeMantissa == 0))
+        return copy_special_str(result, negative, (ieeeExponent != 0), (ieeeMantissa != 0));
+
+    /* Try the small-integer fast path, else run the general conversion. */
+    if (!f2d_small_int(ieeeMantissa, ieeeExponent, &decimal))
+        decimal = f2d(ieeeMantissa, ieeeExponent);
+
+    return to_chars(decimal, negative, result);
+}
+
+/*
+ * float_to_shortest_decimal_buf
+ *
+ * Like float_to_shortest_decimal_bufn(), but also null-terminates the
+ * output. The caller's buffer must hold at least FLOAT_SHORTEST_DECIMAL_LEN
+ * bytes. Returns the string length, not counting the terminator.
+ */
+int
+float_to_shortest_decimal_buf(float f, char *result)
+{
+    const int   nbytes = float_to_shortest_decimal_bufn(f, result);
+
+    /* There must still be room for the terminator. */
+    Assert(nbytes < FLOAT_SHORTEST_DECIMAL_LEN);
+    result[nbytes] = '\0';
+
+    return nbytes;
+}
+
+/*
+ * float_to_shortest_decimal
+ *
+ * Return the shortest decimal representation of f as a freshly allocated,
+ * null-terminated string. The storage comes from palloc() (which outside
+ * the backend is implemented on top of malloc()).
+ *
+ * The caller owns the result and is responsible for freeing it.
+ */
+char *
+float_to_shortest_decimal(float f)
+{
+    char       *buf = palloc(FLOAT_SHORTEST_DECIMAL_LEN);
+
+    (void) float_to_shortest_decimal_buf(f, buf);
+
+    return buf;
+}
diff --git a/src/common/fe_memutils.c b/src/common/fe_memutils.c
new file mode 100644
index 0000000..3bad81e
--- /dev/null
+++ b/src/common/fe_memutils.c
@@ -0,0 +1,175 @@
+/*-------------------------------------------------------------------------
+ *
+ * fe_memutils.c
+ * memory management support for frontend code
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/fe_memutils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#error "This file is not expected to be compiled for backend code"
+#endif
+
+#include "postgres_fe.h"
+
+/*
+ * pg_malloc_internal
+ *
+ * Common implementation behind the frontend allocation entry points.
+ *
+ * flags may contain MCXT_ALLOC_ZERO (zero the new memory) and
+ * MCXT_ALLOC_NO_OOM (return NULL on allocation failure). Without
+ * MCXT_ALLOC_NO_OOM, a failure prints "out of memory" on stderr and exits.
+ */
+static inline void *
+pg_malloc_internal(size_t size, int flags)
+{
+    /* malloc(0) is unportable, so always request at least one byte */
+    const size_t request = (size == 0) ? 1 : size;
+    void       *chunk = malloc(request);
+
+    if (chunk != NULL)
+    {
+        if ((flags & MCXT_ALLOC_ZERO) != 0)
+            MemSet(chunk, 0, request);
+        return chunk;
+    }
+
+    /* Allocation failed: fatal unless the caller opted out. */
+    if ((flags & MCXT_ALLOC_NO_OOM) == 0)
+    {
+        fprintf(stderr, _("out of memory\n"));
+        exit(EXIT_FAILURE);
+    }
+
+    return NULL;
+}
+
+/*
+ * pg_malloc
+ *
+ * Allocate "size" bytes via pg_malloc_internal() with no flags set: the
+ * memory is not zeroed, and an allocation failure terminates the program
+ * rather than returning NULL.
+ */
+void *
+pg_malloc(size_t size)
+{
+ return pg_malloc_internal(size, 0);
+}
+
+/*
+ * pg_malloc0
+ *
+ * Same as pg_malloc(), except that MCXT_ALLOC_ZERO is passed so the
+ * returned memory is zero-filled.
+ */
+void *
+pg_malloc0(size_t size)
+{
+ return pg_malloc_internal(size, MCXT_ALLOC_ZERO);
+}
+
+/*
+ * pg_malloc_extended
+ *
+ * Allocate "size" bytes with caller-chosen behavior: "flags" is handed to
+ * pg_malloc_internal() unchanged, which checks it for MCXT_ALLOC_ZERO and
+ * MCXT_ALLOC_NO_OOM.
+ */
+void *
+pg_malloc_extended(size_t size, int flags)
+{
+ return pg_malloc_internal(size, flags);
+}
+
+/*
+ * pg_realloc
+ *
+ * realloc() wrapper that never returns NULL: if the allocation fails, an
+ * "out of memory" message is printed on stderr and the program exits.
+ *
+ * ptr may be NULL, in which case this behaves like an allocation of "size"
+ * bytes. A size of zero is treated as a request for one byte.
+ */
+void *
+pg_realloc(void *ptr, size_t size)
+{
+    void       *tmp;
+
+    /*
+     * Never hand a zero size to realloc(). realloc(NULL, 0) is unportable,
+     * and realloc(ptr, 0) with a non-NULL ptr is implementation-defined: some
+     * C libraries free ptr and return NULL, which the check below would
+     * misreport as an out-of-memory condition.
+     */
+    if (size == 0)
+        size = 1;
+
+    tmp = realloc(ptr, size);
+    if (!tmp)
+    {
+        fprintf(stderr, _("out of memory\n"));
+        exit(EXIT_FAILURE);
+    }
+    return tmp;
+}
+
+/*
+ * pg_strdup
+ *
+ * strdup() wrapper that never returns NULL. Passing a NULL pointer is
+ * treated as an internal error, and running out of memory is fatal as well;
+ * in both cases a message goes to stderr and the program exits.
+ */
+char *
+pg_strdup(const char *in)
+{
+    char       *copy;
+
+    if (in == NULL)
+    {
+        fprintf(stderr,
+                _("cannot duplicate null pointer (internal error)\n"));
+        exit(EXIT_FAILURE);
+    }
+
+    copy = strdup(in);
+    if (copy == NULL)
+    {
+        fprintf(stderr, _("out of memory\n"));
+        exit(EXIT_FAILURE);
+    }
+
+    return copy;
+}
+
+/*
+ * pg_free
+ *
+ * Release memory by passing ptr straight to free().
+ */
+void
+pg_free(void *ptr)
+{
+ free(ptr);
+}
+
+/*
+ * Frontend emulation of backend memory management functions. Useful for
+ * programs that compile backend files.
+ *
+ * palloc: allocate "size" bytes via pg_malloc_internal() with no flags, so
+ * the memory is not zeroed and an allocation failure terminates the program.
+ */
+void *
+palloc(Size size)
+{
+ return pg_malloc_internal(size, 0);
+}
+
+/*
+ * palloc0: like palloc(), but passes MCXT_ALLOC_ZERO so the returned memory
+ * is zero-filled.
+ */
+void *
+palloc0(Size size)
+{
+ return pg_malloc_internal(size, MCXT_ALLOC_ZERO);
+}
+
+/*
+ * palloc_extended: allocate "size" bytes, passing "flags" unchanged to
+ * pg_malloc_internal() (which checks MCXT_ALLOC_ZERO and MCXT_ALLOC_NO_OOM).
+ */
+void *
+palloc_extended(Size size, int flags)
+{
+ return pg_malloc_internal(size, flags);
+}
+
+/*
+ * pfree: release memory by forwarding the pointer to pg_free().
+ */
+void
+pfree(void *pointer)
+{
+ pg_free(pointer);
+}
+
+/*
+ * pstrdup: duplicate a string via pg_strdup(), which exits the program on
+ * a NULL input or on allocation failure instead of returning NULL.
+ */
+char *
+pstrdup(const char *in)
+{
+ return pg_strdup(in);
+}
+
+/*
+ * pnstrdup
+ *
+ * Return a malloc'd, null-terminated copy of at most "size" bytes of "in";
+ * copying stops early at the first null byte. A NULL "in" is treated as an
+ * internal error, and running out of memory is fatal as well: in both cases
+ * a message goes to stderr and the program exits.
+ */
+char *
+pnstrdup(const char *in, Size size)
+{
+    char       *tmp;
+    size_t      len;
+
+    if (!in)
+    {
+        fprintf(stderr,
+                _("cannot duplicate null pointer (internal error)\n"));
+        exit(EXIT_FAILURE);
+    }
+
+    /*
+     * Keep the length in a size_t: strnlen() returns size_t, and squeezing it
+     * into an int would truncate (or turn negative) for inputs longer than
+     * INT_MAX, corrupting both the allocation size and the copy length.
+     */
+    len = strnlen(in, size);
+    tmp = malloc(len + 1);
+    if (tmp == NULL)
+    {
+        fprintf(stderr, _("out of memory\n"));
+        exit(EXIT_FAILURE);
+    }
+
+    memcpy(tmp, in, len);
+    tmp[len] = '\0';
+
+    return tmp;
+}
+
+/*
+ * repalloc: resize an allocation by forwarding to pg_realloc(), which exits
+ * the program on allocation failure instead of returning NULL.
+ */
+void *
+repalloc(void *pointer, Size size)
+{
+ return pg_realloc(pointer, size);
+}
diff --git a/src/common/file_perm.c b/src/common/file_perm.c
new file mode 100644
index 0000000..60f88d2
--- /dev/null
+++ b/src/common/file_perm.c
@@ -0,0 +1,91 @@
+/*-------------------------------------------------------------------------
+ *
+ * File and directory permission routines
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/common/file_perm.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "common/file_perm.h"
+
+/*
+ * Modes for creating directories and files in the data directory. Both
+ * start out as the owner-only variants and are changed only by
+ * SetDataDirectoryCreatePerm().
+ */
+int pg_dir_create_mode = PG_DIR_MODE_OWNER;
+int pg_file_create_mode = PG_FILE_MODE_OWNER;
+
+/*
+ * Mode mask to pass to umask(). This is more of a preventative measure since
+ * all file/directory creates should be performed using the create modes above.
+ * Like the create modes, it is updated by SetDataDirectoryCreatePerm().
+ */
+int pg_mode_mask = PG_MODE_MASK_OWNER;
+
+/*
+ * SetDataDirectoryCreatePerm
+ *
+ * Choose the create modes and umask used when writing to PGDATA, based on
+ * the mode of the data directory itself. When dataDirMode contains every
+ * bit of PG_DIR_MODE_GROUP, the group-accessible settings are selected so
+ * that new files and directories allow group read/execute; otherwise the
+ * default owner-only settings are (re)installed.
+ */
+void
+SetDataDirectoryCreatePerm(int dataDirMode)
+{
+    const bool  group_access =
+        (dataDirMode & PG_DIR_MODE_GROUP) == PG_DIR_MODE_GROUP;
+
+    pg_dir_create_mode = group_access ? PG_DIR_MODE_GROUP : PG_DIR_MODE_OWNER;
+    pg_file_create_mode = group_access ? PG_FILE_MODE_GROUP : PG_FILE_MODE_OWNER;
+    pg_mode_mask = group_access ? PG_MODE_MASK_GROUP : PG_MODE_MASK_OWNER;
+}
+
+#ifdef FRONTEND
+
+/*
+ * GetDataDirectoryCreatePerm
+ *
+ * Look at the mode of the PGDATA directory and feed it to
+ * SetDataDirectoryCreatePerm() so that the create modes and mask match it.
+ *
+ * Returns false if the directory cannot be examined. No error is reported
+ * here; the application should do that when false comes back.
+ *
+ * On Windows and Cygwin this does nothing and returns true, because there
+ * may not be proper support for Unix-y file permissions.
+ */
+bool
+GetDataDirectoryCreatePerm(const char *dataDir)
+{
+#if defined(WIN32) || defined(__CYGWIN__)
+    /* Nothing to do: no Unix-y permissions to inspect on these platforms. */
+    return true;
+#else
+    struct stat dirinfo;
+
+    /*
+     * If the mode cannot be obtained, leave it to the caller to produce an
+     * error saying that the data directory is inaccessible, if appropriate.
+     */
+    if (stat(dataDir, &dirinfo) == -1)
+        return false;
+
+    /* Derive the create modes and mask from the directory's mode. */
+    SetDataDirectoryCreatePerm(dirinfo.st_mode);
+
+    return true;
+#endif                          /* WIN32 || __CYGWIN__ */
+}
+
+
+#endif /* FRONTEND */
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
new file mode 100644
index 0000000..74833c4
--- /dev/null
+++ b/src/common/file_utils.c
@@ -0,0 +1,582 @@
+/*-------------------------------------------------------------------------
+ *
+ * File-processing utility routines.
+ *
+ * Assorted utility functions to work on files.
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/common/file_utils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "common/file_utils.h"
+#ifdef FRONTEND
+#include "common/logging.h"
+#endif
+#include "port/pg_iovec.h"
+
+#ifdef FRONTEND
+
+/*
+ * Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data,
+ * i.e. either sync_file_range() or posix_fadvise(POSIX_FADV_DONTNEED) is
+ * available. It gates pre_sync_fname() and the "hint" passes that use it.
+ */
+#if defined(HAVE_SYNC_FILE_RANGE)
+#define PG_FLUSH_DATA_WORKS 1
+#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
+#define PG_FLUSH_DATA_WORKS 1
+#endif
+
+/*
+ * pg_xlog has been renamed to pg_wal in version 10. Server versions below
+ * this number are taken to still use the pg_xlog directory name.
+ */
+#define MINIMUM_VERSION_FOR_PG_WAL 100000
+
+/* File-local helpers, defined further down in this file. */
+#ifdef PG_FLUSH_DATA_WORKS
+static int pre_sync_fname(const char *fname, bool isdir);
+#endif
+static void walkdir(const char *path,
+ int (*action) (const char *fname, bool isdir),
+ bool process_symlinks);
+
+/*
+ * fsync_pgdata
+ *
+ * Recursively fsync PGDATA and everything in it.
+ *
+ * Regular files and directories are fsync'd wherever they are, but symlinks
+ * are followed only for pg_wal (or pg_xlog) and for the entries immediately
+ * under pg_tblspc. Other symlinks are presumed to point at files we're not
+ * responsible for fsyncing, and might not have privileges to write at all.
+ *
+ * serverVersion is the version of the server whose data directory is being
+ * synced; it decides whether the WAL directory is named pg_xlog or pg_wal.
+ */
+void
+fsync_pgdata(const char *pg_data,
+             int serverVersion)
+{
+    /* pg_xlog was renamed to pg_wal in version 10 */
+    const char *wal_dirname =
+        (serverVersion < MINIMUM_VERSION_FOR_PG_WAL) ? "pg_xlog" : "pg_wal";
+    char        pg_wal[MAXPGPATH];
+    char        pg_tblspc[MAXPGPATH];
+    struct stat st;
+    bool        xlog_is_symlink = false;
+
+    snprintf(pg_wal, MAXPGPATH, "%s/%s", pg_data, wal_dirname);
+    snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data);
+
+    /*
+     * The walk over pg_data below ignores symlinks, so if the WAL directory
+     * is one we must remember to descend into it separately.
+     */
+    if (lstat(pg_wal, &st) < 0)
+        pg_log_error("could not stat file \"%s\": %m", pg_wal);
+    else if (S_ISLNK(st.st_mode))
+        xlog_is_symlink = true;
+
+#ifdef PG_FLUSH_DATA_WORKS
+
+    /*
+     * First pass: where possible, hint to the kernel that we're about to
+     * fsync the data directory and its contents.
+     */
+    walkdir(pg_data, pre_sync_fname, false);
+    if (xlog_is_symlink)
+        walkdir(pg_wal, pre_sync_fname, false);
+    walkdir(pg_tblspc, pre_sync_fname, true);
+#endif
+
+    /*
+     * Second pass: the real fsync()s, in the same order.
+     *
+     * Because the main walk skips symlinks, pg_wal gets its own walk when it
+     * is a symlink, and pg_tblspc is revisited with process_symlinks = true.
+     * Plain directories inside pg_tblspc therefore get fsync'd twice; that is
+     * not an expected case, so it isn't worth optimizing.
+     */
+    walkdir(pg_data, fsync_fname, false);
+    if (xlog_is_symlink)
+        walkdir(pg_wal, fsync_fname, false);
+    walkdir(pg_tblspc, fsync_fname, true);
+}
+
+/*
+ * Issue fsync recursively on the given directory and all its contents.
+ *
+ * This is a convenient wrapper on top of walkdir(). Symlinks found in the
+ * directory are not followed, since process_symlinks is passed as false.
+ */
+void
+fsync_dir_recurse(const char *dir)
+{
+ /*
+ * If possible, hint to the kernel that we're soon going to fsync the data
+ * directory and its contents.
+ */
+#ifdef PG_FLUSH_DATA_WORKS
+ walkdir(dir, pre_sync_fname, false);
+#endif
+
+ /* Now do the actual fsync of every file and directory. */
+ walkdir(dir, fsync_fname, false);
+}
+
+/*
+ * walkdir
+ *
+ * Recursively walk the directory "path", calling "action" on every regular
+ * file and every directory found, including "path" itself (which is
+ * processed last, after its contents).
+ *
+ * With process_symlinks set, regular files and directories reached through
+ * symlinks located directly in "path" are processed too; otherwise symlinks
+ * are ignored. Symlinks are always ignored in subdirectories, ie we
+ * intentionally don't pass the flag down to recursive calls.
+ *
+ * Errors are reported but not considered fatal.
+ *
+ * See also walkdir in fd.c, which is a backend version of this logic.
+ */
+static void
+walkdir(const char *path,
+        int (*action) (const char *fname, bool isdir),
+        bool process_symlinks)
+{
+    DIR        *dir = opendir(path);
+
+    if (dir == NULL)
+    {
+        pg_log_error("could not open directory \"%s\": %m", path);
+        return;
+    }
+
+    for (;;)
+    {
+        struct dirent *entry;
+        char        subpath[MAXPGPATH * 2];
+        PGFileType  ftype;
+
+        /* Clear errno so that end-of-directory can be told from failure. */
+        errno = 0;
+        entry = readdir(dir);
+        if (entry == NULL)
+            break;
+
+        if (strcmp(entry->d_name, ".") == 0 ||
+            strcmp(entry->d_name, "..") == 0)
+            continue;
+
+        snprintf(subpath, sizeof(subpath), "%s/%s", path, entry->d_name);
+
+        /*
+         * get_dirent_type() reports its own errors; remaining symlinks and
+         * unknown file types are simply skipped.
+         */
+        ftype = get_dirent_type(subpath, entry, process_symlinks, PG_LOG_ERROR);
+        if (ftype == PGFILETYPE_REG)
+            (*action) (subpath, false);
+        else if (ftype == PGFILETYPE_DIR)
+            walkdir(subpath, action, false);
+    }
+
+    if (errno)
+        pg_log_error("could not read directory \"%s\": %m", path);
+
+    (void) closedir(dir);
+
+    /*
+     * The directory itself must be processed as well: fsyncing individual
+     * files doesn't guarantee that their directory entries are synced.
+     * Recent versions of ext4 have made the window much wider but it's been
+     * an issue for ext3 and other filesystems in the past.
+     */
+    (*action) (path, true);
+}
+
+/*
+ * Hint to the OS that it should get ready to fsync() this file.
+ *
+ * Ignores errors trying to open unreadable files, and reports other errors
+ * non-fatally.
+ */
+#ifdef PG_FLUSH_DATA_WORKS
+
+/*
+ * pre_sync_fname
+ *
+ * Give the kernel a hint that "fname" is about to be fsync'd. Returns 0 on
+ * success or when the file could not be opened for an ignorable reason
+ * (EACCES, or EISDIR for a directory); returns -1 after logging any other
+ * open failure. Failures of the hint call itself are ignored.
+ */
+static int
+pre_sync_fname(const char *fname, bool isdir)
+{
+    const int   fd = open(fname, O_RDONLY | PG_BINARY, 0);
+
+    if (fd < 0)
+    {
+        const bool  ignorable = (errno == EACCES) || (isdir && errno == EISDIR);
+
+        if (ignorable)
+            return 0;
+
+        pg_log_error("could not open file \"%s\": %m", fname);
+        return -1;
+    }
+
+    /*
+     * Same strategy as pg_flush_data() in the backend: sync_file_range is
+     * preferred, posix_fadvise is the fallback. This is only a hint, so the
+     * result is deliberately discarded.
+     */
+#if defined(HAVE_SYNC_FILE_RANGE)
+    (void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);
+#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
+    (void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
+#else
+#error PG_FLUSH_DATA_WORKS should not have been defined
+#endif
+
+    (void) close(fd);
+
+    return 0;
+}
+
+#endif /* PG_FLUSH_DATA_WORKS */
+
+/*
+ * fsync_fname -- Try to fsync a file or directory
+ *
+ * Returns 0 on success, and also when the failure is one we choose to
+ * ignore: a file we are not allowed to open (EACCES), a directory that
+ * cannot be opened (EISDIR), or a directory fsync rejected with EBADF or
+ * EINVAL on systems where that isn't allowed/required.
+ *
+ * Any other failure to open the file is logged and reported by returning
+ * -1. Any other fsync() failure is fatal: it is logged and the program
+ * exits.
+ */
+int
+fsync_fname(const char *fname, bool isdir)
+{
+    /*
+     * Some OSs require directories to be opened read-only whereas other
+     * systems don't allow us to fsync files opened read-only, hence the two
+     * cases. Using O_RDWR means files not writable by our userid won't be
+     * fsync'd, but we assume that's OK.
+     */
+    const int   flags = PG_BINARY | (isdir ? O_RDONLY : O_RDWR);
+    int         fd;
+
+    /*
+     * Open the file, silently ignoring errors about unreadable files (or
+     * unsupported operations, e.g. opening a directory under Windows), and
+     * logging others.
+     */
+    fd = open(fname, flags, 0);
+    if (fd < 0)
+    {
+        if (errno == EACCES || (isdir && errno == EISDIR))
+            return 0;
+        pg_log_error("could not open file \"%s\": %m", fname);
+        return -1;
+    }
+
+    if (fsync(fd) != 0)
+    {
+        /*
+         * Some OSes don't allow us to fsync directories at all, so those
+         * errors can be ignored. Anything else needs to be reported.
+         */
+        const bool  dir_fsync_unsupported =
+            isdir && (errno == EBADF || errno == EINVAL);
+
+        if (!dir_fsync_unsupported)
+        {
+            pg_log_error("could not fsync file \"%s\": %m", fname);
+            (void) close(fd);
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    (void) close(fd);
+
+    return 0;
+}
+
+/*
+ * fsync_parent_path -- fsync the parent path of a file or directory
+ *
+ * This helps make file operations persistent on disk in case of an OS
+ * crash or power failure. Returns 0 on success and -1 if the parent
+ * directory could not be fsync'd.
+ */
+int
+fsync_parent_path(const char *fname)
+{
+    char        parentpath[MAXPGPATH];
+
+    strlcpy(parentpath, fname, MAXPGPATH);
+    get_parent_directory(parentpath);
+
+    /*
+     * For a bare file name get_parent_directory() leaves an empty string
+     * (see comments in path.c); that means the current directory.
+     */
+    if (parentpath[0] == '\0')
+        strlcpy(parentpath, ".", MAXPGPATH);
+
+    return (fsync_fname(parentpath, true) != 0) ? -1 : 0;
+}
+
+/*
+ * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability
+ *
+ * Frontend wrapper around rename(), similar to the backend version. Returns
+ * 0 on success and -1 on failure (after logging the problem); a failure to
+ * fsync the pre-existing target file terminates the program.
+ */
+int
+durable_rename(const char *oldfile, const char *newfile)
+{
+    int         target_fd;
+
+    /*
+     * Start by fsyncing the source, and the target if it already exists, so
+     * both are properly persistent on disk. Syncing the target is not
+     * strictly necessary, but it makes crashes easier to reason about: it is
+     * then guaranteed that either source or target file exists afterwards.
+     */
+    if (fsync_fname(oldfile, false) != 0)
+        return -1;
+
+    target_fd = open(newfile, PG_BINARY | O_RDWR, 0);
+    if (target_fd >= 0)
+    {
+        if (fsync(target_fd) != 0)
+        {
+            pg_log_error("could not fsync file \"%s\": %m", newfile);
+            close(target_fd);
+            exit(EXIT_FAILURE);
+        }
+        close(target_fd);
+    }
+    else if (errno != ENOENT)
+    {
+        /* A missing target is fine; anything else is an error. */
+        pg_log_error("could not open file \"%s\": %m", newfile);
+        return -1;
+    }
+
+    /* Time to do the real deal... */
+    if (rename(oldfile, newfile) != 0)
+    {
+        pg_log_error("could not rename file \"%s\" to \"%s\": %m",
+                     oldfile, newfile);
+        return -1;
+    }
+
+    /*
+     * Make the rename itself persistent: fsync the file under its new name,
+     * then the directory containing it.
+     */
+    if (fsync_fname(newfile, false) != 0 ||
+        fsync_parent_path(newfile) != 0)
+        return -1;
+
+    return 0;
+}
+
+#endif /* FRONTEND */
+
+/*
+ * get_dirent_type
+ *
+ * Return the type of the directory entry "de", whose full path is "path".
+ *
+ * If look_through_symlinks is true, a symlink is classified by what it
+ * points to; otherwise a symlink is reported as PGFILETYPE_LNK. When the
+ * file has to be examined and that fails, the failure is reported at
+ * "elevel" and PGFILETYPE_ERROR is returned.
+ *
+ * In frontend code, elevel should be a level from logging.h; in backend code
+ * it should be a level from elog.h.
+ */
+PGFileType
+get_dirent_type(const char *path,
+                const struct dirent *de,
+                bool look_through_symlinks,
+                int elevel)
+{
+    PGFileType  result = PGFILETYPE_UNKNOWN;
+    struct stat fst;
+    int         sret;
+
+    /*
+     * Some systems tell us the type directly in the dirent struct, but that's
+     * a BSD and Linux extension not required by POSIX. Even when the
+     * interface is present, the type can be unknown, depending on the
+     * filesystem.
+     */
+#if defined(DT_REG) && defined(DT_DIR) && defined(DT_LNK)
+    switch (de->d_type)
+    {
+        case DT_REG:
+            result = PGFILETYPE_REG;
+            break;
+        case DT_DIR:
+            result = PGFILETYPE_DIR;
+            break;
+        case DT_LNK:
+            /* when looking through symlinks, the target must be examined */
+            if (!look_through_symlinks)
+                result = PGFILETYPE_LNK;
+            break;
+        default:
+            break;
+    }
+#endif
+
+    if (result != PGFILETYPE_UNKNOWN)
+        return result;
+
+    /* The dirent didn't settle it, so ask the filesystem. */
+    sret = look_through_symlinks ? stat(path, &fst) : lstat(path, &fst);
+    if (sret < 0)
+    {
+        result = PGFILETYPE_ERROR;
+#ifdef FRONTEND
+        pg_log_generic(elevel, PG_LOG_PRIMARY, "could not stat file \"%s\": %m", path);
+#else
+        ereport(elevel,
+                (errcode_for_file_access(),
+                 errmsg("could not stat file \"%s\": %m", path)));
+#endif
+        return result;
+    }
+
+    if (S_ISREG(fst.st_mode))
+        return PGFILETYPE_REG;
+    if (S_ISDIR(fst.st_mode))
+        return PGFILETYPE_DIR;
+    if (S_ISLNK(fst.st_mode))
+        return PGFILETYPE_LNK;
+
+    return PGFILETYPE_UNKNOWN;
+}
+
+/*
+ * pg_pwritev_with_retry
+ *
+ * Convenience wrapper for pg_pwritev() that retries on partial write. If an
+ * error is returned, it is unspecified how much has been written.
+ *
+ * Returns the total number of bytes written on success, or -1 on failure.
+ * Passing more than PG_IOV_MAX iovecs fails with errno set to EINVAL. The
+ * caller's iov array is never modified; retries work on a private copy.
+ */
+ssize_t
+pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
+{
+ struct iovec iov_copy[PG_IOV_MAX];
+ ssize_t sum = 0;
+ ssize_t part;
+
+ /* We'd better have space to make a copy, in case we need to retry. */
+ if (iovcnt > PG_IOV_MAX)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (;;)
+ {
+ /* Write as much as we can. */
+ part = pg_pwritev(fd, iov, iovcnt, offset);
+ if (part < 0)
+ return -1;
+
+ /* Testing aid: pretend that no more than 4096 bytes were written. */
+#ifdef SIMULATE_SHORT_WRITE
+ part = Min(part, 4096);
+#endif
+
+ /* Count our progress. */
+ sum += part;
+ offset += part;
+
+ /*
+ * Step over iovecs that are done. Afterwards "part" holds only the
+ * bytes consumed from the first iovec that is still unfinished.
+ */
+ while (iovcnt > 0 && iov->iov_len <= part)
+ {
+ part -= iov->iov_len;
+ ++iov;
+ --iovcnt;
+ }
+
+ /* Are they all done? */
+ if (iovcnt == 0)
+ {
+ /* We don't expect the kernel to write more than requested. */
+ Assert(part == 0);
+ break;
+ }
+
+ /*
+ * Move whatever's left to the front of our mutable copy and adjust
+ * the leading iovec.
+ */
+ Assert(iovcnt > 0);
+ /* memmove, since on later rounds iov already points into iov_copy */
+ memmove(iov_copy, iov, sizeof(*iov) * iovcnt);
+ Assert(iov->iov_len > part);
+ iov_copy[0].iov_base = (char *) iov_copy[0].iov_base + part;
+ iov_copy[0].iov_len -= part;
+ iov = iov_copy;
+ }
+
+ return sum;
+}
+
+/*
+ * pg_pwrite_zeros
+ *
+ * Write "size" bytes of zeros to the file at "offset" (measured from the
+ * start of the file), using vectored I/O.
+ *
+ * Returns the total number of bytes written. On failure, a negative value
+ * is returned with errno set.
+ */
+ssize_t
+pg_pwrite_zeros(int fd, size_t size, off_t offset)
+{
+    static const PGIOAlignedBlock zero_block = {{0}};   /* worth BLCKSZ */
+    void       *zeros = unconstify(PGIOAlignedBlock *, &zero_block)->data;
+    struct iovec iov[PG_IOV_MAX];
+    size_t      todo = size;
+    ssize_t     done = 0;
+
+    /* Each pass issues one write covering as many blocks as will fit. */
+    while (todo > 0)
+    {
+        int         nvecs;
+        ssize_t     nwritten;
+
+        /* Every iovec points at the same zero block. */
+        for (nvecs = 0; nvecs < PG_IOV_MAX && todo > 0; nvecs++)
+        {
+            const size_t chunk = (todo < BLCKSZ) ? todo : BLCKSZ;
+
+            iov[nvecs].iov_base = zeros;
+            iov[nvecs].iov_len = chunk;
+            todo -= chunk;
+        }
+
+        nwritten = pg_pwritev_with_retry(fd, iov, nvecs, offset);
+        if (nwritten < 0)
+            return nwritten;
+
+        offset += nwritten;
+        done += nwritten;
+    }
+
+    Assert(done == size);
+
+    return done;
+}
diff --git a/src/common/hashfn.c b/src/common/hashfn.c
new file mode 100644
index 0000000..2490607
--- /dev/null
+++ b/src/common/hashfn.c
@@ -0,0 +1,692 @@
+/*-------------------------------------------------------------------------
+ *
+ * hashfn.c
+ * Generic hashing functions, and hash functions for use in dynahash.c
+ * hashtables
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/hashfn.c
+ *
+ * NOTES
+ * It is expected that every bit of a hash function's 32-bit result is
+ * as random as every other; failure to ensure this is likely to lead
+ * to poor performance of hash tables. In most cases a hash
+ * function should use hash_bytes() or its variant hash_bytes_uint32(),
+ * or the wrappers hash_any() and hash_uint32() defined in hashfn.h.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "common/hashfn.h"
+#include "port/pg_bitutils.h"
+
+
+/*
+ * This hash function was written by Bob Jenkins
+ * (bob_jenkins@burtleburtle.net), and superficially adapted
+ * for PostgreSQL by Neil Conway. For more information on this
+ * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
+ * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
+ *
+ * In the current code, we have adopted Bob's 2006 update of his hash
+ * function to fetch the data a word at a time when it is suitably aligned.
+ * This makes for a useful speedup, at the cost of having to maintain
+ * four code paths (aligned vs unaligned, and little-endian vs big-endian).
+ * It also uses two separate mixing functions mix() and final(), instead
+ * of a slower multi-purpose function.
+ */
+
+/* Get a bit mask of the bits set in non-uint32 aligned addresses */
+#define UINT32_ALIGN_MASK (sizeof(uint32) - 1)
+
+#define rot(x,k) pg_rotate_left32(x, k)
+
+/*----------
+ * mix -- mix 3 32-bit values reversibly.
+ *
+ * This is reversible, so any information in (a,b,c) before mix() is
+ * still in (a,b,c) after mix().
+ *
+ * If four pairs of (a,b,c) inputs are run through mix(), or through
+ * mix() in reverse, there are at least 32 bits of the output that
+ * are sometimes the same for one pair and different for another pair.
+ * This was tested for:
+ * * pairs that differed by one bit, by two bits, in any combination
+ * of top bits of (a,b,c), or in any combination of bottom bits of
+ * (a,b,c).
+ * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ * is commonly produced by subtraction) look like a single 1-bit
+ * difference.
+ * * the base values were pseudorandom, all zero but one bit set, or
+ * all zero plus a counter that starts at zero.
+ *
+ * This does not achieve avalanche. There are input bits of (a,b,c)
+ * that fail to affect some output bits of (a,b,c), especially of a. The
+ * most thoroughly mixed value is c, but it doesn't really even achieve
+ * avalanche in c.
+ *
+ * This allows some parallelism. Read-after-writes are good at doubling
+ * the number of bits affected, so the goal of mixing pulls in the opposite
+ * direction from the goal of parallelism. I did what I could. Rotates
+ * seem to cost as much as shifts on every machine I could lay my hands on,
+ * and rotates are much kinder to the top and bottom bits, so I used rotates.
+ *----------
+ */
+#define mix(a,b,c) \
+{ \
+ a -= c; a ^= rot(c, 4); c += b; \
+ b -= a; b ^= rot(a, 6); a += c; \
+ c -= b; c ^= rot(b, 8); b += a; \
+ a -= c; a ^= rot(c,16); c += b; \
+ b -= a; b ^= rot(a,19); a += c; \
+ c -= b; c ^= rot(b, 4); b += a; \
+}
+
+/*----------
+ * final -- final mixing of 3 32-bit values (a,b,c) into c
+ *
+ * Pairs of (a,b,c) values differing in only a few bits will usually
+ * produce values of c that look totally different. This was tested for
+ * * pairs that differed by one bit, by two bits, in any combination
+ * of top bits of (a,b,c), or in any combination of bottom bits of
+ * (a,b,c).
+ * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ * is commonly produced by subtraction) look like a single 1-bit
+ * difference.
+ * * the base values were pseudorandom, all zero but one bit set, or
+ * all zero plus a counter that starts at zero.
+ *
+ * The use of separate functions for mix() and final() allows for a
+ * substantial performance increase since final() does not need to
+ * do well in reverse, but it does need to affect all output bits.
+ * mix(), on the other hand, does not need to affect all output
+ * bits (affecting 32 bits is enough). The original hash function had
+ * a single mixing operation that had to satisfy both sets of requirements
+ * and was slower as a result.
+ *----------
+ */
+#define final(a,b,c) \
+{ \
+ c ^= b; c -= rot(b,14); \
+ a ^= c; a -= rot(c,11); \
+ b ^= a; b -= rot(a,25); \
+ c ^= b; c -= rot(b,16); \
+ a ^= c; a -= rot(c, 4); \
+ b ^= a; b -= rot(a,14); \
+ c ^= b; c -= rot(b,24); \
+}
+
+/*
+ * hash_bytes() -- hash a variable-length key into a 32-bit value
+ * k : the key (the unaligned variable-length array of bytes)
+ * keylen : the length of the key, counting by bytes
+ *
+ * Returns a uint32 value. Every bit of the key affects every bit of
+ * the return value. Every 1-bit and 2-bit delta achieves avalanche.
+ * About 6*len+35 instructions. The best hash table sizes are powers
+ * of 2. There is no need to do mod a prime (mod is sooo slow!).
+ * If you need less than 32 bits, use a bitmask.
+ *
+ * This procedure must never throw elog(ERROR); the ResourceOwner code
+ * relies on this not to fail.
+ *
+ * Note: we could easily change this function to return a 64-bit hash value
+ * by using the final values of both b and c. b is perhaps a little less
+ * well mixed than c, however.
+ */
+uint32
+hash_bytes(const unsigned char *k, int keylen)
+{
+ uint32 a,
+ b,
+ c,
+ len;
+
+ /* Set up the internal state */
+ len = keylen;
+ a = b = c = 0x9e3779b9 + len + 3923095;
+
+ /* If the source pointer is word-aligned, we use word-wide fetches */
+ if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
+ {
+ /* Code path for aligned source data */
+ const uint32 *ka = (const uint32 *) k;
+
+ /* handle most of the key */
+ while (len >= 12)
+ {
+ a += ka[0];
+ b += ka[1];
+ c += ka[2];
+ mix(a, b, c);
+ ka += 3;
+ len -= 12;
+ }
+
+ /* handle the last 11 bytes */
+ k = (const unsigned char *) ka;
+#ifdef WORDS_BIGENDIAN
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 8);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 24);
+ /* fall through */
+ case 8:
+ /* the lowest byte of c is reserved for the length */
+ b += ka[1];
+ a += ka[0];
+ break;
+ case 7:
+ b += ((uint32) k[6] << 8);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 16);
+ /* fall through */
+ case 5:
+ b += ((uint32) k[4] << 24);
+ /* fall through */
+ case 4:
+ a += ka[0];
+ break;
+ case 3:
+ a += ((uint32) k[2] << 8);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 16);
+ /* fall through */
+ case 1:
+ a += ((uint32) k[0] << 24);
+ /* case 0: nothing left to add */
+ }
+#else /* !WORDS_BIGENDIAN */
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 24);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 8);
+ /* fall through */
+ case 8:
+ /* the lowest byte of c is reserved for the length */
+ b += ka[1];
+ a += ka[0];
+ break;
+ case 7:
+ b += ((uint32) k[6] << 16);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 8);
+ /* fall through */
+ case 5:
+ b += k[4];
+ /* fall through */
+ case 4:
+ a += ka[0];
+ break;
+ case 3:
+ a += ((uint32) k[2] << 16);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 8);
+ /* fall through */
+ case 1:
+ a += k[0];
+ /* case 0: nothing left to add */
+ }
+#endif /* WORDS_BIGENDIAN */
+ }
+ else
+ {
+ /* Code path for non-aligned source data */
+
+ /* handle most of the key */
+ while (len >= 12)
+ {
+#ifdef WORDS_BIGENDIAN
+ a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
+ b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
+ c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
+#else /* !WORDS_BIGENDIAN */
+ a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
+ b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
+ c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
+#endif /* WORDS_BIGENDIAN */
+ mix(a, b, c);
+ k += 12;
+ len -= 12;
+ }
+
+ /* handle the last 11 bytes */
+#ifdef WORDS_BIGENDIAN
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 8);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 24);
+ /* fall through */
+ case 8:
+ /* the lowest byte of c is reserved for the length */
+ b += k[7];
+ /* fall through */
+ case 7:
+ b += ((uint32) k[6] << 8);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 16);
+ /* fall through */
+ case 5:
+ b += ((uint32) k[4] << 24);
+ /* fall through */
+ case 4:
+ a += k[3];
+ /* fall through */
+ case 3:
+ a += ((uint32) k[2] << 8);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 16);
+ /* fall through */
+ case 1:
+ a += ((uint32) k[0] << 24);
+ /* case 0: nothing left to add */
+ }
+#else /* !WORDS_BIGENDIAN */
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 24);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 8);
+ /* fall through */
+ case 8:
+ /* the lowest byte of c is reserved for the length */
+ b += ((uint32) k[7] << 24);
+ /* fall through */
+ case 7:
+ b += ((uint32) k[6] << 16);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 8);
+ /* fall through */
+ case 5:
+ b += k[4];
+ /* fall through */
+ case 4:
+ a += ((uint32) k[3] << 24);
+ /* fall through */
+ case 3:
+ a += ((uint32) k[2] << 16);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 8);
+ /* fall through */
+ case 1:
+ a += k[0];
+ /* case 0: nothing left to add */
+ }
+#endif /* WORDS_BIGENDIAN */
+ }
+
+ final(a, b, c);
+
+ /* report the result */
+ return c;
+}
+
+/*
+ * hash_bytes_extended() -- hash into a 64-bit value, using an optional seed
+ * k : the key (the unaligned variable-length array of bytes)
+ * keylen : the length of the key, counting by bytes
+ * seed : a 64-bit seed (0 means no seed)
+ *
+ * Returns a uint64 value. Otherwise similar to hash_bytes.
+ */
+uint64
+hash_bytes_extended(const unsigned char *k, int keylen, uint64 seed)
+{
+ uint32 a,
+ b,
+ c,
+ len;
+
+ /* Set up the internal state */
+ len = keylen;
+ a = b = c = 0x9e3779b9 + len + 3923095;
+
+ /* If the seed is non-zero, use it to perturb the internal state. */
+ if (seed != 0)
+ {
+ /*
+ * In essence, the seed is treated as part of the data being hashed,
+ * but for simplicity, we pretend that it's padded with four bytes of
+ * zeroes so that the seed constitutes a 12-byte chunk.
+ */
+ a += (uint32) (seed >> 32);
+ b += (uint32) seed;
+ mix(a, b, c);
+ }
+
+ /* If the source pointer is word-aligned, we use word-wide fetches */
+ if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
+ {
+ /* Code path for aligned source data */
+ const uint32 *ka = (const uint32 *) k;
+
+ /* handle most of the key */
+ while (len >= 12)
+ {
+ a += ka[0];
+ b += ka[1];
+ c += ka[2];
+ mix(a, b, c);
+ ka += 3;
+ len -= 12;
+ }
+
+ /* handle the last 11 bytes */
+ k = (const unsigned char *) ka;
+#ifdef WORDS_BIGENDIAN
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 8);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 24);
+ /* fall through */
+ case 8:
+ /* the lowest byte of c is reserved for the length */
+ b += ka[1];
+ a += ka[0];
+ break;
+ case 7:
+ b += ((uint32) k[6] << 8);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 16);
+ /* fall through */
+ case 5:
+ b += ((uint32) k[4] << 24);
+ /* fall through */
+ case 4:
+ a += ka[0];
+ break;
+ case 3:
+ a += ((uint32) k[2] << 8);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 16);
+ /* fall through */
+ case 1:
+ a += ((uint32) k[0] << 24);
+ /* case 0: nothing left to add */
+ }
+#else /* !WORDS_BIGENDIAN */
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 24);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 8);
+ /* fall through */
+ case 8:
+ /* the lowest byte of c is reserved for the length */
+ b += ka[1];
+ a += ka[0];
+ break;
+ case 7:
+ b += ((uint32) k[6] << 16);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 8);
+ /* fall through */
+ case 5:
+ b += k[4];
+ /* fall through */
+ case 4:
+ a += ka[0];
+ break;
+ case 3:
+ a += ((uint32) k[2] << 16);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 8);
+ /* fall through */
+ case 1:
+ a += k[0];
+ /* case 0: nothing left to add */
+ }
+#endif /* WORDS_BIGENDIAN */
+ }
+ else
+ {
+ /* Code path for non-aligned source data */
+
+ /* handle most of the key */
+ while (len >= 12)
+ {
+#ifdef WORDS_BIGENDIAN
+ a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
+ b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
+ c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
+#else /* !WORDS_BIGENDIAN */
+ a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
+ b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
+ c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
+#endif /* WORDS_BIGENDIAN */
+ mix(a, b, c);
+ k += 12;
+ len -= 12;
+ }
+
+ /* handle the last 11 bytes */
+#ifdef WORDS_BIGENDIAN
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 8);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 24);
+ /* fall through */
+ case 8:
+ /* the lowest byte of c is reserved for the length */
+ b += k[7];
+ /* fall through */
+ case 7:
+ b += ((uint32) k[6] << 8);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 16);
+ /* fall through */
+ case 5:
+ b += ((uint32) k[4] << 24);
+ /* fall through */
+ case 4:
+ a += k[3];
+ /* fall through */
+ case 3:
+ a += ((uint32) k[2] << 8);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 16);
+ /* fall through */
+ case 1:
+ a += ((uint32) k[0] << 24);
+ /* case 0: nothing left to add */
+ }
+#else /* !WORDS_BIGENDIAN */
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 24);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 8);
+ /* fall through */
+ case 8:
+ /* the lowest byte of c is reserved for the length */
+ b += ((uint32) k[7] << 24);
+ /* fall through */
+ case 7:
+ b += ((uint32) k[6] << 16);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 8);
+ /* fall through */
+ case 5:
+ b += k[4];
+ /* fall through */
+ case 4:
+ a += ((uint32) k[3] << 24);
+ /* fall through */
+ case 3:
+ a += ((uint32) k[2] << 16);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 8);
+ /* fall through */
+ case 1:
+ a += k[0];
+ /* case 0: nothing left to add */
+ }
+#endif /* WORDS_BIGENDIAN */
+ }
+
+ final(a, b, c);
+
+ /* report the result */
+ return ((uint64) b << 32) | c;
+}
+
+/*
+ * hash_bytes_uint32() -- hash a 32-bit value to a 32-bit value
+ *
+ * This has the same result as
+ * hash_bytes(&k, sizeof(uint32))
+ * but is faster and doesn't force the caller to store k into memory.
+ */
+uint32
+hash_bytes_uint32(uint32 k)
+{
+ uint32 a,
+ b,
+ c;
+
+ a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
+ a += k;
+
+ final(a, b, c);
+
+ /* report the result */
+ return c;
+}
+
+/*
+ * hash_bytes_uint32_extended() -- hash 32-bit value to 64-bit value, with seed
+ *
+ * Like hash_bytes_uint32, this is a convenience function.
+ */
+uint64
+hash_bytes_uint32_extended(uint32 k, uint64 seed)
+{
+ uint32 a,
+ b,
+ c;
+
+ a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
+
+ if (seed != 0)
+ {
+ a += (uint32) (seed >> 32);
+ b += (uint32) seed;
+ mix(a, b, c);
+ }
+
+ a += k;
+
+ final(a, b, c);
+
+ /* report the result */
+ return ((uint64) b << 32) | c;
+}
+
+/*
+ * string_hash: hash function for keys that are NUL-terminated strings.
+ *
+ * NOTE: this is the default hash function if none is specified.
+ */
+uint32
+string_hash(const void *key, Size keysize)
+{
+ /*
+ * If the string exceeds keysize-1 bytes, we want to hash only that many,
+ * because when it is copied into the hash table it will be truncated at
+ * that length.
+ */
+ Size s_len = strlen((const char *) key);
+
+ s_len = Min(s_len, keysize - 1);
+ return hash_bytes((const unsigned char *) key, (int) s_len);
+}
+
+/*
+ * tag_hash: hash function for fixed-size tag values
+ */
+uint32
+tag_hash(const void *key, Size keysize)
+{
+ return hash_bytes((const unsigned char *) key, (int) keysize);
+}
+
+/*
+ * uint32_hash: hash function for keys that are uint32 or int32
+ *
+ * (tag_hash works for this case too, but is slower)
+ */
+uint32
+uint32_hash(const void *key, Size keysize)
+{
+ Assert(keysize == sizeof(uint32));
+ return hash_bytes_uint32(*((const uint32 *) key));
+}
diff --git a/src/common/hmac.c b/src/common/hmac.c
new file mode 100644
index 0000000..f0b239d
--- /dev/null
+++ b/src/common/hmac.c
@@ -0,0 +1,330 @@
+/*-------------------------------------------------------------------------
+ *
+ * hmac.c
+ * Implements Keyed-Hashing for Message Authentication (HMAC)
+ *
+ * Fallback implementation of HMAC, as specified in RFC 2104.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/hmac.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/cryptohash.h"
+#include "common/hmac.h"
+#include "common/md5.h"
+#include "common/sha1.h"
+#include "common/sha2.h"
+
+/*
+ * In backend, use palloc/pfree to ease the error handling. In frontend,
+ * use malloc to be able to return a failure status back to the caller.
+ */
+#ifndef FRONTEND
+#define ALLOC(size) palloc(size)
+#define FREE(ptr) pfree(ptr)
+#else
+#define ALLOC(size) malloc(size)
+#define FREE(ptr) free(ptr)
+#endif
+
+/* Set of error states */
+typedef enum pg_hmac_errno
+{
+ PG_HMAC_ERROR_NONE = 0,
+ PG_HMAC_ERROR_OOM,
+ PG_HMAC_ERROR_INTERNAL
+} pg_hmac_errno;
+
+/* Internal pg_hmac_ctx structure */
+struct pg_hmac_ctx
+{
+ pg_cryptohash_ctx *hash;
+ pg_cryptohash_type type;
+ pg_hmac_errno error;
+ const char *errreason;
+ int block_size;
+ int digest_size;
+
+ /*
+ * Use the largest block size among supported options. This wastes some
+ * memory but simplifies the allocation logic.
+ */
+ uint8 k_ipad[PG_SHA512_BLOCK_LENGTH];
+ uint8 k_opad[PG_SHA512_BLOCK_LENGTH];
+};
+
+#define HMAC_IPAD 0x36
+#define HMAC_OPAD 0x5C
+
+/*
+ * pg_hmac_create
+ *
+ * Allocate a HMAC context. Returns NULL on failure for an OOM. The
+ * backend issues an error, without returning.
+ */
+pg_hmac_ctx *
+pg_hmac_create(pg_cryptohash_type type)
+{
+ pg_hmac_ctx *ctx;
+
+ ctx = ALLOC(sizeof(pg_hmac_ctx));
+ if (ctx == NULL)
+ return NULL;
+ memset(ctx, 0, sizeof(pg_hmac_ctx));
+ ctx->type = type;
+ ctx->error = PG_HMAC_ERROR_NONE;
+ ctx->errreason = NULL;
+
+ /*
+ * Initialize the context data. This requires knowing the digest and
+ * block lengths, which depend on the type of hash used.
+ */
+ switch (type)
+ {
+ case PG_MD5:
+ ctx->digest_size = MD5_DIGEST_LENGTH;
+ ctx->block_size = MD5_BLOCK_SIZE;
+ break;
+ case PG_SHA1:
+ ctx->digest_size = SHA1_DIGEST_LENGTH;
+ ctx->block_size = SHA1_BLOCK_SIZE;
+ break;
+ case PG_SHA224:
+ ctx->digest_size = PG_SHA224_DIGEST_LENGTH;
+ ctx->block_size = PG_SHA224_BLOCK_LENGTH;
+ break;
+ case PG_SHA256:
+ ctx->digest_size = PG_SHA256_DIGEST_LENGTH;
+ ctx->block_size = PG_SHA256_BLOCK_LENGTH;
+ break;
+ case PG_SHA384:
+ ctx->digest_size = PG_SHA384_DIGEST_LENGTH;
+ ctx->block_size = PG_SHA384_BLOCK_LENGTH;
+ break;
+ case PG_SHA512:
+ ctx->digest_size = PG_SHA512_DIGEST_LENGTH;
+ ctx->block_size = PG_SHA512_BLOCK_LENGTH;
+ break;
+ }
+
+ ctx->hash = pg_cryptohash_create(type);
+ if (ctx->hash == NULL)
+ {
+ explicit_bzero(ctx, sizeof(pg_hmac_ctx));
+ FREE(ctx);
+ return NULL;
+ }
+
+ return ctx;
+}
+
+/*
+ * pg_hmac_init
+ *
+ * Initialize a HMAC context. Returns 0 on success, -1 on failure.
+ */
+int
+pg_hmac_init(pg_hmac_ctx *ctx, const uint8 *key, size_t len)
+{
+ int i;
+ int digest_size;
+ int block_size;
+ uint8 *shrinkbuf = NULL;
+
+ if (ctx == NULL)
+ return -1;
+
+ digest_size = ctx->digest_size;
+ block_size = ctx->block_size;
+
+ memset(ctx->k_opad, HMAC_OPAD, ctx->block_size);
+ memset(ctx->k_ipad, HMAC_IPAD, ctx->block_size);
+
+ /*
+ * If the key is longer than the block size, pass it through the hash once
+ * to shrink it down.
+ */
+ if (len > block_size)
+ {
+ pg_cryptohash_ctx *hash_ctx;
+
+ /* temporary buffer for one-time shrink */
+ shrinkbuf = ALLOC(digest_size);
+ if (shrinkbuf == NULL)
+ {
+ ctx->error = PG_HMAC_ERROR_OOM;
+ return -1;
+ }
+ memset(shrinkbuf, 0, digest_size);
+
+ hash_ctx = pg_cryptohash_create(ctx->type);
+ if (hash_ctx == NULL)
+ {
+ ctx->error = PG_HMAC_ERROR_OOM;
+ FREE(shrinkbuf);
+ return -1;
+ }
+
+ if (pg_cryptohash_init(hash_ctx) < 0 ||
+ pg_cryptohash_update(hash_ctx, key, len) < 0 ||
+ pg_cryptohash_final(hash_ctx, shrinkbuf, digest_size) < 0)
+ {
+ ctx->error = PG_HMAC_ERROR_INTERNAL;
+ ctx->errreason = pg_cryptohash_error(hash_ctx);
+ pg_cryptohash_free(hash_ctx);
+ FREE(shrinkbuf);
+ return -1;
+ }
+
+ key = shrinkbuf;
+ len = digest_size;
+ pg_cryptohash_free(hash_ctx);
+ }
+
+ for (i = 0; i < len; i++)
+ {
+ ctx->k_ipad[i] ^= key[i];
+ ctx->k_opad[i] ^= key[i];
+ }
+
+ /* tmp = H(K XOR ipad, text) */
+ if (pg_cryptohash_init(ctx->hash) < 0 ||
+ pg_cryptohash_update(ctx->hash, ctx->k_ipad, ctx->block_size) < 0)
+ {
+ ctx->error = PG_HMAC_ERROR_INTERNAL;
+ ctx->errreason = pg_cryptohash_error(ctx->hash);
+ if (shrinkbuf)
+ FREE(shrinkbuf);
+ return -1;
+ }
+
+ if (shrinkbuf)
+ FREE(shrinkbuf);
+ return 0;
+}
+
+/*
+ * pg_hmac_update
+ *
+ * Update a HMAC context. Returns 0 on success, -1 on failure.
+ */
+int
+pg_hmac_update(pg_hmac_ctx *ctx, const uint8 *data, size_t len)
+{
+ if (ctx == NULL)
+ return -1;
+
+ if (pg_cryptohash_update(ctx->hash, data, len) < 0)
+ {
+ ctx->error = PG_HMAC_ERROR_INTERNAL;
+ ctx->errreason = pg_cryptohash_error(ctx->hash);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * pg_hmac_final
+ *
+ * Finalize a HMAC context. Returns 0 on success, -1 on failure.
+ */
+int
+pg_hmac_final(pg_hmac_ctx *ctx, uint8 *dest, size_t len)
+{
+ uint8 *h;
+
+ if (ctx == NULL)
+ return -1;
+
+ h = ALLOC(ctx->digest_size);
+ if (h == NULL)
+ {
+ ctx->error = PG_HMAC_ERROR_OOM;
+ return -1;
+ }
+ memset(h, 0, ctx->digest_size);
+
+ if (pg_cryptohash_final(ctx->hash, h, ctx->digest_size) < 0)
+ {
+ ctx->error = PG_HMAC_ERROR_INTERNAL;
+ ctx->errreason = pg_cryptohash_error(ctx->hash);
+ FREE(h);
+ return -1;
+ }
+
+ /* H(K XOR opad, tmp) */
+ if (pg_cryptohash_init(ctx->hash) < 0 ||
+ pg_cryptohash_update(ctx->hash, ctx->k_opad, ctx->block_size) < 0 ||
+ pg_cryptohash_update(ctx->hash, h, ctx->digest_size) < 0 ||
+ pg_cryptohash_final(ctx->hash, dest, len) < 0)
+ {
+ ctx->error = PG_HMAC_ERROR_INTERNAL;
+ ctx->errreason = pg_cryptohash_error(ctx->hash);
+ FREE(h);
+ return -1;
+ }
+
+ FREE(h);
+ return 0;
+}
+
+/*
+ * pg_hmac_free
+ *
+ * Free a HMAC context.
+ */
+void
+pg_hmac_free(pg_hmac_ctx *ctx)
+{
+ if (ctx == NULL)
+ return;
+
+ pg_cryptohash_free(ctx->hash);
+ explicit_bzero(ctx, sizeof(pg_hmac_ctx));
+ FREE(ctx);
+}
+
+/*
+ * pg_hmac_error
+ *
+ * Returns a static string providing details about an error that happened
+ * during a HMAC computation.
+ */
+const char *
+pg_hmac_error(pg_hmac_ctx *ctx)
+{
+ if (ctx == NULL)
+ return _("out of memory");
+
+ /*
+ * If a reason is provided, rely on it, else fallback to any error code
+ * set.
+ */
+ if (ctx->errreason)
+ return ctx->errreason;
+
+ switch (ctx->error)
+ {
+ case PG_HMAC_ERROR_NONE:
+ return _("success");
+ case PG_HMAC_ERROR_INTERNAL:
+ return _("internal error");
+ case PG_HMAC_ERROR_OOM:
+ return _("out of memory");
+ }
+
+ Assert(false); /* cannot be reached */
+ return _("success");
+}
diff --git a/src/common/hmac_openssl.c b/src/common/hmac_openssl.c
new file mode 100644
index 0000000..12be542
--- /dev/null
+++ b/src/common/hmac_openssl.c
@@ -0,0 +1,348 @@
+/*-------------------------------------------------------------------------
+ *
+ * hmac_openssl.c
+ * Implementation of HMAC with OpenSSL.
+ *
+ * This should only be used if code is compiled with OpenSSL support.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/hmac_openssl.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+
+#include <openssl/err.h>
+#include <openssl/hmac.h>
+
+#include "common/hmac.h"
+#include "common/md5.h"
+#include "common/sha1.h"
+#include "common/sha2.h"
+#ifndef FRONTEND
+#include "utils/memutils.h"
+#include "utils/resowner.h"
+#include "utils/resowner_private.h"
+#endif
+
+/*
+ * In backend, use an allocation in TopMemoryContext to account for resowner
+ * cleanup handling if necessary. For versions of OpenSSL where HMAC_CTX is
+ * known, just use palloc(). In frontend, use malloc to be able to return
+ * a failure status back to the caller.
+ */
+#ifndef FRONTEND
+#ifdef HAVE_HMAC_CTX_NEW
+#define ALLOC(size) MemoryContextAlloc(TopMemoryContext, size)
+#else
+#define ALLOC(size) palloc(size)
+#endif
+#define FREE(ptr) pfree(ptr)
+#else /* FRONTEND */
+#define ALLOC(size) malloc(size)
+#define FREE(ptr) free(ptr)
+#endif /* FRONTEND */
+
+/* Set of error states */
+typedef enum pg_hmac_errno
+{
+ PG_HMAC_ERROR_NONE = 0,
+ PG_HMAC_ERROR_DEST_LEN,
+ PG_HMAC_ERROR_OPENSSL
+} pg_hmac_errno;
+
+/* Internal pg_hmac_ctx structure */
+struct pg_hmac_ctx
+{
+ HMAC_CTX *hmacctx;
+ pg_cryptohash_type type;
+ pg_hmac_errno error;
+ const char *errreason;
+
+#ifndef FRONTEND
+ ResourceOwner resowner;
+#endif
+};
+
+static const char *
+SSLerrmessage(unsigned long ecode)
+{
+ if (ecode == 0)
+ return NULL;
+
+ /*
+ * This may return NULL, but we would fall back to a default error path if
+ * that were the case.
+ */
+ return ERR_reason_error_string(ecode);
+}
+
+/*
+ * pg_hmac_create
+ *
+ * Allocate a HMAC context. Returns NULL on failure for an OOM. The
+ * backend issues an error, without returning.
+ */
+pg_hmac_ctx *
+pg_hmac_create(pg_cryptohash_type type)
+{
+ pg_hmac_ctx *ctx;
+
+ ctx = ALLOC(sizeof(pg_hmac_ctx));
+ if (ctx == NULL)
+ return NULL;
+ memset(ctx, 0, sizeof(pg_hmac_ctx));
+
+ ctx->type = type;
+ ctx->error = PG_HMAC_ERROR_NONE;
+ ctx->errreason = NULL;
+
+
+ /*
+ * Initialization takes care of assigning the correct type for OpenSSL.
+ * Also ensure that there aren't any unconsumed errors in the queue from
+ * previous runs.
+ */
+ ERR_clear_error();
+#ifdef HAVE_HMAC_CTX_NEW
+#ifndef FRONTEND
+ ResourceOwnerEnlargeHMAC(CurrentResourceOwner);
+#endif
+ ctx->hmacctx = HMAC_CTX_new();
+#else
+ ctx->hmacctx = ALLOC(sizeof(HMAC_CTX));
+#endif
+
+ if (ctx->hmacctx == NULL)
+ {
+ explicit_bzero(ctx, sizeof(pg_hmac_ctx));
+ FREE(ctx);
+#ifndef FRONTEND
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
+#endif
+ return NULL;
+ }
+
+#ifdef HAVE_HMAC_CTX_NEW
+#ifndef FRONTEND
+ ctx->resowner = CurrentResourceOwner;
+ ResourceOwnerRememberHMAC(CurrentResourceOwner, PointerGetDatum(ctx));
+#endif
+#else
+ memset(ctx->hmacctx, 0, sizeof(HMAC_CTX));
+#endif /* HAVE_HMAC_CTX_NEW */
+
+ return ctx;
+}
+
+/*
+ * pg_hmac_init
+ *
+ * Initialize a HMAC context. Returns 0 on success, -1 on failure.
+ */
+int
+pg_hmac_init(pg_hmac_ctx *ctx, const uint8 *key, size_t len)
+{
+ int status = 0;
+
+ if (ctx == NULL)
+ return -1;
+
+ switch (ctx->type)
+ {
+ case PG_MD5:
+ status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_md5(), NULL);
+ break;
+ case PG_SHA1:
+ status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_sha1(), NULL);
+ break;
+ case PG_SHA224:
+ status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_sha224(), NULL);
+ break;
+ case PG_SHA256:
+ status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_sha256(), NULL);
+ break;
+ case PG_SHA384:
+ status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_sha384(), NULL);
+ break;
+ case PG_SHA512:
+ status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_sha512(), NULL);
+ break;
+ }
+
+ /* OpenSSL internals return 1 on success, 0 on failure */
+ if (status <= 0)
+ {
+ ctx->errreason = SSLerrmessage(ERR_get_error());
+ ctx->error = PG_HMAC_ERROR_OPENSSL;
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * pg_hmac_update
+ *
+ * Update a HMAC context. Returns 0 on success, -1 on failure.
+ */
+int
+pg_hmac_update(pg_hmac_ctx *ctx, const uint8 *data, size_t len)
+{
+ int status = 0;
+
+ if (ctx == NULL)
+ return -1;
+
+ status = HMAC_Update(ctx->hmacctx, data, len);
+
+ /* OpenSSL internals return 1 on success, 0 on failure */
+ if (status <= 0)
+ {
+ ctx->errreason = SSLerrmessage(ERR_get_error());
+ ctx->error = PG_HMAC_ERROR_OPENSSL;
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * pg_hmac_final
+ *
+ * Finalize a HMAC context. Returns 0 on success, -1 on failure.
+ */
+int
+pg_hmac_final(pg_hmac_ctx *ctx, uint8 *dest, size_t len)
+{
+ int status = 0;
+ uint32 outlen;
+
+ if (ctx == NULL)
+ return -1;
+
+ switch (ctx->type)
+ {
+ case PG_MD5:
+ if (len < MD5_DIGEST_LENGTH)
+ {
+ ctx->error = PG_HMAC_ERROR_DEST_LEN;
+ return -1;
+ }
+ break;
+ case PG_SHA1:
+ if (len < SHA1_DIGEST_LENGTH)
+ {
+ ctx->error = PG_HMAC_ERROR_DEST_LEN;
+ return -1;
+ }
+ break;
+ case PG_SHA224:
+ if (len < PG_SHA224_DIGEST_LENGTH)
+ {
+ ctx->error = PG_HMAC_ERROR_DEST_LEN;
+ return -1;
+ }
+ break;
+ case PG_SHA256:
+ if (len < PG_SHA256_DIGEST_LENGTH)
+ {
+ ctx->error = PG_HMAC_ERROR_DEST_LEN;
+ return -1;
+ }
+ break;
+ case PG_SHA384:
+ if (len < PG_SHA384_DIGEST_LENGTH)
+ {
+ ctx->error = PG_HMAC_ERROR_DEST_LEN;
+ return -1;
+ }
+ break;
+ case PG_SHA512:
+ if (len < PG_SHA512_DIGEST_LENGTH)
+ {
+ ctx->error = PG_HMAC_ERROR_DEST_LEN;
+ return -1;
+ }
+ break;
+ }
+
+ status = HMAC_Final(ctx->hmacctx, dest, &outlen);
+
+ /* OpenSSL internals return 1 on success, 0 on failure */
+ if (status <= 0)
+ {
+ ctx->errreason = SSLerrmessage(ERR_get_error());
+ ctx->error = PG_HMAC_ERROR_OPENSSL;
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * pg_hmac_free
+ *
+ * Free a HMAC context.
+ */
+void
+pg_hmac_free(pg_hmac_ctx *ctx)
+{
+ if (ctx == NULL)
+ return;
+
+#ifdef HAVE_HMAC_CTX_FREE
+ HMAC_CTX_free(ctx->hmacctx);
+#ifndef FRONTEND
+ ResourceOwnerForgetHMAC(ctx->resowner, PointerGetDatum(ctx));
+#endif
+#else
+ explicit_bzero(ctx->hmacctx, sizeof(HMAC_CTX));
+ FREE(ctx->hmacctx);
+#endif
+
+ explicit_bzero(ctx, sizeof(pg_hmac_ctx));
+ FREE(ctx);
+}
+
+/*
+ * pg_hmac_error
+ *
+ * Returns a static string providing details about an error that happened
+ * during a HMAC computation.
+ */
+const char *
+pg_hmac_error(pg_hmac_ctx *ctx)
+{
+ if (ctx == NULL)
+ return _("out of memory");
+
+ /*
+ * If a reason is provided, rely on it, else fallback to any error code
+ * set.
+ */
+ if (ctx->errreason)
+ return ctx->errreason;
+
+ switch (ctx->error)
+ {
+ case PG_HMAC_ERROR_NONE:
+ return _("success");
+ case PG_HMAC_ERROR_DEST_LEN:
+ return _("destination buffer too small");
+ case PG_HMAC_ERROR_OPENSSL:
+ return _("OpenSSL failure");
+ }
+
+ Assert(false); /* cannot be reached */
+ return _("success");
+}
diff --git a/src/common/ip.c b/src/common/ip.c
new file mode 100644
index 0000000..9baad3a
--- /dev/null
+++ b/src/common/ip.c
@@ -0,0 +1,262 @@
+/*-------------------------------------------------------------------------
+ *
+ * ip.c
+ * IPv6-aware network access.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/ip.c
+ *
+ * This file and the IPV6 implementation were initially provided by
+ * Nigel Kukard <nkukard@lbsd.net>, Linux Based Systems Design
+ * http://www.lbsd.net.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <sys/file.h>
+
+#include "common/ip.h"
+
+
+
+static int getaddrinfo_unix(const char *path,
+ const struct addrinfo *hintsp,
+ struct addrinfo **result);
+
+static int getnameinfo_unix(const struct sockaddr_un *sa, int salen,
+ char *node, int nodelen,
+ char *service, int servicelen,
+ int flags);
+
+
+/*
+ * pg_getaddrinfo_all - get address info for Unix, IPv4 and IPv6 sockets
+ *
+ * Returns 0 on success or an EAI_* error code. *result is cleared up front
+ * so that it is never left stale when the lookup fails.
+ */
+int
+pg_getaddrinfo_all(const char *hostname, const char *servname,
+ const struct addrinfo *hintp, struct addrinfo **result)
+{
+ /* not all versions of getaddrinfo() zero *result on failure */
+ *result = NULL;
+
+ /* AF_UNIX is resolved locally; a NULL hint is left to getaddrinfo() */
+ if (hintp != NULL && hintp->ai_family == AF_UNIX)
+ return getaddrinfo_unix(servname, hintp, result);
+
+ /* NULL has special meaning to getaddrinfo(). */
+ return getaddrinfo((!hostname || hostname[0] == '\0') ? NULL : hostname,
+ servname, hintp, result);
+}
+
+
+/*
+ * pg_freeaddrinfo_all - release a list obtained from pg_getaddrinfo_all
+ *
+ * hint_ai_family must be the ai_family of the hint structure originally
+ * passed in: it is the only reliable way to know whether the list came from
+ * the system's getaddrinfo() or from our own getaddrinfo_unix(). The
+ * ai_family stored in the list itself cannot be trusted for that, because
+ * some getaddrinfo() implementations might return AF_UNIX entries too.
+ * A NULL list is accepted in both cases.
+ */
+void
+pg_freeaddrinfo_all(int hint_ai_family, struct addrinfo *ai)
+{
+ struct addrinfo *next;
+
+ if (hint_ai_family != AF_UNIX)
+ {
+ /* list was built by getaddrinfo(); hand it back in one piece */
+ if (ai != NULL)
+ freeaddrinfo(ai);
+ return;
+ }
+
+ /* list was built by getaddrinfo_unix (see pg_getaddrinfo_all) */
+ for (; ai != NULL; ai = next)
+ {
+ next = ai->ai_next;
+ free(ai->ai_addr);
+ free(ai);
+ }
+}
+
+
+/*
+ * pg_getnameinfo_all - get name info for Unix, IPv4 and IPv6 sockets
+ *
+ * The API of this routine differs from the standard getnameinfo() definition
+ * in two ways: first, the addr parameter is declared as sockaddr_storage
+ * rather than struct sockaddr, and second, on a failure return any non-NULL
+ * node and service buffers are filled with the placeholder "???".
+ */
+int
+pg_getnameinfo_all(const struct sockaddr_storage *addr, int salen,
+ char *node, int nodelen,
+ char *service, int servicelen,
+ int flags)
+{
+ int rc;
+
+ if (addr && addr->ss_family == AF_UNIX) /* Unix sockets are handled locally */
+ rc = getnameinfo_unix((const struct sockaddr_un *) addr, salen,
+ node, nodelen,
+ service, servicelen,
+ flags);
+ else
+ rc = getnameinfo((const struct sockaddr *) addr, salen,
+ node, nodelen,
+ service, servicelen,
+ flags);
+
+ if (rc != 0) /* either lookup failed: supply placeholders */
+ {
+ if (node)
+ strlcpy(node, "???", nodelen);
+ if (service)
+ strlcpy(service, "???", servicelen);
+ }
+
+ return rc;
+}
+
+
+/* -------
+ * getaddrinfo_unix - get unix socket info using IPv6-compatible API
+ *
+ * Returns 0 with a single entry in *result, else EAI_FAIL or EAI_MEMORY;
+ * a NULL or over-long path is rejected with EAI_FAIL.
+ * Bugs: only one addrinfo is set even if hintsp is NULL or ai_socktype is 0;
+ * AI_CANONNAME is not supported.
+ */
+static int
+getaddrinfo_unix(const char *path, const struct addrinfo *hintsp,
+ struct addrinfo **result)
+{
+ struct addrinfo hints = {0};
+ struct addrinfo *aip;
+ struct sockaddr_un *unp;
+
+ *result = NULL;
+
+ if (path == NULL || strlen(path) >= sizeof(unp->sun_path))
+ return EAI_FAIL;
+
+ if (hintsp == NULL)
+ {
+ hints.ai_family = AF_UNIX;
+ hints.ai_socktype = SOCK_STREAM;
+ }
+ else
+ memcpy(&hints, hintsp, sizeof(hints));
+
+ if (hints.ai_socktype == 0)
+ hints.ai_socktype = SOCK_STREAM;
+
+ if (hints.ai_family != AF_UNIX)
+ {
+ /* shouldn't have been called */
+ return EAI_FAIL;
+ }
+
+ aip = calloc(1, sizeof(struct addrinfo));
+ if (aip == NULL)
+ return EAI_MEMORY;
+
+ unp = calloc(1, sizeof(struct sockaddr_un));
+ if (unp == NULL)
+ {
+ free(aip);
+ return EAI_MEMORY;
+ }
+
+ aip->ai_family = AF_UNIX;
+ aip->ai_socktype = hints.ai_socktype;
+ aip->ai_protocol = hints.ai_protocol;
+ aip->ai_next = NULL;
+ aip->ai_canonname = NULL;
+ *result = aip;
+
+ unp->sun_family = AF_UNIX;
+ aip->ai_addr = (struct sockaddr *) unp;
+ aip->ai_addrlen = sizeof(struct sockaddr_un);
+
+ strcpy(unp->sun_path, path); /* fits: length was checked above */
+
+ /*
+ * If the supplied path starts with @, replace that with a zero byte for
+ * the internal representation. In that mode, the entire sun_path is the
+ * address, including trailing zero bytes. But we set the address length
+ * to only include the length of the original string. That way the
+ * trailing zero bytes won't show up in any network or socket lists of the
+ * operating system. This is just a convention, also followed by other
+ * packages.
+ */
+ if (path[0] == '@')
+ {
+ unp->sun_path[0] = '\0';
+ aip->ai_addrlen = offsetof(struct sockaddr_un, sun_path) + strlen(path);
+ }
+
+ return 0;
+}
+
+/*
+ * Report a Unix-socket address as node "[local]" and its path as service.
+ */
+static int
+getnameinfo_unix(const struct sockaddr_un *sa, int salen,
+ char *node, int nodelen,
+ char *service, int servicelen,
+ int flags)
+{
+ int ret;
+
+ /* Invalid arguments. */
+ if (sa == NULL || sa->sun_family != AF_UNIX ||
+ (node == NULL && service == NULL))
+ return EAI_FAIL;
+
+ if (node)
+ {
+ ret = snprintf(node, nodelen, "%s", "[local]");
+ if (ret < 0 || ret >= nodelen) /* error, or output was truncated */
+ return EAI_MEMORY;
+ }
+
+ if (service)
+ {
+ /*
+ * Check whether it looks like an abstract socket, but it could also
+ * just be an empty string.
+ */
+ if (sa->sun_path[0] == '\0' && sa->sun_path[1] != '\0')
+ ret = snprintf(service, servicelen, "@%s", sa->sun_path + 1);
+ else
+ ret = snprintf(service, servicelen, "%s", sa->sun_path);
+ if (ret < 0 || ret >= servicelen) /* error, or output was truncated */
+ return EAI_MEMORY;
+ }
+
+ return 0;
+}
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
new file mode 100644
index 0000000..2e86589
--- /dev/null
+++ b/src/common/jsonapi.c
@@ -0,0 +1,1206 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonapi.c
+ * JSON parser and lexer interfaces
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/jsonapi.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/jsonapi.h"
+#include "mb/pg_wchar.h"
+#include "port/pg_lfind.h"
+
+#ifndef FRONTEND
+#include "miscadmin.h"
+#endif
+
+/*
+ * The context of the parser is maintained by the recursive descent
+ * mechanism, but is passed explicitly to report_parse_error() so that it
+ * can choose an error code matching what the parser was expecting.
+ */
+typedef enum /* contexts of JSON parser */
+{
+ JSON_PARSE_VALUE, /* expecting a value */
+ JSON_PARSE_STRING, /* expecting a string (for a field name) */
+ JSON_PARSE_ARRAY_START, /* saw '[', expecting value or ']' */
+ JSON_PARSE_ARRAY_NEXT, /* saw array element, expecting ',' or ']' */
+ JSON_PARSE_OBJECT_START, /* saw '{', expecting label or '}' */
+ JSON_PARSE_OBJECT_LABEL, /* saw object label, expecting ':' */
+ JSON_PARSE_OBJECT_NEXT, /* saw object value, expecting ',' or '}' */
+ JSON_PARSE_OBJECT_COMMA, /* saw object ',', expecting next label */
+ JSON_PARSE_END /* saw the end of a document, expect nothing */
+} JsonParseContext;
+
+static inline JsonParseErrorType json_lex_string(JsonLexContext *lex);
+static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, char *s,
+ bool *num_err, int *total_len);
+static inline JsonParseErrorType parse_scalar(JsonLexContext *lex, JsonSemAction *sem);
+static JsonParseErrorType parse_object_field(JsonLexContext *lex, JsonSemAction *sem);
+static JsonParseErrorType parse_object(JsonLexContext *lex, JsonSemAction *sem);
+static JsonParseErrorType parse_array_element(JsonLexContext *lex, JsonSemAction *sem);
+static JsonParseErrorType parse_array(JsonLexContext *lex, JsonSemAction *sem);
+static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
+
+/* the null action object used for pure validation: every member is NULL */
+JsonSemAction nullSemAction =
+{
+ NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL
+};
+
+/* Recursive Descent parser support routines */
+
+/*
+ * lex_peek
+ *
+ * Return the type of the current look-ahead token without consuming it.
+ */
+static inline JsonTokenType
+lex_peek(JsonLexContext *lex)
+{
+ return lex->token_type;
+}
+
+/*
+ * lex_expect
+ *
+ * Require the look-ahead token to be "token": if it is, advance the lexer
+ * and return json_lex()'s result; if not, return the error chosen for ctx.
+ */
+static inline JsonParseErrorType
+lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
+{
+ if (lex_peek(lex) != token)
+ return report_parse_error(ctx, lex);
+
+ return json_lex(lex);
+}
+
+/* chars treated as part of an alphanumeric token; evaluates c repeatedly */
+#define JSON_ALPHANUMERIC_CHAR(c) \
+ (((c) >= 'a' && (c) <= 'z') || \
+ ((c) >= 'A' && (c) <= 'Z') || \
+ ((c) >= '0' && (c) <= '9') || \
+ (c) == '_' || \
+ IS_HIGHBIT_SET(c))
+
+/*
+ * Utility function to check if a string is a valid JSON number.
+ * Returns false if len <= 0 or if anything follows the number itself.
+ * str is of length len, and need not be null-terminated.
+ */
+bool
+IsValidJsonNumber(const char *str, int len)
+{
+ bool numeric_error;
+ int total_len;
+ JsonLexContext dummy_lex; /* only input and input_length get set */
+
+ if (len <= 0)
+ return false;
+
+ /*
+ * json_lex_number expects a leading '-' to have been eaten already.
+ *
+ * having to cast away the constness of str is ugly, but there's not much
+ * easy alternative.
+ */
+ if (*str == '-')
+ {
+ dummy_lex.input = unconstify(char *, str) + 1;
+ dummy_lex.input_length = len - 1;
+ }
+ else
+ {
+ dummy_lex.input = unconstify(char *, str);
+ dummy_lex.input_length = len;
+ }
+
+ json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);
+
+ return (!numeric_error) && (total_len == dummy_lex.input_length); /* whole input consumed? */
+}
+
+/*
+ * makeJsonLexContextCstringLen
+ *
+ * lex constructor, with or without StringInfo object for de-escaped lexemes.
+ *
+ * Without is better as it makes the processing faster, so only make one
+ * if really required (need_escapes true).
+ */
+JsonLexContext *
+makeJsonLexContextCstringLen(char *json, int len, int encoding, bool need_escapes)
+{
+ JsonLexContext *lex = palloc0(sizeof(JsonLexContext));
+
+ lex->input = lex->token_terminator = lex->line_start = json; /* lexing starts at the first byte */
+ lex->line_number = 1; /* line numbers are 1-based */
+ lex->input_length = len;
+ lex->input_encoding = encoding;
+ if (need_escapes)
+ lex->strval = makeStringInfo();
+ return lex;
+}
+
+/*
+ * pg_parse_json
+ *
+ * Publicly visible entry point for the JSON parser. Returns JSON_SUCCESS,
+ * or the first error reported by the lexer, the parser or a sem callback.
+ * lex is a lexing context, set up for the json to be processed, e.g. by
+ * makeJsonLexContextCstringLen(). sem is a structure of function pointers
+ * to semantic action routines to be called at appropriate spots during
+ * parsing, and a pointer to a state object to be passed to those routines.
+ */
+JsonParseErrorType
+pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
+{
+ JsonTokenType tok;
+ JsonParseErrorType result;
+
+ /* get the initial token */
+ result = json_lex(lex);
+ if (result != JSON_SUCCESS)
+ return result;
+
+ tok = lex_peek(lex);
+
+ /* parse by recursive descent */
+ switch (tok)
+ {
+ case JSON_TOKEN_OBJECT_START:
+ result = parse_object(lex, sem);
+ break;
+ case JSON_TOKEN_ARRAY_START:
+ result = parse_array(lex, sem);
+ break;
+ default:
+ result = parse_scalar(lex, sem); /* json can be a bare scalar */
+ }
+
+ if (result == JSON_SUCCESS) /* nothing may follow the top-level value */
+ result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
+
+ return result;
+}
+
+/*
+ * json_count_array_elements
+ *
+ * Returns number of array elements in lex context at start of array token
+ * until end of array token at same nesting level, via *elements, which is
+ * written only on success. Lexing is done on a copy of *lex.
+ * Designed to be called from array_start routines.
+ */
+JsonParseErrorType
+json_count_array_elements(JsonLexContext *lex, int *elements)
+{
+ JsonLexContext copylex;
+ int count;
+ JsonParseErrorType result;
+
+ /*
+ * It's safe to do this with a shallow copy because the lexical routines
+ * don't scribble on the input. They do scribble on the other pointers
+ * etc, so doing this with a copy makes that safe.
+ */
+ memcpy(&copylex, lex, sizeof(JsonLexContext));
+ copylex.strval = NULL; /* not interested in values here */
+ copylex.lex_level++;
+
+ count = 0;
+ result = lex_expect(JSON_PARSE_ARRAY_START, &copylex,
+ JSON_TOKEN_ARRAY_START);
+ if (result != JSON_SUCCESS)
+ return result;
+ if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
+ {
+ while (1)
+ {
+ count++;
+ result = parse_array_element(&copylex, &nullSemAction);
+ if (result != JSON_SUCCESS)
+ return result;
+ if (copylex.token_type != JSON_TOKEN_COMMA) /* no more elements */
+ break;
+ result = json_lex(&copylex);
+ if (result != JSON_SUCCESS)
+ return result;
+ }
+ }
+ result = lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex,
+ JSON_TOKEN_ARRAY_END);
+ if (result != JSON_SUCCESS)
+ return result;
+
+ *elements = count;
+ return JSON_SUCCESS;
+}
+
+/*
+ * Recursive Descent parse routines. There is one for each structural
+ * element in a json document:
+ * - scalar (string, number, true, false, null)
+ * - array ( [ ] )
+ * - array element
+ * - object ( { } )
+ * - object field
+ */
+static inline JsonParseErrorType
+parse_scalar(JsonLexContext *lex, JsonSemAction *sem)
+{
+ char *val = NULL;
+ json_scalar_action sfunc = sem->scalar;
+ JsonTokenType tok = lex_peek(lex);
+ JsonParseErrorType result;
+
+ /* a scalar must be a string, a number, true, false, or null */
+ if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
+ tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
+ tok != JSON_TOKEN_NULL)
+ return report_parse_error(JSON_PARSE_VALUE, lex);
+
+ /* if no semantic function, just consume the token */
+ if (sfunc == NULL)
+ return json_lex(lex);
+
+ /* extract the de-escaped string value, or the raw lexeme */
+ if (lex_peek(lex) == JSON_TOKEN_STRING)
+ {
+ if (lex->strval != NULL) /* otherwise val stays NULL */
+ val = pstrdup(lex->strval->data);
+ }
+ else
+ {
+ int len = (lex->token_terminator - lex->token_start);
+
+ val = palloc(len + 1);
+ memcpy(val, lex->token_start, len);
+ val[len] = '\0';
+ }
+
+ /* consume the token */
+ result = json_lex(lex);
+ if (result != JSON_SUCCESS)
+ return result;
+
+ /* invoke the callback with the value and token type captured above */
+ result = (*sfunc) (sem->semstate, val, tok);
+
+ return result;
+}
+
+static JsonParseErrorType
+parse_object_field(JsonLexContext *lex, JsonSemAction *sem)
+{
+ /*
+ * An object field is "fieldname" : value where value can be a scalar,
+ * object or array. Note: in user-facing docs and error messages, we
+ * generally call a field name a "key".
+ */
+
+ char *fname = NULL; /* stays NULL unless captured below */
+ json_ofield_action ostart = sem->object_field_start;
+ json_ofield_action oend = sem->object_field_end;
+ bool isnull;
+ JsonTokenType tok;
+ JsonParseErrorType result;
+
+ if (lex_peek(lex) != JSON_TOKEN_STRING)
+ return report_parse_error(JSON_PARSE_STRING, lex);
+ if ((ostart != NULL || oend != NULL) && lex->strval != NULL)
+ fname = pstrdup(lex->strval->data);
+ result = json_lex(lex);
+ if (result != JSON_SUCCESS)
+ return result;
+
+ result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
+ if (result != JSON_SUCCESS)
+ return result;
+
+ tok = lex_peek(lex);
+ isnull = tok == JSON_TOKEN_NULL; /* is the field's value a JSON null? */
+
+ if (ostart != NULL)
+ {
+ result = (*ostart) (sem->semstate, fname, isnull);
+ if (result != JSON_SUCCESS)
+ return result;
+ }
+
+ switch (tok)
+ {
+ case JSON_TOKEN_OBJECT_START:
+ result = parse_object(lex, sem);
+ break;
+ case JSON_TOKEN_ARRAY_START:
+ result = parse_array(lex, sem);
+ break;
+ default:
+ result = parse_scalar(lex, sem);
+ }
+ if (result != JSON_SUCCESS)
+ return result;
+
+ if (oend != NULL)
+ {
+ result = (*oend) (sem->semstate, fname, isnull);
+ if (result != JSON_SUCCESS)
+ return result;
+ }
+
+ return JSON_SUCCESS;
+}
+
+static JsonParseErrorType
+parse_object(JsonLexContext *lex, JsonSemAction *sem)
+{
+ /*
+ * an object is a possibly empty sequence of object fields, separated by
+ * commas and surrounded by curly braces.
+ */
+ json_struct_action ostart = sem->object_start;
+ json_struct_action oend = sem->object_end;
+ JsonTokenType tok;
+ JsonParseErrorType result;
+
+#ifndef FRONTEND
+ check_stack_depth();
+#endif
+
+ if (ostart != NULL)
+ {
+ result = (*ostart) (sem->semstate);
+ if (result != JSON_SUCCESS)
+ return result;
+ }
+
+ /*
+ * Data inside an object is at a higher nesting level than the object
+ * itself, so lex_level is bumped after the object-start callback and
+ * restored before the object-end callback. Error returns below leave
+ * it incremented.
+ */
+ lex->lex_level++;
+
+ Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START);
+ result = json_lex(lex);
+ if (result != JSON_SUCCESS)
+ return result;
+
+ tok = lex_peek(lex);
+ switch (tok)
+ {
+ case JSON_TOKEN_STRING:
+ result = parse_object_field(lex, sem);
+ while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
+ {
+ result = json_lex(lex);
+ if (result != JSON_SUCCESS)
+ break;
+ result = parse_object_field(lex, sem);
+ }
+ break;
+ case JSON_TOKEN_OBJECT_END:
+ break;
+ default:
+ /* case of an invalid initial token inside the object */
+ result = report_parse_error(JSON_PARSE_OBJECT_START, lex);
+ }
+ if (result != JSON_SUCCESS)
+ return result;
+
+ result = lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
+ if (result != JSON_SUCCESS)
+ return result;
+
+ lex->lex_level--;
+
+ if (oend != NULL)
+ {
+ result = (*oend) (sem->semstate);
+ if (result != JSON_SUCCESS)
+ return result;
+ }
+
+ return JSON_SUCCESS;
+}
+
+static JsonParseErrorType
+parse_array_element(JsonLexContext *lex, JsonSemAction *sem)
+{
+ json_aelem_action astart = sem->array_element_start;
+ json_aelem_action aend = sem->array_element_end;
+ JsonTokenType tok = lex_peek(lex); /* first token of the element */
+ JsonParseErrorType result;
+ bool isnull;
+
+ isnull = tok == JSON_TOKEN_NULL; /* passed to both element callbacks */
+
+ if (astart != NULL)
+ {
+ result = (*astart) (sem->semstate, isnull);
+ if (result != JSON_SUCCESS)
+ return result;
+ }
+
+ /* an array element is any object, array or scalar */
+ switch (tok)
+ {
+ case JSON_TOKEN_OBJECT_START:
+ result = parse_object(lex, sem);
+ break;
+ case JSON_TOKEN_ARRAY_START:
+ result = parse_array(lex, sem);
+ break;
+ default:
+ result = parse_scalar(lex, sem);
+ }
+
+ if (result != JSON_SUCCESS)
+ return result;
+
+ if (aend != NULL)
+ {
+ result = (*aend) (sem->semstate, isnull);
+ if (result != JSON_SUCCESS)
+ return result;
+ }
+
+ return JSON_SUCCESS;
+}
+
+static JsonParseErrorType
+parse_array(JsonLexContext *lex, JsonSemAction *sem)
+{
+ /*
+ * an array is a possibly empty sequence of array elements, separated by
+ * commas and surrounded by square brackets.
+ */
+ json_struct_action astart = sem->array_start;
+ json_struct_action aend = sem->array_end;
+ JsonParseErrorType result;
+
+#ifndef FRONTEND
+ check_stack_depth();
+#endif
+
+ if (astart != NULL)
+ {
+ result = (*astart) (sem->semstate);
+ if (result != JSON_SUCCESS)
+ return result;
+ }
+
+ /*
+ * Data inside an array is at a higher nesting level than the array
+ * itself, so lex_level is bumped after the array-start callback and
+ * restored before the array-end callback. Error returns below leave
+ * it incremented.
+ */
+ lex->lex_level++;
+
+ result = lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
+ if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END)
+ {
+ result = parse_array_element(lex, sem);
+
+ while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
+ {
+ result = json_lex(lex);
+ if (result != JSON_SUCCESS)
+ break;
+ result = parse_array_element(lex, sem);
+ }
+ }
+ if (result != JSON_SUCCESS)
+ return result;
+
+ result = lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
+ if (result != JSON_SUCCESS)
+ return result;
+
+ lex->lex_level--;
+
+ if (aend != NULL)
+ {
+ result = (*aend) (sem->semstate);
+ if (result != JSON_SUCCESS)
+ return result;
+ }
+
+ return JSON_SUCCESS;
+}
+
+/*
+ * Lex one token from the input stream, starting at lex->token_terminator.
+ */
+JsonParseErrorType
+json_lex(JsonLexContext *lex)
+{
+ char *s;
+ char *const end = lex->input + lex->input_length;
+ JsonParseErrorType result;
+
+ /* Skip leading whitespace. */
+ s = lex->token_terminator;
+ while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
+ {
+ if (*s++ == '\n')
+ {
+ ++lex->line_number;
+ lex->line_start = s;
+ }
+ }
+ lex->token_start = s;
+
+ /* Determine token type. */
+ if (s >= end)
+ {
+ lex->token_start = NULL; /* end of input has no token text */
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s;
+ lex->token_type = JSON_TOKEN_END;
+ }
+ else
+ {
+ switch (*s)
+ {
+ /* Single-character token, some kind of punctuation mark. */
+ case '{':
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s + 1;
+ lex->token_type = JSON_TOKEN_OBJECT_START;
+ break;
+ case '}':
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s + 1;
+ lex->token_type = JSON_TOKEN_OBJECT_END;
+ break;
+ case '[':
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s + 1;
+ lex->token_type = JSON_TOKEN_ARRAY_START;
+ break;
+ case ']':
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s + 1;
+ lex->token_type = JSON_TOKEN_ARRAY_END;
+ break;
+ case ',':
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s + 1;
+ lex->token_type = JSON_TOKEN_COMMA;
+ break;
+ case ':':
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s + 1;
+ lex->token_type = JSON_TOKEN_COLON;
+ break;
+ case '"':
+ /* string */
+ result = json_lex_string(lex);
+ if (result != JSON_SUCCESS)
+ return result;
+ lex->token_type = JSON_TOKEN_STRING;
+ break;
+ case '-':
+ /* Negative number. */
+ result = json_lex_number(lex, s + 1, NULL, NULL);
+ if (result != JSON_SUCCESS)
+ return result;
+ lex->token_type = JSON_TOKEN_NUMBER;
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ /* Positive number. */
+ result = json_lex_number(lex, s, NULL, NULL);
+ if (result != JSON_SUCCESS)
+ return result;
+ lex->token_type = JSON_TOKEN_NUMBER;
+ break;
+ default:
+ {
+ char *p;
+
+ /*
+ * We're not dealing with a string, number, legal
+ * punctuation mark, or end of string. The only legal
+ * tokens we might find here are true, false, and null,
+ * but for error reporting purposes we scan until we see a
+ * non-alphanumeric character. That way, we can report
+ * the whole word as an unexpected token, rather than just
+ * some unintuitive prefix thereof.
+ */
+ for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++)
+ /* skip */ ;
+
+ /*
+ * We got some sort of unexpected punctuation or an
+ * otherwise unexpected character, so just complain about
+ * that one character.
+ */
+ if (p == s)
+ {
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s + 1;
+ return JSON_INVALID_TOKEN;
+ }
+
+ /*
+ * We've got a real alphanumeric token here. If it
+ * happens to be true, false, or null, all is well. If
+ * not, error out.
+ */
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = p;
+ if (p - s == 4)
+ {
+ if (memcmp(s, "true", 4) == 0)
+ lex->token_type = JSON_TOKEN_TRUE;
+ else if (memcmp(s, "null", 4) == 0)
+ lex->token_type = JSON_TOKEN_NULL;
+ else
+ return JSON_INVALID_TOKEN;
+ }
+ else if (p - s == 5 && memcmp(s, "false", 5) == 0)
+ lex->token_type = JSON_TOKEN_FALSE;
+ else
+ return JSON_INVALID_TOKEN;
+ }
+ } /* end of switch */
+ }
+
+ return JSON_SUCCESS;
+}
+
+/*
+ * The next token in the input stream is known to be a string; lex it.
+ *
+ * If lex->strval isn't NULL, fill it with the decoded string.
+ * Set lex->token_terminator to the end of the decoded input, and in
+ * success cases, transfer its previous value to lex->prev_token_terminator.
+ * Return JSON_SUCCESS or an error code.
+ *
+ * Note: be careful that all error exits advance lex->token_terminator
+ * to the point after the character we detected the error on.
+ */
+static inline JsonParseErrorType
+json_lex_string(JsonLexContext *lex)
+{
+ char *s;
+ char *const end = lex->input + lex->input_length;
+ int hi_surrogate = -1;
+
+ /* Convenience macros for error exits */
+#define FAIL_AT_CHAR_START(code) \
+ do { \
+ lex->token_terminator = s; \
+ return code; \
+ } while (0)
+#define FAIL_AT_CHAR_END(code) \
+ do { \
+ lex->token_terminator = \
+ s + pg_encoding_mblen_bounded(lex->input_encoding, s); \
+ return code; \
+ } while (0)
+
+ if (lex->strval != NULL)
+ resetStringInfo(lex->strval);
+
+ Assert(lex->input_length > 0);
+ s = lex->token_start;
+ for (;;)
+ {
+ s++;
+ /* Premature end of the string. */
+ if (s >= end)
+ FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
+ else if (*s == '"')
+ break;
+ else if (*s == '\\')
+ {
+ /* OK, we have an escape character. */
+ s++;
+ if (s >= end)
+ FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
+ else if (*s == 'u')
+ {
+ int i;
+ int ch = 0;
+
+ for (i = 1; i <= 4; i++)
+ {
+ s++;
+ if (s >= end)
+ FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
+ else if (*s >= '0' && *s <= '9')
+ ch = (ch * 16) + (*s - '0');
+ else if (*s >= 'a' && *s <= 'f')
+ ch = (ch * 16) + (*s - 'a') + 10;
+ else if (*s >= 'A' && *s <= 'F')
+ ch = (ch * 16) + (*s - 'A') + 10;
+ else
+ FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT);
+ }
+ if (lex->strval != NULL)
+ {
+ /*
+ * Combine surrogate pairs.
+ */
+ if (is_utf16_surrogate_first(ch))
+ {
+ if (hi_surrogate != -1)
+ FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE);
+ hi_surrogate = ch;
+ continue;
+ }
+ else if (is_utf16_surrogate_second(ch))
+ {
+ if (hi_surrogate == -1)
+ FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
+ ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
+ hi_surrogate = -1;
+ }
+
+ if (hi_surrogate != -1)
+ FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
+
+ /*
+ * Reject invalid cases. We can't have a value above
+ * 0xFFFF here (since we only accepted 4 hex digits
+ * above), so no need to test for out-of-range chars.
+ */
+ if (ch == 0)
+ {
+ /* We can't allow this, since our TEXT type doesn't */
+ FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO);
+ }
+
+ /*
+ * Add the represented character to lex->strval. In the
+ * backend, we can let pg_unicode_to_server_noerror()
+ * handle any required character set conversion; in
+ * frontend, we can only deal with trivial conversions.
+ */
+#ifndef FRONTEND
+ {
+ char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
+
+ if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
+ FAIL_AT_CHAR_END(JSON_UNICODE_UNTRANSLATABLE);
+ appendStringInfoString(lex->strval, cbuf);
+ }
+#else
+ if (lex->input_encoding == PG_UTF8)
+ {
+ /* OK, we can map the code point to UTF8 easily */
+ char utf8str[5];
+ int utf8len;
+
+ unicode_to_utf8(ch, (unsigned char *) utf8str);
+ utf8len = pg_utf_mblen((unsigned char *) utf8str);
+ appendBinaryStringInfo(lex->strval, utf8str, utf8len);
+ }
+ else if (ch <= 0x007f)
+ {
+ /* The ASCII range is the same in all encodings */
+ appendStringInfoChar(lex->strval, (char) ch);
+ }
+ else
+ FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE);
+#endif /* FRONTEND */
+ }
+ }
+ else if (lex->strval != NULL)
+ {
+ if (hi_surrogate != -1)
+ FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
+
+ switch (*s)
+ {
+ case '"':
+ case '\\':
+ case '/':
+ appendStringInfoChar(lex->strval, *s);
+ break;
+ case 'b':
+ appendStringInfoChar(lex->strval, '\b');
+ break;
+ case 'f':
+ appendStringInfoChar(lex->strval, '\f');
+ break;
+ case 'n':
+ appendStringInfoChar(lex->strval, '\n');
+ break;
+ case 'r':
+ appendStringInfoChar(lex->strval, '\r');
+ break;
+ case 't':
+ appendStringInfoChar(lex->strval, '\t');
+ break;
+ default:
+
+ /*
+ * Not a valid string escape, so signal error. We
+ * adjust token_start so that just the escape sequence
+ * is reported, not the whole string.
+ */
+ lex->token_start = s;
+ FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
+ }
+ }
+ else if (strchr("\"\\/bfnrt", *s) == NULL)
+ {
+ /*
+ * Simpler processing if we're not bothered about de-escaping
+ *
+ * It's very tempting to remove the strchr() call here and
+ * replace it with a switch statement, but testing so far has
+ * shown it's not a performance win.
+ */
+ lex->token_start = s;
+ FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
+ }
+ }
+ else
+ {
+ char *p = s;
+
+ if (hi_surrogate != -1)
+ FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
+
+ /*
+ * Skip whole vectors free of special bytes, to batch the append below;
+ * compare lengths so no pointer before the input is ever formed.
+ */
+ while ((size_t) (end - p) > sizeof(Vector8) &&
+ !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
+ !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
+ !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
+ p += sizeof(Vector8);
+
+ for (; p < end; p++)
+ {
+ if (*p == '\\' || *p == '"')
+ break;
+ else if ((unsigned char) *p <= 31)
+ {
+ /* Per RFC4627, these characters MUST be escaped. */
+ /*
+ * Since *p isn't printable, exclude it from the context
+ * string
+ */
+ lex->token_terminator = p;
+ return JSON_ESCAPING_REQUIRED;
+ }
+ }
+
+ if (lex->strval != NULL)
+ appendBinaryStringInfo(lex->strval, s, p - s);
+
+ /*
+ * s will be incremented at the top of the loop, so set it to just
+ * behind our lookahead position
+ */
+ s = p - 1;
+ }
+ }
+
+ if (hi_surrogate != -1)
+ {
+ lex->token_terminator = s + 1;
+ return JSON_UNICODE_LOW_SURROGATE;
+ }
+
+ /* Hooray, we found the end of the string! */
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s + 1;
+ return JSON_SUCCESS;
+
+#undef FAIL_AT_CHAR_START
+#undef FAIL_AT_CHAR_END
+}
+
+/*
+ * The next token in the input stream is known to be a number; lex it.
+ *
+ * In JSON, a number consists of four parts:
+ *
+ * (1) An optional minus sign ('-').
+ *
+ * (2) Either a single '0', or a string of one or more digits that does not
+ * begin with a '0'.
+ *
+ * (3) An optional decimal part, consisting of a period ('.') followed by
+ * one or more digits. (Note: While this part can be omitted
+ * completely, it's not OK to have only the decimal point without
+ * any digits afterwards.)
+ *
+ * (4) An optional exponent part, consisting of 'e' or 'E', optionally
+ * followed by '+' or '-', followed by one or more digits. (Note:
+ * As with the decimal part, if 'e' or 'E' is present, it must be
+ * followed by at least one digit.)
+ *
+ * The 's' argument to this function points to the ostensible beginning
+ * of part 2 - i.e. the character after any optional minus sign, or the
+ * first character of the string if there is none.
+ *
+ * If num_err is not NULL, we return an error flag to *num_err rather than
+ * raising an error for a badly-formed number. Also, if total_len is not NULL
+ * the distance from lex->input to the token end+1 is returned to *total_len.
+ */
+static inline JsonParseErrorType
+json_lex_number(JsonLexContext *lex, char *s,
+ bool *num_err, int *total_len)
+{
+ bool error = false;
+ int len = s - lex->input; /* offset of s within the input */
+
+ /* Part (1): leading sign indicator. */
+ /* Caller already did this for us; so do nothing. */
+
+ /* Part (2): parse main digit string. */
+ if (len < lex->input_length && *s == '0')
+ {
+ s++;
+ len++;
+ }
+ else if (len < lex->input_length && *s >= '1' && *s <= '9')
+ {
+ do
+ {
+ s++;
+ len++;
+ } while (len < lex->input_length && *s >= '0' && *s <= '9');
+ }
+ else
+ error = true;
+
+ /* Part (3): parse optional decimal portion. */
+ if (len < lex->input_length && *s == '.')
+ {
+ s++;
+ len++;
+ if (len == lex->input_length || *s < '0' || *s > '9')
+ error = true;
+ else
+ {
+ do
+ {
+ s++;
+ len++;
+ } while (len < lex->input_length && *s >= '0' && *s <= '9');
+ }
+ }
+
+ /* Part (4): parse optional exponent. */
+ if (len < lex->input_length && (*s == 'e' || *s == 'E'))
+ {
+ s++;
+ len++;
+ if (len < lex->input_length && (*s == '+' || *s == '-'))
+ {
+ s++;
+ len++;
+ }
+ if (len == lex->input_length || *s < '0' || *s > '9')
+ error = true;
+ else
+ {
+ do
+ {
+ s++;
+ len++;
+ } while (len < lex->input_length && *s >= '0' && *s <= '9');
+ }
+ }
+
+ /*
+ * Check for trailing garbage. As in json_lex(), any alphanumeric stuff
+ * here should be considered part of the token for error-reporting
+ * purposes.
+ */
+ for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
+ error = true;
+
+ if (total_len != NULL)
+ *total_len = len;
+
+ if (num_err != NULL)
+ {
+ /* let the caller handle any error; lex itself is not modified */
+ *num_err = error;
+ }
+ else
+ {
+ /* return token endpoint */
+ lex->prev_token_terminator = lex->token_terminator;
+ lex->token_terminator = s;
+ /* handle error if any */
+ if (error)
+ return JSON_INVALID_TOKEN;
+ }
+
+ return JSON_SUCCESS;
+}
+
+/*
+ * Translate a parser state into the error code to report for the current
+ * token.
+ *
+ * ctx says what the parser was expecting; lex->token_start and
+ * lex->token_terminator must delimit the token that was found instead.
+ * If there is no current token, or it is the end-of-input token, the
+ * result is JSON_EXPECTED_MORE no matter what ctx is.
+ */
+static JsonParseErrorType
+report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
+{
+	bool		ran_out_of_input;
+
+	/* Premature end of input trumps every context-specific complaint. */
+	ran_out_of_input = (lex->token_start == NULL ||
+						lex->token_type == JSON_TOKEN_END);
+	if (ran_out_of_input)
+		return JSON_EXPECTED_MORE;
+
+	/*
+	 * Map the parsing context onto an error code.  There is intentionally
+	 * no default: label, so that the compiler can warn when a new
+	 * JsonParseContext value is not handled here.
+	 */
+	switch (ctx)
+	{
+		case JSON_PARSE_VALUE:
+			return JSON_EXPECTED_JSON;
+
+		case JSON_PARSE_STRING:
+		case JSON_PARSE_OBJECT_COMMA:
+			return JSON_EXPECTED_STRING;
+
+		case JSON_PARSE_ARRAY_START:
+			return JSON_EXPECTED_ARRAY_FIRST;
+		case JSON_PARSE_ARRAY_NEXT:
+			return JSON_EXPECTED_ARRAY_NEXT;
+
+		case JSON_PARSE_OBJECT_START:
+			return JSON_EXPECTED_OBJECT_FIRST;
+		case JSON_PARSE_OBJECT_LABEL:
+			return JSON_EXPECTED_COLON;
+		case JSON_PARSE_OBJECT_NEXT:
+			return JSON_EXPECTED_OBJECT_NEXT;
+
+		case JSON_PARSE_END:
+			return JSON_EXPECTED_END;
+	}
+
+	/* Not reached for any valid JsonParseContext value. */
+	Assert(false);
+	return JSON_SUCCESS;		/* silence stupider compilers */
+}
+
+
+#ifndef FRONTEND
+/*
+ * Return a palloc'd, NUL-terminated copy of the current token, i.e. the
+ * bytes from lex->token_start up to (not including) lex->token_terminator.
+ * Used when building error messages.
+ */
+static char *
+extract_token(JsonLexContext *lex)
+{
+	int			nbytes;
+	char	   *copy;
+
+	nbytes = lex->token_terminator - lex->token_start;
+
+	/* one extra byte for the terminator */
+	copy = palloc(nbytes + 1);
+	memcpy(copy, lex->token_start, nbytes);
+	copy[nbytes] = '\0';
+
+	return copy;
+}
+
+/*
+ * Build the (already translated) detail message for a JSON parse error.
+ *
+ * Some results are produced by psprintf() while others are plain
+ * translated string constants, so the result may or may not be palloc'd.
+ * A caller therefore has no way to know whether it can be pfree'd, which
+ * makes this routine unsafe for frontend code.
+ *
+ * JSON_SUCCESS and JSON_SEM_ACTION_FAILED have no detail text; passing
+ * either of them, or a value outside the enum, ends in elog(ERROR).
+ */
+char *
+json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
+{
+	switch (error)
+	{
+			/* Codes without a message: handled by the code after the switch. */
+		case JSON_SUCCESS:
+		case JSON_SEM_ACTION_FAILED:
+			break;
+
+			/* Messages that quote the offending token. */
+		case JSON_EXPECTED_JSON:
+			return psprintf(_("Expected JSON value, but found \"%s\"."),
+							extract_token(lex));
+		case JSON_EXPECTED_STRING:
+			return psprintf(_("Expected string, but found \"%s\"."),
+							extract_token(lex));
+		case JSON_EXPECTED_COLON:
+			return psprintf(_("Expected \":\", but found \"%s\"."),
+							extract_token(lex));
+		case JSON_EXPECTED_ARRAY_FIRST:
+			return psprintf(_("Expected array element or \"]\", but found \"%s\"."),
+							extract_token(lex));
+		case JSON_EXPECTED_ARRAY_NEXT:
+			return psprintf(_("Expected \",\" or \"]\", but found \"%s\"."),
+							extract_token(lex));
+		case JSON_EXPECTED_OBJECT_FIRST:
+			return psprintf(_("Expected string or \"}\", but found \"%s\"."),
+							extract_token(lex));
+		case JSON_EXPECTED_OBJECT_NEXT:
+			return psprintf(_("Expected \",\" or \"}\", but found \"%s\"."),
+							extract_token(lex));
+		case JSON_EXPECTED_END:
+			return psprintf(_("Expected end of input, but found \"%s\"."),
+							extract_token(lex));
+		case JSON_INVALID_TOKEN:
+			return psprintf(_("Token \"%s\" is invalid."),
+							extract_token(lex));
+		case JSON_ESCAPING_INVALID:
+			return psprintf(_("Escape sequence \"\\%s\" is invalid."),
+							extract_token(lex));
+
+			/* Message that shows the byte found at the token terminator. */
+		case JSON_ESCAPING_REQUIRED:
+			return psprintf(_("Character with value 0x%02x must be escaped."),
+							(unsigned char) *(lex->token_terminator));
+
+			/* Message that names the server encoding. */
+		case JSON_UNICODE_UNTRANSLATABLE:
+			/* note: this case is only reachable in backend not frontend */
+			return psprintf(_("Unicode escape value could not be translated to the server's encoding %s."),
+							GetDatabaseEncodingName());
+
+			/* Fixed messages; these are returned without being copied. */
+		case JSON_EXPECTED_MORE:
+			return _("The input string ended unexpectedly.");
+		case JSON_UNICODE_CODE_POINT_ZERO:
+			return _("\\u0000 cannot be converted to text.");
+		case JSON_UNICODE_ESCAPE_FORMAT:
+			return _("\"\\u\" must be followed by four hexadecimal digits.");
+		case JSON_UNICODE_HIGH_ESCAPE:
+			/* note: this case is only reachable in frontend not backend */
+			return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
+		case JSON_UNICODE_HIGH_SURROGATE:
+			return _("Unicode high surrogate must not follow a high surrogate.");
+		case JSON_UNICODE_LOW_SURROGATE:
+			return _("Unicode low surrogate must follow a high surrogate.");
+	}
+
+	/*
+	 * The switch has no default: label so that the compiler will warn about
+	 * enum values it does not handle.  We still need this code, both for
+	 * the message-less codes above and for an incorrect input value.
+	 */
+	elog(ERROR, "unexpected json parse error type: %d", (int) error);
+	return NULL;
+}
+#endif
diff --git a/src/common/keywords.c b/src/common/keywords.c
new file mode 100644
index 0000000..b72f0d5
--- /dev/null
+++ b/src/common/keywords.c
@@ -0,0 +1,48 @@
+/*-------------------------------------------------------------------------
+ *
+ * keywords.c
+ * PostgreSQL's list of SQL keywords
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/keywords.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "common/keywords.h"
+
+
+/* ScanKeywordList lookup data for SQL keywords */
+
+#include "kwlist_d.h"
+
+/*
+ * Keyword categories for SQL keywords
+ *
+ * PG_KEYWORD is defined here so that every PG_KEYWORD() invocation found in
+ * parser/kwlist.h expands to just its "category" argument followed by a
+ * comma. Including that header inside the initializer therefore produces
+ * one array element per keyword. The macro is undefined again afterwards
+ * so that the header can be re-included with a different expansion.
+ *
+ * NOTE(review): the array is presumably meant to be indexed by the keyword
+ * number returned by ScanKeywordLookup(), which requires kwlist.h to list
+ * the keywords in the same order as kwlist_d.h - confirm.
+ */
+
+#define PG_KEYWORD(kwname, value, category, collabel) category,
+
+const uint8 ScanKeywordCategories[SCANKEYWORDS_NUM_KEYWORDS] = {
+#include "parser/kwlist.h"
+};
+
+#undef PG_KEYWORD
+
+/*
+ * Keyword can-be-bare-label flags for SQL keywords
+ *
+ * Same technique as for the category table: this time each PG_KEYWORD()
+ * invocation in parser/kwlist.h expands to its "collabel" argument.
+ * BARE_LABEL and AS_LABEL are given temporary definitions of true and
+ * false, so an element is true exactly for the keywords marked BARE_LABEL.
+ * All three helper macros are undefined once the table has been built.
+ */
+
+#define PG_KEYWORD(kwname, value, category, collabel) collabel,
+
+#define BARE_LABEL true
+#define AS_LABEL false
+
+const bool ScanKeywordBareLabel[SCANKEYWORDS_NUM_KEYWORDS] = {
+#include "parser/kwlist.h"
+};
+
+#undef PG_KEYWORD
+#undef BARE_LABEL
+#undef AS_LABEL
diff --git a/src/common/kwlist_d.h b/src/common/kwlist_d.h
new file mode 100644
index 0000000..e8af260
--- /dev/null
+++ b/src/common/kwlist_d.h
@@ -0,0 +1,1119 @@
+/*-------------------------------------------------------------------------
+ *
+ * kwlist_d.h
+ * List of keywords represented as a ScanKeywordList.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES
+ * ******************************
+ * *** DO NOT EDIT THIS FILE! ***
+ * ******************************
+ *
+ * It has been GENERATED by src/tools/gen_keywordlist.pl
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef KWLIST_D_H
+#define KWLIST_D_H
+
+#include "common/kwlookup.h"
+
+static const char ScanKeywords_kw_string[] =
+ "abort\0"
+ "absent\0"
+ "absolute\0"
+ "access\0"
+ "action\0"
+ "add\0"
+ "admin\0"
+ "after\0"
+ "aggregate\0"
+ "all\0"
+ "also\0"
+ "alter\0"
+ "always\0"
+ "analyse\0"
+ "analyze\0"
+ "and\0"
+ "any\0"
+ "array\0"
+ "as\0"
+ "asc\0"
+ "asensitive\0"
+ "assertion\0"
+ "assignment\0"
+ "asymmetric\0"
+ "at\0"
+ "atomic\0"
+ "attach\0"
+ "attribute\0"
+ "authorization\0"
+ "backward\0"
+ "before\0"
+ "begin\0"
+ "between\0"
+ "bigint\0"
+ "binary\0"
+ "bit\0"
+ "boolean\0"
+ "both\0"
+ "breadth\0"
+ "by\0"
+ "cache\0"
+ "call\0"
+ "called\0"
+ "cascade\0"
+ "cascaded\0"
+ "case\0"
+ "cast\0"
+ "catalog\0"
+ "chain\0"
+ "char\0"
+ "character\0"
+ "characteristics\0"
+ "check\0"
+ "checkpoint\0"
+ "class\0"
+ "close\0"
+ "cluster\0"
+ "coalesce\0"
+ "collate\0"
+ "collation\0"
+ "column\0"
+ "columns\0"
+ "comment\0"
+ "comments\0"
+ "commit\0"
+ "committed\0"
+ "compression\0"
+ "concurrently\0"
+ "configuration\0"
+ "conflict\0"
+ "connection\0"
+ "constraint\0"
+ "constraints\0"
+ "content\0"
+ "continue\0"
+ "conversion\0"
+ "copy\0"
+ "cost\0"
+ "create\0"
+ "cross\0"
+ "csv\0"
+ "cube\0"
+ "current\0"
+ "current_catalog\0"
+ "current_date\0"
+ "current_role\0"
+ "current_schema\0"
+ "current_time\0"
+ "current_timestamp\0"
+ "current_user\0"
+ "cursor\0"
+ "cycle\0"
+ "data\0"
+ "database\0"
+ "day\0"
+ "deallocate\0"
+ "dec\0"
+ "decimal\0"
+ "declare\0"
+ "default\0"
+ "defaults\0"
+ "deferrable\0"
+ "deferred\0"
+ "definer\0"
+ "delete\0"
+ "delimiter\0"
+ "delimiters\0"
+ "depends\0"
+ "depth\0"
+ "desc\0"
+ "detach\0"
+ "dictionary\0"
+ "disable\0"
+ "discard\0"
+ "distinct\0"
+ "do\0"
+ "document\0"
+ "domain\0"
+ "double\0"
+ "drop\0"
+ "each\0"
+ "else\0"
+ "enable\0"
+ "encoding\0"
+ "encrypted\0"
+ "end\0"
+ "enum\0"
+ "escape\0"
+ "event\0"
+ "except\0"
+ "exclude\0"
+ "excluding\0"
+ "exclusive\0"
+ "execute\0"
+ "exists\0"
+ "explain\0"
+ "expression\0"
+ "extension\0"
+ "external\0"
+ "extract\0"
+ "false\0"
+ "family\0"
+ "fetch\0"
+ "filter\0"
+ "finalize\0"
+ "first\0"
+ "float\0"
+ "following\0"
+ "for\0"
+ "force\0"
+ "foreign\0"
+ "format\0"
+ "forward\0"
+ "freeze\0"
+ "from\0"
+ "full\0"
+ "function\0"
+ "functions\0"
+ "generated\0"
+ "global\0"
+ "grant\0"
+ "granted\0"
+ "greatest\0"
+ "group\0"
+ "grouping\0"
+ "groups\0"
+ "handler\0"
+ "having\0"
+ "header\0"
+ "hold\0"
+ "hour\0"
+ "identity\0"
+ "if\0"
+ "ilike\0"
+ "immediate\0"
+ "immutable\0"
+ "implicit\0"
+ "import\0"
+ "in\0"
+ "include\0"
+ "including\0"
+ "increment\0"
+ "indent\0"
+ "index\0"
+ "indexes\0"
+ "inherit\0"
+ "inherits\0"
+ "initially\0"
+ "inline\0"
+ "inner\0"
+ "inout\0"
+ "input\0"
+ "insensitive\0"
+ "insert\0"
+ "instead\0"
+ "int\0"
+ "integer\0"
+ "intersect\0"
+ "interval\0"
+ "into\0"
+ "invoker\0"
+ "is\0"
+ "isnull\0"
+ "isolation\0"
+ "join\0"
+ "json\0"
+ "json_array\0"
+ "json_arrayagg\0"
+ "json_object\0"
+ "json_objectagg\0"
+ "key\0"
+ "keys\0"
+ "label\0"
+ "language\0"
+ "large\0"
+ "last\0"
+ "lateral\0"
+ "leading\0"
+ "leakproof\0"
+ "least\0"
+ "left\0"
+ "level\0"
+ "like\0"
+ "limit\0"
+ "listen\0"
+ "load\0"
+ "local\0"
+ "localtime\0"
+ "localtimestamp\0"
+ "location\0"
+ "lock\0"
+ "locked\0"
+ "logged\0"
+ "mapping\0"
+ "match\0"
+ "matched\0"
+ "materialized\0"
+ "maxvalue\0"
+ "merge\0"
+ "method\0"
+ "minute\0"
+ "minvalue\0"
+ "mode\0"
+ "month\0"
+ "move\0"
+ "name\0"
+ "names\0"
+ "national\0"
+ "natural\0"
+ "nchar\0"
+ "new\0"
+ "next\0"
+ "nfc\0"
+ "nfd\0"
+ "nfkc\0"
+ "nfkd\0"
+ "no\0"
+ "none\0"
+ "normalize\0"
+ "normalized\0"
+ "not\0"
+ "nothing\0"
+ "notify\0"
+ "notnull\0"
+ "nowait\0"
+ "null\0"
+ "nullif\0"
+ "nulls\0"
+ "numeric\0"
+ "object\0"
+ "of\0"
+ "off\0"
+ "offset\0"
+ "oids\0"
+ "old\0"
+ "on\0"
+ "only\0"
+ "operator\0"
+ "option\0"
+ "options\0"
+ "or\0"
+ "order\0"
+ "ordinality\0"
+ "others\0"
+ "out\0"
+ "outer\0"
+ "over\0"
+ "overlaps\0"
+ "overlay\0"
+ "overriding\0"
+ "owned\0"
+ "owner\0"
+ "parallel\0"
+ "parameter\0"
+ "parser\0"
+ "partial\0"
+ "partition\0"
+ "passing\0"
+ "password\0"
+ "placing\0"
+ "plans\0"
+ "policy\0"
+ "position\0"
+ "preceding\0"
+ "precision\0"
+ "prepare\0"
+ "prepared\0"
+ "preserve\0"
+ "primary\0"
+ "prior\0"
+ "privileges\0"
+ "procedural\0"
+ "procedure\0"
+ "procedures\0"
+ "program\0"
+ "publication\0"
+ "quote\0"
+ "range\0"
+ "read\0"
+ "real\0"
+ "reassign\0"
+ "recheck\0"
+ "recursive\0"
+ "ref\0"
+ "references\0"
+ "referencing\0"
+ "refresh\0"
+ "reindex\0"
+ "relative\0"
+ "release\0"
+ "rename\0"
+ "repeatable\0"
+ "replace\0"
+ "replica\0"
+ "reset\0"
+ "restart\0"
+ "restrict\0"
+ "return\0"
+ "returning\0"
+ "returns\0"
+ "revoke\0"
+ "right\0"
+ "role\0"
+ "rollback\0"
+ "rollup\0"
+ "routine\0"
+ "routines\0"
+ "row\0"
+ "rows\0"
+ "rule\0"
+ "savepoint\0"
+ "scalar\0"
+ "schema\0"
+ "schemas\0"
+ "scroll\0"
+ "search\0"
+ "second\0"
+ "security\0"
+ "select\0"
+ "sequence\0"
+ "sequences\0"
+ "serializable\0"
+ "server\0"
+ "session\0"
+ "session_user\0"
+ "set\0"
+ "setof\0"
+ "sets\0"
+ "share\0"
+ "show\0"
+ "similar\0"
+ "simple\0"
+ "skip\0"
+ "smallint\0"
+ "snapshot\0"
+ "some\0"
+ "sql\0"
+ "stable\0"
+ "standalone\0"
+ "start\0"
+ "statement\0"
+ "statistics\0"
+ "stdin\0"
+ "stdout\0"
+ "storage\0"
+ "stored\0"
+ "strict\0"
+ "strip\0"
+ "subscription\0"
+ "substring\0"
+ "support\0"
+ "symmetric\0"
+ "sysid\0"
+ "system\0"
+ "system_user\0"
+ "table\0"
+ "tables\0"
+ "tablesample\0"
+ "tablespace\0"
+ "temp\0"
+ "template\0"
+ "temporary\0"
+ "text\0"
+ "then\0"
+ "ties\0"
+ "time\0"
+ "timestamp\0"
+ "to\0"
+ "trailing\0"
+ "transaction\0"
+ "transform\0"
+ "treat\0"
+ "trigger\0"
+ "trim\0"
+ "true\0"
+ "truncate\0"
+ "trusted\0"
+ "type\0"
+ "types\0"
+ "uescape\0"
+ "unbounded\0"
+ "uncommitted\0"
+ "unencrypted\0"
+ "union\0"
+ "unique\0"
+ "unknown\0"
+ "unlisten\0"
+ "unlogged\0"
+ "until\0"
+ "update\0"
+ "user\0"
+ "using\0"
+ "vacuum\0"
+ "valid\0"
+ "validate\0"
+ "validator\0"
+ "value\0"
+ "values\0"
+ "varchar\0"
+ "variadic\0"
+ "varying\0"
+ "verbose\0"
+ "version\0"
+ "view\0"
+ "views\0"
+ "volatile\0"
+ "when\0"
+ "where\0"
+ "whitespace\0"
+ "window\0"
+ "with\0"
+ "within\0"
+ "without\0"
+ "work\0"
+ "wrapper\0"
+ "write\0"
+ "xml\0"
+ "xmlattributes\0"
+ "xmlconcat\0"
+ "xmlelement\0"
+ "xmlexists\0"
+ "xmlforest\0"
+ "xmlnamespaces\0"
+ "xmlparse\0"
+ "xmlpi\0"
+ "xmlroot\0"
+ "xmlserialize\0"
+ "xmltable\0"
+ "year\0"
+ "yes\0"
+ "zone";
+
+static const uint16 ScanKeywords_kw_offsets[] = {
+ 0,
+ 6,
+ 13,
+ 22,
+ 29,
+ 36,
+ 40,
+ 46,
+ 52,
+ 62,
+ 66,
+ 71,
+ 77,
+ 84,
+ 92,
+ 100,
+ 104,
+ 108,
+ 114,
+ 117,
+ 121,
+ 132,
+ 142,
+ 153,
+ 164,
+ 167,
+ 174,
+ 181,
+ 191,
+ 205,
+ 214,
+ 221,
+ 227,
+ 235,
+ 242,
+ 249,
+ 253,
+ 261,
+ 266,
+ 274,
+ 277,
+ 283,
+ 288,
+ 295,
+ 303,
+ 312,
+ 317,
+ 322,
+ 330,
+ 336,
+ 341,
+ 351,
+ 367,
+ 373,
+ 384,
+ 390,
+ 396,
+ 404,
+ 413,
+ 421,
+ 431,
+ 438,
+ 446,
+ 454,
+ 463,
+ 470,
+ 480,
+ 492,
+ 505,
+ 519,
+ 528,
+ 539,
+ 550,
+ 562,
+ 570,
+ 579,
+ 590,
+ 595,
+ 600,
+ 607,
+ 613,
+ 617,
+ 622,
+ 630,
+ 646,
+ 659,
+ 672,
+ 687,
+ 700,
+ 718,
+ 731,
+ 738,
+ 744,
+ 749,
+ 758,
+ 762,
+ 773,
+ 777,
+ 785,
+ 793,
+ 801,
+ 810,
+ 821,
+ 830,
+ 838,
+ 845,
+ 855,
+ 866,
+ 874,
+ 880,
+ 885,
+ 892,
+ 903,
+ 911,
+ 919,
+ 928,
+ 931,
+ 940,
+ 947,
+ 954,
+ 959,
+ 964,
+ 969,
+ 976,
+ 985,
+ 995,
+ 999,
+ 1004,
+ 1011,
+ 1017,
+ 1024,
+ 1032,
+ 1042,
+ 1052,
+ 1060,
+ 1067,
+ 1075,
+ 1086,
+ 1096,
+ 1105,
+ 1113,
+ 1119,
+ 1126,
+ 1132,
+ 1139,
+ 1148,
+ 1154,
+ 1160,
+ 1170,
+ 1174,
+ 1180,
+ 1188,
+ 1195,
+ 1203,
+ 1210,
+ 1215,
+ 1220,
+ 1229,
+ 1239,
+ 1249,
+ 1256,
+ 1262,
+ 1270,
+ 1279,
+ 1285,
+ 1294,
+ 1301,
+ 1309,
+ 1316,
+ 1323,
+ 1328,
+ 1333,
+ 1342,
+ 1345,
+ 1351,
+ 1361,
+ 1371,
+ 1380,
+ 1387,
+ 1390,
+ 1398,
+ 1408,
+ 1418,
+ 1425,
+ 1431,
+ 1439,
+ 1447,
+ 1456,
+ 1466,
+ 1473,
+ 1479,
+ 1485,
+ 1491,
+ 1503,
+ 1510,
+ 1518,
+ 1522,
+ 1530,
+ 1540,
+ 1549,
+ 1554,
+ 1562,
+ 1565,
+ 1572,
+ 1582,
+ 1587,
+ 1592,
+ 1603,
+ 1617,
+ 1629,
+ 1644,
+ 1648,
+ 1653,
+ 1659,
+ 1668,
+ 1674,
+ 1679,
+ 1687,
+ 1695,
+ 1705,
+ 1711,
+ 1716,
+ 1722,
+ 1727,
+ 1733,
+ 1740,
+ 1745,
+ 1751,
+ 1761,
+ 1776,
+ 1785,
+ 1790,
+ 1797,
+ 1804,
+ 1812,
+ 1818,
+ 1826,
+ 1839,
+ 1848,
+ 1854,
+ 1861,
+ 1868,
+ 1877,
+ 1882,
+ 1888,
+ 1893,
+ 1898,
+ 1904,
+ 1913,
+ 1921,
+ 1927,
+ 1931,
+ 1936,
+ 1940,
+ 1944,
+ 1949,
+ 1954,
+ 1957,
+ 1962,
+ 1972,
+ 1983,
+ 1987,
+ 1995,
+ 2002,
+ 2010,
+ 2017,
+ 2022,
+ 2029,
+ 2035,
+ 2043,
+ 2050,
+ 2053,
+ 2057,
+ 2064,
+ 2069,
+ 2073,
+ 2076,
+ 2081,
+ 2090,
+ 2097,
+ 2105,
+ 2108,
+ 2114,
+ 2125,
+ 2132,
+ 2136,
+ 2142,
+ 2147,
+ 2156,
+ 2164,
+ 2175,
+ 2181,
+ 2187,
+ 2196,
+ 2206,
+ 2213,
+ 2221,
+ 2231,
+ 2239,
+ 2248,
+ 2256,
+ 2262,
+ 2269,
+ 2278,
+ 2288,
+ 2298,
+ 2306,
+ 2315,
+ 2324,
+ 2332,
+ 2338,
+ 2349,
+ 2360,
+ 2370,
+ 2381,
+ 2389,
+ 2401,
+ 2407,
+ 2413,
+ 2418,
+ 2423,
+ 2432,
+ 2440,
+ 2450,
+ 2454,
+ 2465,
+ 2477,
+ 2485,
+ 2493,
+ 2502,
+ 2510,
+ 2517,
+ 2528,
+ 2536,
+ 2544,
+ 2550,
+ 2558,
+ 2567,
+ 2574,
+ 2584,
+ 2592,
+ 2599,
+ 2605,
+ 2610,
+ 2619,
+ 2626,
+ 2634,
+ 2643,
+ 2647,
+ 2652,
+ 2657,
+ 2667,
+ 2674,
+ 2681,
+ 2689,
+ 2696,
+ 2703,
+ 2710,
+ 2719,
+ 2726,
+ 2735,
+ 2745,
+ 2758,
+ 2765,
+ 2773,
+ 2786,
+ 2790,
+ 2796,
+ 2801,
+ 2807,
+ 2812,
+ 2820,
+ 2827,
+ 2832,
+ 2841,
+ 2850,
+ 2855,
+ 2859,
+ 2866,
+ 2877,
+ 2883,
+ 2893,
+ 2904,
+ 2910,
+ 2917,
+ 2925,
+ 2932,
+ 2939,
+ 2945,
+ 2958,
+ 2968,
+ 2976,
+ 2986,
+ 2992,
+ 2999,
+ 3011,
+ 3017,
+ 3024,
+ 3036,
+ 3047,
+ 3052,
+ 3061,
+ 3071,
+ 3076,
+ 3081,
+ 3086,
+ 3091,
+ 3101,
+ 3104,
+ 3113,
+ 3125,
+ 3135,
+ 3141,
+ 3149,
+ 3154,
+ 3159,
+ 3168,
+ 3176,
+ 3181,
+ 3187,
+ 3195,
+ 3205,
+ 3217,
+ 3229,
+ 3235,
+ 3242,
+ 3250,
+ 3259,
+ 3268,
+ 3274,
+ 3281,
+ 3286,
+ 3292,
+ 3299,
+ 3305,
+ 3314,
+ 3324,
+ 3330,
+ 3337,
+ 3345,
+ 3354,
+ 3362,
+ 3370,
+ 3378,
+ 3383,
+ 3389,
+ 3398,
+ 3403,
+ 3409,
+ 3420,
+ 3427,
+ 3432,
+ 3439,
+ 3447,
+ 3452,
+ 3460,
+ 3466,
+ 3470,
+ 3484,
+ 3494,
+ 3505,
+ 3515,
+ 3525,
+ 3539,
+ 3548,
+ 3554,
+ 3562,
+ 3575,
+ 3584,
+ 3589,
+ 3593,
+};
+
+#define SCANKEYWORDS_NUM_KEYWORDS 471
+
+static int
+ScanKeywords_hash_func(const void *key, size_t keylen)
+{
+ static const int16 h[943] = {
+ 543, -186, 201, 0, 32767, 32767, 32767, 32767,
+ 221, -207, 32767, 0, 135, 283, 32767, 454,
+ 14, 79, 32767, 32767, 77, 32767, 102, 160,
+ 0, 32767, 151, 32767, 30, 392, -322, 452,
+ 32767, 0, 32767, 0, 0, 32767, 32767, 32767,
+ 234, 32767, 0, 32767, 0, 631, 32767, 368,
+ 80, 0, 0, -115, 32767, 285, 32767, 423,
+ 0, 32767, 155, 229, 32767, 126, 291, 165,
+ -22, 400, 327, 32767, 32767, 32767, 32767, -399,
+ 0, 406, 32767, 210, 1102, -203, 32767, 32767,
+ 32767, -944, 0, -188, 32767, 32767, 0, 347,
+ 32767, 0, 559, 316, 133, 32767, 202, 32767,
+ 305, 0, 32767, -94, 32767, 0, 32767, -222,
+ 32767, 138, 32767, -52, 32767, 32767, 279, 69,
+ -136, 0, 32767, 32767, 189, 32767, 32767, 88,
+ 0, 32767, 32767, 274, 32767, 514, 769, 248,
+ 32767, 32767, 32767, 32767, 32767, 32767, 0, 81,
+ 8, -29, 32767, 32767, 32767, -174, 258, 0,
+ 465, 211, 32767, 0, -229, 32767, -191, 32767,
+ 1263, 48, 32767, 343, 0, 58, 0, 32767,
+ 32767, 855, 0, 415, 0, -217, 32767, 1195,
+ 32767, 32767, 166, 32767, 42, 262, -736, 0,
+ 32767, 32767, 418, 178, 122, 32767, 46, 32767,
+ 32767, 32767, 229, 443, 32767, 32767, 250, 32767,
+ -300, 0, 32767, 1153, 32767, 108, 32767, -462,
+ 266, 32767, 478, -220, 235, 32767, 32767, -127,
+ 32767, 32767, 32767, 427, -231, 156, 32767, 0,
+ 0, 148, -218, 142, 73, 420, 32767, 32767,
+ 523, 32767, -36, 32767, 32767, 467, 844, -415,
+ 32767, 32767, -148, 179, 361, 32767, 151, 0,
+ 0, 32767, 145, 32767, 248, 110, 29, 125,
+ 282, 32767, -36, 43, 32767, 1125, 32767, 530,
+ 251, 519, 191, 0, 32767, -34, -502, 313,
+ 462, 845, 32767, 32767, -255, 412, 32767, 78,
+ 0, 32767, 444, 161, 0, 32767, 308, 32767,
+ -273, 400, 32767, 296, 32767, 32767, 72, 32767,
+ 32767, 34, 32767, 364, 151, -63, 4, 229,
+ 0, -276, 32767, 32767, 32767, 32767, -406, 32767,
+ 203, 32767, 140, 187, 160, 32767, 286, 0,
+ 32767, 32767, -88, 0, 100, -361, 32767, 9,
+ 0, -456, 32767, -37, -404, 32767, -969, 32767,
+ 371, 95, 0, 703, -31, 263, 373, -745,
+ 507, 14, 32767, -159, 0, 32767, 47, 299,
+ -126, 0, 32767, 83, 32767, 32767, 420, 236,
+ 32767, 32767, 0, 310, 89, 233, 32767, 93,
+ 32767, 0, 816, 60, 301, 211, 193, 0,
+ 452, -107, -403, -242, 353, 18, 32767, 32767,
+ 32767, 243, 104, 32767, 32767, 32767, -305, 32767,
+ -1048, 54, 0, 383, 32767, 32767, 32767, 226,
+ 319, 0, 32767, 32767, 32767, -130, 537, 32767,
+ 0, -206, 240, 696, 121, 32767, 180, 164,
+ 32767, 390, 185, 32767, 220, 545, 29, 32767,
+ 0, 32767, 32767, 1120, -163, 32767, 32767, 32767,
+ -368, 136, 445, 171, 233, 32767, 73, 32767,
+ 92, 32767, 0, 32767, 0, 208, 354, 32767,
+ 54, 32767, 32767, -246, -93, 389, 32767, 32767,
+ 32767, 32767, 50, 32767, 32767, 308, 32767, -278,
+ 0, 32767, 32767, -1172, 32767, 8, 32767, 0,
+ 32767, 341, 304, 242, -174, -92, 76, 419,
+ 32767, 87, 32767, -262, 32767, 32767, 32767, 109,
+ 200, 0, 32767, 0, 85, 530, 32767, -316,
+ 32767, 0, -286, 32767, 193, 268, 32767, 32767,
+ 278, 32767, 32767, 155, 445, 95, -310, 32767,
+ 207, -56, 32767, 32767, 0, -127, 232, -283,
+ 103, 32767, 1, 0, 32767, 32767, -485, 350,
+ 79, -56, -354, 32767, 121, 24, 81, 20,
+ 325, 40, 248, 32767, 32767, 32767, 358, 32767,
+ -56, 32767, 0, 174, -28, -301, -92, 32767,
+ 114, 295, 32767, 363, -355, 32767, 290, 0,
+ 32767, 32767, 32767, 122, 55, -142, 32767, 50,
+ 32767, 32767, 152, 571, 1397, 0, 472, -448,
+ 185, 140, 228, 435, 0, 32767, 32767, 414,
+ 32767, 379, 92, 185, 23, 299, 32767, 32767,
+ 0, 32767, 32767, 32767, 306, 439, -198, 219,
+ 340, 32767, 416, 0, -123, 377, 32767, 32767,
+ 0, 32767, 670, -670, 339, 32767, 32767, 32767,
+ 0, -256, 70, 514, 331, 0, 302, 469,
+ 0, 370, 32767, 32767, 42, 255, 212, 0,
+ 322, 277, 32767, -163, 32767, 216, 32767, 32767,
+ 0, 32767, 190, 32767, 32767, 0, 32767, 0,
+ -409, 1366, 32767, 32767, 32767, 193, 32767, 325,
+ 32767, 0, 142, 466, 32767, 32767, 32767, 113,
+ 32767, 32767, 62, 0, -62, 113, -90, 34,
+ -256, 32767, 32767, -936, 32767, 32767, 32767, 0,
+ -64, 0, -34, 451, 290, 108, 32767, 276,
+ 842, 0, 556, -153, 32767, 412, -168, 32767,
+ 32767, 1331, 407, 234, -60, 115, 457, -73,
+ 502, 772, 32767, 33, 404, -925, 32767, 32767,
+ 421, -123, 32767, 32767, 32767, 0, 0, 32767,
+ 32767, 32767, 429, 0, 3, 769, -81, 306,
+ 64, 32767, 192, 96, 0, 63, 44, 32767,
+ 32767, 32767, 32767, 0, 284, 32767, 575, 32767,
+ 32767, 12, 32767, 516, 116, 32767, 32767, 150,
+ 442, 134, 32767, 198, -45, 249, 40, 373,
+ 32767, 0, 32767, 32767, 0, 0, 352, 32767,
+ 117, 32767, 426, 0, 0, 32767, 32767, 32767,
+ 32767, -92, 32767, -442, 32767, 269, 32767, 32767,
+ 32767, 429, 32767, 0, 32767, 0, 143, 32767,
+ 508, -66, 32767, 280, 32767, 39, 162, 32767,
+ 32767, 0, 32767, 31, 32767, 32767, 32767, 0,
+ 32767, 257, -90, -249, 224, 272, 32767, 32767,
+ 313, -467, 214, 0, -85, 32767, 48, 0,
+ 32767, -336, 202, 0, 447, 90, 264, 32767,
+ 32767, 0, 101, 32767, 32767, 32767, 0, 32767,
+ 32767, 227, -1093, 32767, 0, 32767, 27, 174,
+ 32767, 7, 32767, -621, 146, 32767, 32767, 32767,
+ 854, 0, 32767, 161, 0, 137, 32767, 32767,
+ 32767, 32767, 0, 391, 219, 276, 32767, 168,
+ 32767, 32767, 0, 32767, 32767, 32767, 1, -4,
+ 32767, 0, 293, 0, 374, 256, 0, 0,
+ 32767, 355, 212, 404, 0, 186, 32767, 0,
+ 359, 32767, 32767, 172, 32767, 32767, -131, 0,
+ 402, 0, 56, 32767, 462, 389, 82, 0,
+ 32767, 0, 32767, 0, 32767, 32767, 32767, 32767,
+ 106, 425, -160, 31, 32767, 55, 0, 0,
+ 32767, 32767, 430, 1224, 179, -179, 0, 397,
+ 32767, 0, 0, 0, -60, 47, 32767, 396,
+ 32767, 326, 383, 369, 32767, 368, 32767
+ };
+
+ const unsigned char *k = (const unsigned char *) key;
+ uint32 a = 0;
+ uint32 b = 0;
+
+ while (keylen--)
+ {
+ unsigned char c = *k++ | 0x20;
+
+ a = a * 257 + c;
+ b = b * 31 + c;
+ }
+ return h[a % 943] + h[b % 943];
+}
+
+const ScanKeywordList ScanKeywords = {
+ ScanKeywords_kw_string,
+ ScanKeywords_kw_offsets,
+ ScanKeywords_hash_func,
+ SCANKEYWORDS_NUM_KEYWORDS,
+ 17
+};
+
+#endif /* KWLIST_D_H */
diff --git a/src/common/kwlookup.c b/src/common/kwlookup.c
new file mode 100644
index 0000000..7e49825
--- /dev/null
+++ b/src/common/kwlookup.c
@@ -0,0 +1,85 @@
+/*-------------------------------------------------------------------------
+ *
+ * kwlookup.c
+ * Key word lookup for PostgreSQL
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/kwlookup.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "common/kwlookup.h"
+
+
+/*
+ * ScanKeywordLookup - look a word up in a keyword list
+ *
+ * str is the NUL-terminated word to test and keywords is the
+ * ScanKeywordList to search.
+ *
+ * The result is the keyword's number (0..N-1), or -1 if str is not in the
+ * list.  What the caller does with that number (typically, index some
+ * parallel array) is not our business.
+ *
+ * Matching is case-insensitive, but only in the sense that 'A'-'Z' are
+ * folded to 'a'-'z'.  We deliberately avoid tolower(): in some locales
+ * (eg, Turkish) it would yield other or additional translations, whereas
+ * the SQL99 spec requires keywords to be matched in this simple way even
+ * though non-keyword identifiers get a different case-normalization.
+ */
+int
+ScanKeywordLookup(const char *str,
+				  const ScanKeywordList *keywords)
+{
+	size_t		len = strlen(str);
+	const char *candidate;
+	const char *p;
+	int			h;
+
+	/*
+	 * A word longer than the longest keyword cannot match; bail out before
+	 * spending any effort on hashing it.
+	 */
+	if (len > keywords->max_kw_len)
+		return -1;
+
+	/*
+	 * Hash the word.  The hash function is assumed to have been generated
+	 * to be case-insensitive.  Because it is a perfect hash, the result
+	 * names the only keyword that could possibly match.
+	 */
+	h = keywords->hash(str, len);
+
+	/* A hash value that is not a valid keyword number means "no match". */
+	if (!(h >= 0 && h < keywords->num_keywords))
+		return -1;
+
+	/*
+	 * Verify the candidate byte by byte, downcasing ASCII letters of the
+	 * input as we go.
+	 */
+	candidate = GetScanKeyword(h, keywords);
+	for (p = str; *p != '\0'; p++, candidate++)
+	{
+		char		ch = *p;
+
+		if (ch >= 'A' && ch <= 'Z')
+			ch += 'a' - 'A';
+		if (ch != *candidate)
+			return -1;
+	}
+
+	/* The input is used up; it matches only if the keyword is, too. */
+	return (*candidate == '\0') ? h : -1;
+}
diff --git a/src/common/link-canary.c b/src/common/link-canary.c
new file mode 100644
index 0000000..f84331a
--- /dev/null
+++ b/src/common/link-canary.c
@@ -0,0 +1,36 @@
+/*-------------------------------------------------------------------------
+ * link-canary.c
+ * Detect whether src/common functions came from frontend or backend.
+ *
+ * Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/link-canary.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "common/link-canary.h"
+
+/*
+ * Report whether this copy of the file was compiled for the frontend
+ * (true) or for the backend (false).
+ *
+ * Why this exists: on some systems, mainly ELF-based platforms, a shared
+ * library such as libpq that is loaded into the backend can end up calling
+ * the backend's function XYZ instead of its own function XYZ.  That is a
+ * problem whenever the two do not behave identically, which is the case
+ * for many functions in src/common and src/port that exist under the same
+ * name in both libpq and the backend.  libpq should check that this
+ * function returns true, to verify that appropriate measures against such
+ * incorrect symbol resolution have been taken.
+ */
+bool
+pg_link_canary_is_frontend(void)
+{
+#ifndef FRONTEND
+	return false;
+#else
+	return true;
+#endif
+}
diff --git a/src/common/logging.c b/src/common/logging.c
new file mode 100644
index 0000000..dab718b
--- /dev/null
+++ b/src/common/logging.c
@@ -0,0 +1,334 @@
+/*-------------------------------------------------------------------------
+ * Logging framework for frontend programs
+ *
+ * Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * src/common/logging.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#error "This file is not expected to be compiled for backend code"
+#endif
+
+#include "postgres_fe.h"
+
+#include <unistd.h>
+
+#include "common/logging.h"
+
+enum pg_log_level __pg_log_level; /* pg_log_generic_v drops messages below this level */
+
+static const char *progname; /* set by pg_logging_init; printed as message prefix */
+static int log_flags; /* PG_LOG_FLAG_* bits, set by pg_logging_config */
+
+static void (*log_pre_callback) (void); /* if set, called before each message is printed */
+static void (*log_locus_callback) (const char **, uint64 *); /* if set, supplies file name and line number */
+
+/* SGR parameter strings for colorized output; NULL means no escape sequence is emitted */
+static const char *sgr_error = NULL;
+static const char *sgr_warning = NULL;
+static const char *sgr_note = NULL;
+static const char *sgr_locus = NULL;
+
+/* values used when coloring is on and PG_COLORS is not set */
+#define SGR_ERROR_DEFAULT "01;31"
+#define SGR_WARNING_DEFAULT "01;35"
+#define SGR_NOTE_DEFAULT "01;36"
+#define SGR_LOCUS_DEFAULT "01"
+
+#define ANSI_ESCAPE_FMT "\x1b[%sm" /* %s is one of the sgr_* strings */
+#define ANSI_ESCAPE_RESET "\x1b[0m"
+
+#ifdef WIN32
+
+#ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING
+#define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x0004
+#endif
+
+/*
+ * Try to make the Windows console attached to stderr interpret VT100
+ * escape sequences, which we need for colorized output.
+ *
+ * Returns true if that mode is on when we return (whether it already was
+ * or we just enabled it), and false if stderr has no usable console or
+ * the mode could not be enabled.
+ */
+static bool
+enable_vt_processing(void)
+{
+	DWORD		console_mode = 0;
+	HANDLE		err_handle;
+
+	/* We only care about stderr. */
+	err_handle = GetStdHandle(STD_ERROR_HANDLE);
+	if (err_handle == INVALID_HANDLE_VALUE)
+		return false;
+
+	/* Fetch the current console settings. */
+	if (!GetConsoleMode(err_handle, &console_mode))
+		return false;
+
+	/* Nothing to do if VT100 processing is already enabled. */
+	if (console_mode & ENABLE_VIRTUAL_TERMINAL_PROCESSING)
+		return true;
+
+	/* Otherwise ask for it; our result is whether that worked. */
+	console_mode |= ENABLE_VIRTUAL_TERMINAL_PROCESSING;
+	return SetConsoleMode(err_handle, console_mode) ? true : false;
+}
+#endif /* WIN32 */
+
+/*
+ * Initialize the logging machinery; call this before producing any output.
+ *
+ * argv0 is the program's argv[0]; the program name derived from it is used
+ * as the prefix of log messages.  stderr is made unbuffered and the log
+ * level is set to PG_LOG_INFO.
+ *
+ * Colorization is controlled by the environment: it is enabled if PG_COLOR
+ * is "always", or is "auto" while stderr is a terminal.  When enabled, the
+ * SGR codes come from PG_COLORS (a colon-separated list of name=value
+ * items with names error, warning, note and locus) if that is set, and
+ * from the built-in defaults otherwise.
+ */
+void
+pg_logging_init(const char *argv0)
+{
+	const char *color_mode = getenv("PG_COLOR");
+	bool		stderr_is_tty = isatty(fileno(stderr));
+	const char *color_spec;
+	char	   *spec_copy;
+	char	   *item;
+
+#ifdef WIN32
+
+	/*
+	 * On Windows a terminal only counts if it can be switched into
+	 * VT100-compatible mode.
+	 */
+	if (stderr_is_tty)
+		stderr_is_tty = enable_vt_processing();
+#endif
+
+	/* usually the default, but not on Windows */
+	setvbuf(stderr, NULL, _IONBF, 0);
+
+	progname = get_progname(argv0);
+	__pg_log_level = PG_LOG_INFO;
+
+	/* Without PG_COLOR there is no coloring at all. */
+	if (color_mode == NULL)
+		return;
+
+	/* Only "always", or "auto" on a terminal, turns coloring on. */
+	if (strcmp(color_mode, "always") != 0 &&
+		!(strcmp(color_mode, "auto") == 0 && stderr_is_tty))
+		return;
+
+	color_spec = getenv("PG_COLORS");
+	if (color_spec == NULL)
+	{
+		/* No user-supplied colors, so use the defaults. */
+		sgr_error = SGR_ERROR_DEFAULT;
+		sgr_warning = SGR_WARNING_DEFAULT;
+		sgr_note = SGR_NOTE_DEFAULT;
+		sgr_locus = SGR_LOCUS_DEFAULT;
+		return;
+	}
+
+	/* strtok() scribbles on its input, so work on a private copy. */
+	spec_copy = strdup(color_spec);
+	if (spec_copy == NULL)
+		return;
+
+	item = strtok(spec_copy, ":");
+	while (item != NULL)
+	{
+		char	   *eq = strchr(item, '=');
+
+		/* Items without '=' are silently ignored. */
+		if (eq != NULL)
+		{
+			const char *value = eq + 1;
+
+			/* cut the item in two at the '=' */
+			*eq = '\0';
+
+			if (strcmp(item, "error") == 0)
+				sgr_error = strdup(value);
+			else if (strcmp(item, "warning") == 0)
+				sgr_warning = strdup(value);
+			else if (strcmp(item, "note") == 0)
+				sgr_note = strdup(value);
+			else if (strcmp(item, "locus") == 0)
+				sgr_locus = strdup(value);
+		}
+
+		item = strtok(NULL, ":");
+	}
+
+	free(spec_copy);
+}
+
+/*
+ * Change the logging flags.
+ *
+ * new_flags replaces the previous flag set as a whole; it is not OR'ed
+ * into it. pg_log_generic_v() tests PG_LOG_FLAG_TERSE in these flags to
+ * decide whether to print the program name and the severity label.
+ */
+void
+pg_logging_config(int new_flags)
+{
+ log_flags = new_flags;
+}
+
+/*
+ * pg_logging_init sets the default log level to INFO. Programs that prefer
+ * a different default should use this to set it, immediately afterward.
+ *
+ * Messages whose level is lower than new_level are discarded by
+ * pg_log_generic_v().
+ */
+void
+pg_logging_set_level(enum pg_log_level new_level)
+{
+ __pg_log_level = new_level;
+}
+
+/*
+ * Make logging one step more verbose; meant to be called for command line
+ * switches such as --verbose.  Does nothing once the most verbose level
+ * has been reached.
+ */
+void
+pg_logging_increase_verbosity(void)
+{
+	/* Already at the lowest real level (the one just above NOTSET)? */
+	if (__pg_log_level <= PG_LOG_NOTSET + 1)
+		return;
+
+	/*
+	 * The enum values are ordered such that a smaller __pg_log_level means
+	 * more output, so becoming more verbose means decrementing.
+	 */
+	__pg_log_level--;
+}
+
+/*
+ * Install a callback (or NULL for none) that pg_log_generic_v() invokes,
+ * after flushing stdout, before it writes anything for a message that
+ * passes the log level filter.
+ */
+void
+pg_logging_set_pre_callback(void (*cb) (void))
+{
+ log_pre_callback = cb;
+}
+
+/*
+ * Install a callback (or NULL for none) through which pg_log_generic_v()
+ * obtains a file name and line number for the message prefix. The callback
+ * may leave *filename NULL and *lineno 0, their initial values, in which
+ * case the corresponding part of the prefix is omitted.
+ */
+void
+pg_logging_set_locus_callback(void (*cb) (const char **filename, uint64 *lineno))
+{
+ log_locus_callback = cb;
+}
+
+/*
+ * Variadic entry point for logging: collects the arguments following fmt
+ * into a va_list and hands everything to pg_log_generic_v(), which does
+ * the actual work.
+ */
+void
+pg_log_generic(enum pg_log_level level, enum pg_log_part part,
+ const char *pg_restrict fmt,...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ pg_log_generic_v(level, part, fmt, ap);
+ va_end(ap);
+}
+
+/*
+ * Write one log message (or message part) to stderr; va_list flavor of
+ * pg_log_generic().
+ *
+ * level - severity; the message is dropped if it is below __pg_log_level
+ * part  - whether this is the primary message, a detail or a hint
+ * fmt   - printf-style format; it is translated with _() here and must not
+ *         end in a newline
+ * ap    - the arguments for fmt
+ *
+ * The output consists of an optional locus prefix (program name, and file
+ * name and line number if a locus callback supplies them), an optional
+ * "error: "/"warning: "/"detail: "/"hint: " label, the formatted message
+ * and a newline.  With PG_LOG_FLAG_TERSE set, the program name and the
+ * label are left out.  Prefix and label are wrapped in color escape
+ * sequences when the corresponding sgr_* string is set.
+ *
+ * errno as of entry is restored before the message is formatted, so a
+ * format that refers to errno sees the caller's value.
+ */
+void
+pg_log_generic_v(enum pg_log_level level, enum pg_log_part part,
+				 const char *pg_restrict fmt, va_list ap)
+{
+	int			save_errno = errno;
+	const char *filename = NULL;
+	uint64		lineno = 0;
+	va_list		ap2;
+	int			msglen;
+	size_t		required_len;
+	char	   *buf;
+
+	Assert(progname);
+	Assert(level);
+	Assert(fmt);
+	/* test for emptiness first, so we never index fmt[-1] */
+	Assert(fmt[0] == '\0' || fmt[strlen(fmt) - 1] != '\n');
+
+	/* Do nothing if log level is too low. */
+	if (level < __pg_log_level)
+		return;
+
+	/*
+	 * Flush stdout before output to stderr, to ensure sync even when stdout
+	 * is buffered.
+	 */
+	fflush(stdout);
+
+	if (log_pre_callback)
+		log_pre_callback();
+
+	if (log_locus_callback)
+		log_locus_callback(&filename, &lineno);
+
+	fmt = _(fmt);
+
+	/* Locus prefix: "progname:filename:lineno: " with parts as available. */
+	if (!(log_flags & PG_LOG_FLAG_TERSE) || filename)
+	{
+		if (sgr_locus)
+			fprintf(stderr, ANSI_ESCAPE_FMT, sgr_locus);
+		if (!(log_flags & PG_LOG_FLAG_TERSE))
+			fprintf(stderr, "%s:", progname);
+		if (filename)
+		{
+			fprintf(stderr, "%s:", filename);
+			if (lineno > 0)
+				fprintf(stderr, UINT64_FORMAT ":", lineno);
+		}
+		fprintf(stderr, " ");
+		if (sgr_locus)
+			fprintf(stderr, ANSI_ESCAPE_RESET);
+	}
+
+	/* Severity label, unless terse output was requested. */
+	if (!(log_flags & PG_LOG_FLAG_TERSE))
+	{
+		switch (part)
+		{
+			case PG_LOG_PRIMARY:
+				switch (level)
+				{
+					case PG_LOG_ERROR:
+						if (sgr_error)
+							fprintf(stderr, ANSI_ESCAPE_FMT, sgr_error);
+						fprintf(stderr, _("error: "));
+						if (sgr_error)
+							fprintf(stderr, ANSI_ESCAPE_RESET);
+						break;
+					case PG_LOG_WARNING:
+						if (sgr_warning)
+							fprintf(stderr, ANSI_ESCAPE_FMT, sgr_warning);
+						fprintf(stderr, _("warning: "));
+						if (sgr_warning)
+							fprintf(stderr, ANSI_ESCAPE_RESET);
+						break;
+					default:
+						break;
+				}
+				break;
+			case PG_LOG_DETAIL:
+				if (sgr_note)
+					fprintf(stderr, ANSI_ESCAPE_FMT, sgr_note);
+				fprintf(stderr, _("detail: "));
+				if (sgr_note)
+					fprintf(stderr, ANSI_ESCAPE_RESET);
+				break;
+			case PG_LOG_HINT:
+				if (sgr_note)
+					fprintf(stderr, ANSI_ESCAPE_FMT, sgr_note);
+				fprintf(stderr, _("hint: "));
+				if (sgr_note)
+					fprintf(stderr, ANSI_ESCAPE_RESET);
+				break;
+		}
+	}
+
+	errno = save_errno;
+
+	/* First pass: find out how long the formatted message is. */
+	va_copy(ap2, ap);
+	msglen = vsnprintf(NULL, 0, fmt, ap2);
+	va_end(ap2);
+
+	if (msglen < 0)
+	{
+		/*
+		 * Formatting failed.  We must not go on: the length would come out
+		 * as zero and we would end up printing a buffer that was never
+		 * written to.  Show the raw format string instead, so that the
+		 * prefix printed above is still followed by something useful and
+		 * the line gets terminated.
+		 */
+		fprintf(stderr, "%s\n", fmt);
+		return;
+	}
+	required_len = (size_t) msglen + 1;
+
+	buf = pg_malloc_extended(required_len, MCXT_ALLOC_NO_OOM);
+
+	errno = save_errno;			/* malloc might change errno */
+
+	if (!buf)
+	{
+		/* memory trouble, just print what we can and get out of here */
+		vfprintf(stderr, fmt, ap);
+		return;
+	}
+
+	vsnprintf(buf, required_len, fmt, ap);
+
+	/* strip one newline, for PQerrorMessage() */
+	if (required_len >= 2 && buf[required_len - 2] == '\n')
+		buf[required_len - 2] = '\0';
+
+	fprintf(stderr, "%s\n", buf);
+
+	free(buf);
+}
diff --git a/src/common/md5.c b/src/common/md5.c
new file mode 100644
index 0000000..bf56311
--- /dev/null
+++ b/src/common/md5.c
@@ -0,0 +1,439 @@
+/*-------------------------------------------------------------------------
+ *
+ * md5.c
+ * Implements the MD5 Message-Digest Algorithm
+ *
+ * Fallback implementation of MD5, as specified in RFC 1321. This
+ * implementation is a simple one, in that it needs every input byte
+ * to be buffered before doing any calculations.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/md5.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* $KAME: md5.c,v 1.3 2000/02/22 14:01:17 itojun Exp $ */
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "md5_int.h"
+
+/*
+ * Rotate X left by s bits. The expression is written for a 32-bit
+ * unsigned operand and a shift count strictly between 0 and 32; the
+ * callers in this file pass uint32 variables and the Sa..Sp constants.
+ */
+#define SHIFT(X, s) (((X) << (s)) | ((X) >> (32 - (s))))
+
+/*
+ * The four bitwise mixing functions of MD5 (see RFC 1321), one per round.
+ * Note that the complemented argument is expanded as "~X" / "~Z" without
+ * its own parentheses, so callers must pass a simple (or already
+ * parenthesized) expression, as the ROUNDn macros below do.
+ */
+#define F(X, Y, Z) (((X) & (Y)) | ((~X) & (Z)))
+#define G(X, Y, Z) (((X) & (Z)) | ((Y) & (~Z)))
+#define H(X, Y, Z) ((X) ^ (Y) ^ (Z))
+#define I(X, Y, Z) ((Y) ^ ((X) | (~Z)))
+
+/*
+ * One step of round 1..4. Each macro computes
+ *	a = b + SHIFT(a + f(b, c, d) + X[k] + T[i], s)
+ * where f is F, G, H or I respectively. "X" (the 16 message words) and
+ * "T" (the constant table) are not parameters: they are picked up from
+ * the scope in which the macro is expanded.
+ */
+#define ROUND1(a, b, c, d, k, s, i) \
+do { \
+ (a) = (a) + F((b), (c), (d)) + X[(k)] + T[(i)]; \
+ (a) = SHIFT((a), (s)); \
+ (a) = (b) + (a); \
+} while (0)
+
+#define ROUND2(a, b, c, d, k, s, i) \
+do { \
+ (a) = (a) + G((b), (c), (d)) + X[(k)] + T[(i)]; \
+ (a) = SHIFT((a), (s)); \
+ (a) = (b) + (a); \
+} while (0)
+
+#define ROUND3(a, b, c, d, k, s, i) \
+do { \
+ (a) = (a) + H((b), (c), (d)) + X[(k)] + T[(i)]; \
+ (a) = SHIFT((a), (s)); \
+ (a) = (b) + (a); \
+} while (0)
+
+#define ROUND4(a, b, c, d, k, s, i) \
+do { \
+ (a) = (a) + I((b), (c), (d)) + X[(k)] + T[(i)]; \
+ (a) = SHIFT((a), (s)); \
+ (a) = (b) + (a); \
+} while (0)
+
+#define Sa 7
+#define Sb 12
+#define Sc 17
+#define Sd 22
+
+#define Se 5
+#define Sf 9
+#define Sg 14
+#define Sh 20
+
+#define Si 4
+#define Sj 11
+#define Sk 16
+#define Sl 23
+
+#define Sm 6
+#define Sn 10
+#define So 15
+#define Sp 21
+
+#define MD5_A0 0x67452301
+#define MD5_B0 0xefcdab89
+#define MD5_C0 0x98badcfe
+#define MD5_D0 0x10325476
+
+/* Integer part of 4294967296 times abs(sin(i)), where i is in radians. */
+static const uint32 T[65] = {
+ 0,
+ 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
+ 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
+ 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
+ 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
+
+ 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
+ 0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8,
+ 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
+ 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
+
+ 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
+ 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
+ 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
+ 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
+
+ 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
+ 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
+ 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
+ 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
+};
+
+static const uint8 md5_paddat[MD5_BUFLEN] = {
+ 0x80, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/*
+ * md5_calc
+ *
+ * Run the MD5 compression function over one 64-byte block and fold the
+ * result into the chaining state held in "ctx".
+ *
+ * b64	points to exactly MD5_BUFLEN (64) bytes of message data; it may be
+ *		at any alignment (pg_md5_update() passes pointers into the
+ *		caller's buffer).
+ * ctx	context whose md5_sta..md5_std words are read and updated.
+ *
+ * The sixteen little-endian message words are assembled byte by byte into
+ * a local array.  That avoids reading the input through a uint32 pointer
+ * (which would be a misaligned, aliasing-violating access for unaligned
+ * input) and needs neither an endianness #ifdef nor a shared static
+ * scratch buffer.
+ */
+static void
+md5_calc(const uint8 *b64, pg_md5_ctx *ctx)
+{
+	uint32 A = ctx->md5_sta;
+	uint32 B = ctx->md5_stb;
+	uint32 C = ctx->md5_stc;
+	uint32 D = ctx->md5_std;
+	uint32 X[16];
+	int w;
+
+	/* Decode the block as 16 little-endian 32-bit words */
+	for (w = 0; w < 16; w++)
+	{
+		const uint8 *p = b64 + w * 4;
+
+		X[w] = (uint32) p[0] |
+			((uint32) p[1] << 8) |
+			((uint32) p[2] << 16) |
+			((uint32) p[3] << 24);
+	}
+
+	ROUND1(A, B, C, D, 0, Sa, 1);
+	ROUND1(D, A, B, C, 1, Sb, 2);
+	ROUND1(C, D, A, B, 2, Sc, 3);
+	ROUND1(B, C, D, A, 3, Sd, 4);
+	ROUND1(A, B, C, D, 4, Sa, 5);
+	ROUND1(D, A, B, C, 5, Sb, 6);
+	ROUND1(C, D, A, B, 6, Sc, 7);
+	ROUND1(B, C, D, A, 7, Sd, 8);
+	ROUND1(A, B, C, D, 8, Sa, 9);
+	ROUND1(D, A, B, C, 9, Sb, 10);
+	ROUND1(C, D, A, B, 10, Sc, 11);
+	ROUND1(B, C, D, A, 11, Sd, 12);
+	ROUND1(A, B, C, D, 12, Sa, 13);
+	ROUND1(D, A, B, C, 13, Sb, 14);
+	ROUND1(C, D, A, B, 14, Sc, 15);
+	ROUND1(B, C, D, A, 15, Sd, 16);
+
+	ROUND2(A, B, C, D, 1, Se, 17);
+	ROUND2(D, A, B, C, 6, Sf, 18);
+	ROUND2(C, D, A, B, 11, Sg, 19);
+	ROUND2(B, C, D, A, 0, Sh, 20);
+	ROUND2(A, B, C, D, 5, Se, 21);
+	ROUND2(D, A, B, C, 10, Sf, 22);
+	ROUND2(C, D, A, B, 15, Sg, 23);
+	ROUND2(B, C, D, A, 4, Sh, 24);
+	ROUND2(A, B, C, D, 9, Se, 25);
+	ROUND2(D, A, B, C, 14, Sf, 26);
+	ROUND2(C, D, A, B, 3, Sg, 27);
+	ROUND2(B, C, D, A, 8, Sh, 28);
+	ROUND2(A, B, C, D, 13, Se, 29);
+	ROUND2(D, A, B, C, 2, Sf, 30);
+	ROUND2(C, D, A, B, 7, Sg, 31);
+	ROUND2(B, C, D, A, 12, Sh, 32);
+
+	ROUND3(A, B, C, D, 5, Si, 33);
+	ROUND3(D, A, B, C, 8, Sj, 34);
+	ROUND3(C, D, A, B, 11, Sk, 35);
+	ROUND3(B, C, D, A, 14, Sl, 36);
+	ROUND3(A, B, C, D, 1, Si, 37);
+	ROUND3(D, A, B, C, 4, Sj, 38);
+	ROUND3(C, D, A, B, 7, Sk, 39);
+	ROUND3(B, C, D, A, 10, Sl, 40);
+	ROUND3(A, B, C, D, 13, Si, 41);
+	ROUND3(D, A, B, C, 0, Sj, 42);
+	ROUND3(C, D, A, B, 3, Sk, 43);
+	ROUND3(B, C, D, A, 6, Sl, 44);
+	ROUND3(A, B, C, D, 9, Si, 45);
+	ROUND3(D, A, B, C, 12, Sj, 46);
+	ROUND3(C, D, A, B, 15, Sk, 47);
+	ROUND3(B, C, D, A, 2, Sl, 48);
+
+	ROUND4(A, B, C, D, 0, Sm, 49);
+	ROUND4(D, A, B, C, 7, Sn, 50);
+	ROUND4(C, D, A, B, 14, So, 51);
+	ROUND4(B, C, D, A, 5, Sp, 52);
+	ROUND4(A, B, C, D, 12, Sm, 53);
+	ROUND4(D, A, B, C, 3, Sn, 54);
+	ROUND4(C, D, A, B, 10, So, 55);
+	ROUND4(B, C, D, A, 1, Sp, 56);
+	ROUND4(A, B, C, D, 8, Sm, 57);
+	ROUND4(D, A, B, C, 15, Sn, 58);
+	ROUND4(C, D, A, B, 6, So, 59);
+	ROUND4(B, C, D, A, 13, Sp, 60);
+	ROUND4(A, B, C, D, 4, Sm, 61);
+	ROUND4(D, A, B, C, 11, Sn, 62);
+	ROUND4(C, D, A, B, 2, So, 63);
+	ROUND4(B, C, D, A, 9, Sp, 64);
+
+	/* Add this block's result into the running state */
+	ctx->md5_sta += A;
+	ctx->md5_stb += B;
+	ctx->md5_stc += C;
+	ctx->md5_std += D;
+}
+
+/*
+ * md5_pad
+ *
+ * Append the padding bytes (0x80 followed by zeros) and the 8-byte message
+ * bit count to the data buffered in "ctx", running md5_calc() on the
+ * resulting final block(s).  md5_n itself is left untouched.
+ */
+static void
+md5_pad(pg_md5_ctx *ctx)
+{
+	const unsigned int room = MD5_BUFLEN - ctx->md5_i;
+	uint8 *tail = ctx->md5_buf + ctx->md5_i;
+
+	if (room <= 8)
+	{
+		/*
+		 * Not enough space left for padding plus the length field (this
+		 * includes room == 8): finish the current block with padding, then
+		 * start a fresh one with the remaining padding bytes.
+		 */
+		memmove(tail, md5_paddat, room);
+		md5_calc(ctx->md5_buf, ctx);
+		memmove(ctx->md5_buf, md5_paddat + room,
+				MD5_BUFLEN - sizeof(ctx->md5_n));
+	}
+	else
+	{
+		/* Pad up to where the length field starts */
+		memmove(tail, md5_paddat, room - sizeof(ctx->md5_n));
+	}
+
+	/* Store the 64-bit bit count, least significant byte first */
+#ifdef WORDS_BIGENDIAN
+	for (int k = 0; k < 8; k++)
+		ctx->md5_buf[56 + k] = ctx->md5_n8[7 - k];
+#else
+	memmove(&ctx->md5_buf[56], &ctx->md5_n8[0], 8);
+#endif
+
+	md5_calc(ctx->md5_buf, ctx);
+}
+
+/*
+ * md5_result
+ *
+ * Copy the 16-byte digest out of the state words in "ctx" into "digest",
+ * emitting each 32-bit word least significant byte first.
+ */
+static void
+md5_result(uint8 *digest, pg_md5_ctx *ctx)
+{
+#ifdef WORDS_BIGENDIAN
+	/* Reverse the bytes within each 4-byte word: index i maps to i ^ 3 */
+	for (int i = 0; i < 16; i++)
+		digest[i] = ctx->md5_st8[i ^ 3];
+#else
+	/* In-memory layout already matches the output order */
+	memmove(digest, &ctx->md5_st8[0], 16);
+#endif
+}
+
+
+/* External routines for this MD5 implementation */
+
+/*
+ * pg_md5_init
+ *
+ * Put "ctx" into its starting state: empty buffer, zero bit count and the
+ * four initial state words.
+ */
+void
+pg_md5_init(pg_md5_ctx *ctx)
+{
+	/* Nothing buffered and nothing counted yet */
+	memset(ctx->md5_buf, 0, sizeof(ctx->md5_buf));
+	ctx->md5_i = 0;
+	ctx->md5_n = 0;
+
+	/* Initial values of the state words */
+	ctx->md5_sta = MD5_A0;
+	ctx->md5_stb = MD5_B0;
+	ctx->md5_stc = MD5_C0;
+	ctx->md5_std = MD5_D0;
+}
+
+
+/*
+ * pg_md5_update
+ *
+ * Feed "len" bytes starting at "data" into the MD5 context "ctx".
+ *
+ * Complete 64-byte blocks are hashed immediately; any remainder is kept in
+ * ctx->md5_buf (with ctx->md5_i recording how many bytes are buffered)
+ * until more data arrives or the context is finalized.  A zero-length
+ * update is a no-op, and "data" is not touched in that case.
+ */
+void
+pg_md5_update(pg_md5_ctx *ctx, const uint8 *data, size_t len)
+{
+	size_t gap;
+	size_t i;
+
+	if (len == 0)
+		return;
+
+	/*
+	 * Byte to bit.  Widen before multiplying so that the product cannot
+	 * wrap where size_t is narrower than 64 bits.
+	 */
+	ctx->md5_n += (uint64) len * 8;
+	gap = MD5_BUFLEN - ctx->md5_i;
+
+	if (len >= gap)
+	{
+		/* Complete the partially filled buffer and hash it */
+		memmove(ctx->md5_buf + ctx->md5_i, data, gap);
+		md5_calc(ctx->md5_buf, ctx);
+
+		/*
+		 * Hash further whole blocks straight from the input.  The bound is
+		 * written as "len - i" (i never exceeds len) so that the index
+		 * cannot overflow however large len is.
+		 */
+		for (i = gap; len - i >= MD5_BUFLEN; i += MD5_BUFLEN)
+			md5_calc(data + i, ctx);
+
+		/* Stash the tail, which is shorter than one block */
+		ctx->md5_i = (unsigned int) (len - i);
+		memmove(ctx->md5_buf, data + i, ctx->md5_i);
+	}
+	else
+	{
+		/* Not enough for a full block yet: just buffer it */
+		memmove(ctx->md5_buf + ctx->md5_i, data, len);
+		ctx->md5_i += (unsigned int) len;
+	}
+}
+
+/*
+ * pg_md5_final
+ *
+ * Finalize a MD5 context.
+ *
+ * Pads the buffered data and processes the last block(s) via md5_pad(),
+ * then writes the 16-byte digest to "dest" via md5_result().  "dest" must
+ * therefore have room for at least 16 bytes.
+ */
+void
+pg_md5_final(pg_md5_ctx *ctx, uint8 *dest)
+{
+ md5_pad(ctx);
+ md5_result(dest, ctx);
+}
diff --git a/src/common/md5_common.c b/src/common/md5_common.c
new file mode 100644
index 0000000..82ce75d
--- /dev/null
+++ b/src/common/md5_common.c
@@ -0,0 +1,172 @@
+/*-------------------------------------------------------------------------
+ *
+ * md5_common.c
+ * Routines shared between all MD5 implementations used for encrypted
+ * passwords.
+ *
+ * Sverre H. Huseby <sverrehu@online.no>
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/md5_common.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/cryptohash.h"
+#include "common/md5.h"
+
+/*
+ * bytesToHex
+ *
+ * Write the 16 bytes of "b" to "s" as 32 lowercase hexadecimal digits
+ * followed by a terminating '\0'; "s" must have room for 33 characters.
+ */
+static void
+bytesToHex(uint8 b[16], char *s)
+{
+	static const char *hex = "0123456789abcdef";
+	char *out = s;
+
+	for (int i = 0; i < 16; i++)
+	{
+		uint8 byte = b[i];
+
+		/* high nibble first, then low nibble */
+		*out++ = hex[(byte >> 4) & 0x0F];
+		*out++ = hex[byte & 0x0F];
+	}
+	*out = '\0';
+}
+
+/*
+ * pg_md5_hash
+ *
+ * Calculates the MD5 sum of the bytes in a buffer.
+ *
+ * SYNOPSIS #include "common/md5.h"
+ * bool pg_md5_hash(const void *buff, size_t len, char *hexsum,
+ * const char **errstr)
+ *
+ * INPUT buff the buffer containing the bytes that you want
+ * the MD5 sum of.
+ * len number of bytes in the buffer.
+ *
+ * OUTPUT hexsum the MD5 sum as a '\0'-terminated string of
+ * hexadecimal digits. an MD5 sum is 16 bytes long.
+ * each byte is represented by two hexadecimal
+ * characters. you thus need to provide an array
+ * of 33 characters, including the trailing '\0'.
+ *
+ * errstr filled with a constant-string error message
+ * on failure return; NULL on success.
+ *
+ * RETURNS false on failure (out of memory for internal buffers
+ * or MD5 computation failure) or true on success.
+ *
+ * STANDARDS MD5 is described in RFC 1321.
+ *
+ * AUTHOR Sverre H. Huseby <sverrehu@online.no>
+ *
+ */
+
+bool
+pg_md5_hash(const void *buff, size_t len, char *hexsum, const char **errstr)
+{
+ uint8 sum[MD5_DIGEST_LENGTH];
+ pg_cryptohash_ctx *ctx;
+
+ *errstr = NULL;
+ ctx = pg_cryptohash_create(PG_MD5);
+ if (ctx == NULL)
+ {
+ *errstr = pg_cryptohash_error(NULL); /* returns OOM */
+ return false;
+ }
+
+ /* Any failing step short-circuits the rest of the chain */
+ if (pg_cryptohash_init(ctx) < 0 ||
+ pg_cryptohash_update(ctx, buff, len) < 0 ||
+ pg_cryptohash_final(ctx, sum, sizeof(sum)) < 0)
+ {
+ /* fetch the error string before the context is freed */
+ *errstr = pg_cryptohash_error(ctx);
+ pg_cryptohash_free(ctx);
+ return false;
+ }
+
+ /* Convert the binary digest to its hex text form in the caller's buffer */
+ bytesToHex(sum, hexsum);
+ pg_cryptohash_free(ctx);
+ return true;
+}
+
+/*
+ * pg_md5_binary
+ *
+ * Same job as pg_md5_hash(), but the digest is stored in "outbuf" as
+ * MD5_DIGEST_LENGTH raw bytes instead of being rendered as ASCII hex.
+ *
+ * Returns true on success with *errstr set to NULL; returns false on
+ * failure with *errstr pointing to an error message.
+ */
+bool
+pg_md5_binary(const void *buff, size_t len, void *outbuf, const char **errstr)
+{
+	pg_cryptohash_ctx *hash;
+	bool ok;
+
+	*errstr = NULL;
+
+	hash = pg_cryptohash_create(PG_MD5);
+	if (hash == NULL)
+	{
+		*errstr = pg_cryptohash_error(NULL);	/* returns OOM */
+		return false;
+	}
+
+	/* Each step runs only if the previous one succeeded */
+	ok = pg_cryptohash_init(hash) >= 0 &&
+		pg_cryptohash_update(hash, buff, len) >= 0 &&
+		pg_cryptohash_final(hash, outbuf, MD5_DIGEST_LENGTH) >= 0;
+
+	/* The error string must be fetched before the context goes away */
+	if (!ok)
+		*errstr = pg_cryptohash_error(hash);
+
+	pg_cryptohash_free(hash);
+	return ok;
+}
+
+
+/*
+ * pg_md5_encrypt
+ *
+ * Hash the concatenation of "passwd" (a null-terminated string) and
+ * "salt" ("salt_len" bytes, not necessarily null-terminated) with MD5.
+ *
+ * The result written to "buf" is the literal "md5" followed by the 32 hex
+ * digits of the checksum and a terminating null, so "buf" must provide at
+ * least 36 bytes.
+ *
+ * Returns true if okay, false on error with *errstr providing some
+ * error context.
+ */
+bool
+pg_md5_encrypt(const char *passwd, const char *salt, size_t salt_len,
+			   char *buf, const char **errstr)
+{
+	size_t pw_len = strlen(passwd);
+	size_t total_len = pw_len + salt_len;
+	char *scratch;
+	bool ok;
+
+	/* One spare byte so that we never issue an unportable malloc(0) */
+	scratch = malloc(total_len + 1);
+	if (scratch == NULL)
+	{
+		*errstr = _("out of memory");
+		return false;
+	}
+
+	/*
+	 * The salt goes last: it may be known to someone trying to crack the
+	 * MD5 output.
+	 */
+	memcpy(scratch, passwd, pw_len);
+	memcpy(scratch + pw_len, salt, salt_len);
+
+	/* Prefix first, then the hex digest right behind it */
+	strcpy(buf, "md5");
+	ok = pg_md5_hash(scratch, total_len, buf + 3, errstr);
+
+	free(scratch);
+
+	return ok;
+}
diff --git a/src/common/md5_int.h b/src/common/md5_int.h
new file mode 100644
index 0000000..63fd395
--- /dev/null
+++ b/src/common/md5_int.h
@@ -0,0 +1,85 @@
+/*-------------------------------------------------------------------------
+ *
+ * md5_int.h
+ * Internal headers for fallback implementation of MD5
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/md5_int.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* $KAME: md5.h,v 1.3 2000/02/22 14:01:18 itojun Exp $ */
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef PG_MD5_INT_H
+#define PG_MD5_INT_H
+
+#include "common/md5.h"
+
+#define MD5_BUFLEN 64
+
+/*
+ * Context data for MD5
+ *
+ * Holds everything that must survive between pg_md5_init(),
+ * pg_md5_update() and pg_md5_final() calls. The unions give byte-wise
+ * access to the same storage as the integer members, which the
+ * implementation uses when it has to emit values in a fixed byte order.
+ */
+typedef struct
+{
+ /* The four 32-bit state words, also addressable as 16 raw bytes */
+ union
+ {
+ uint32 md5_state32[4];
+ uint8 md5_state8[16];
+ } md5_st;
+
+/* Shorthand accessors for the individual state words and the byte view */
+#define md5_sta md5_st.md5_state32[0]
+#define md5_stb md5_st.md5_state32[1]
+#define md5_stc md5_st.md5_state32[2]
+#define md5_std md5_st.md5_state32[3]
+#define md5_st8 md5_st.md5_state8
+
+ /* Running message length in bits, also addressable as 8 raw bytes */
+ union
+ {
+ uint64 md5_count64;
+ uint8 md5_count8[8];
+ } md5_count;
+#define md5_n md5_count.md5_count64
+#define md5_n8 md5_count.md5_count8
+
+ /* Number of bytes currently held in md5_buf */
+ unsigned int md5_i;
+ /* Input bytes not yet processed as a full MD5_BUFLEN-byte block */
+ uint8 md5_buf[MD5_BUFLEN];
+} pg_md5_ctx;
+
+/* Interface routines for MD5 */
+extern void pg_md5_init(pg_md5_ctx *ctx);
+extern void pg_md5_update(pg_md5_ctx *ctx, const uint8 *data, size_t len);
+extern void pg_md5_final(pg_md5_ctx *ctx, uint8 *dest);
+
+#endif /* PG_MD5_INT_H */
diff --git a/src/common/meson.build b/src/common/meson.build
new file mode 100644
index 0000000..9efc80a
--- /dev/null
+++ b/src/common/meson.build
@@ -0,0 +1,177 @@
+# Copyright (c) 2022-2023, PostgreSQL Global Development Group
+
+common_sources = files(
+ 'archive.c',
+ 'base64.c',
+ 'checksum_helper.c',
+ 'compression.c',
+ 'controldata_utils.c',
+ 'encnames.c',
+ 'exec.c',
+ 'file_perm.c',
+ 'file_utils.c',
+ 'hashfn.c',
+ 'ip.c',
+ 'jsonapi.c',
+ 'keywords.c',
+ 'kwlookup.c',
+ 'link-canary.c',
+ 'md5_common.c',
+ 'percentrepl.c',
+ 'pg_get_line.c',
+ 'pg_lzcompress.c',
+ 'pg_prng.c',
+ 'pgfnames.c',
+ 'psprintf.c',
+ 'relpath.c',
+ 'rmtree.c',
+ 'saslprep.c',
+ 'scram-common.c',
+ 'string.c',
+ 'stringinfo.c',
+ 'unicode_norm.c',
+ 'username.c',
+ 'wait_error.c',
+ 'wchar.c',
+)
+
+if ssl.found()
+ common_sources += files(
+ 'cryptohash_openssl.c',
+ 'hmac_openssl.c',
+ 'protocol_openssl.c',
+ )
+else
+ common_sources += files(
+ 'cryptohash.c',
+ 'hmac.c',
+ 'md5.c',
+ 'sha1.c',
+ 'sha2.c',
+ )
+endif
+
+common_kwlist = custom_target('kwlist',
+ input: files('../include/parser/kwlist.h'),
+ output: 'kwlist_d.h',
+ depend_files: gen_kwlist_deps,
+ command: [gen_kwlist_cmd, '--extern'])
+generated_sources += common_kwlist
+common_sources += common_kwlist
+
+# The code imported from Ryu gets a pass on declaration-after-statement,
+# in order to keep it more closely aligned with its upstream.
+ryu_sources = files(
+ 'd2s.c',
+ 'f2s.c',
+)
+ryu_cflags = []
+
+ryu_cflags += cflags_no_decl_after_statement
+
+config_info_sources = files('config_info.c',)
+config_info_cflags = [
+ '-DVAL_CC="@0@"'.format(var_cc),
+ '-DVAL_CPPFLAGS="@0@"'.format(var_cppflags),
+ '-DVAL_CFLAGS="@0@"'.format(var_cflags),
+ '-DVAL_CFLAGS_SL="@0@"'.format(var_cflags_sl),
+ '-DVAL_LDFLAGS="@0@"'.format(var_ldflags),
+ '-DVAL_LDFLAGS_EX="@0@"'.format(var_ldflags_ex),
+ '-DVAL_LDFLAGS_SL="@0@"'.format(var_ldflags_sl),
+ '-DVAL_LIBS="@0@"'.format(var_libs),
+]
+
+# Some files need to be built with different cflags. The different sets are
+# defined here.
+common_cflags = {
+ 'ryu': ryu_cflags,
+ 'config_info': config_info_cflags,
+}
+common_sources_cflags = {
+ 'ryu': ryu_sources,
+ 'config_info': config_info_sources
+}
+
+
+# A few files are currently only built for frontend, not server
+# (Mkvcbuild.pm has a copy of this list, too). logging.c is excluded
+# from OBJS_FRONTEND_SHLIB (shared library) as a matter of policy,
+# because it is not appropriate for general purpose libraries such
+# as libpq to report errors directly.
+
+common_sources_frontend_shlib = common_sources
+common_sources_frontend_shlib += files(
+ 'fe_memutils.c',
+ 'restricted_token.c',
+ 'sprompt.c',
+)
+
+common_sources_frontend_static = common_sources_frontend_shlib
+common_sources_frontend_static += files(
+ 'logging.c',
+)
+
+# Build pgcommon once for backend, once for use in frontend binaries, and once
+# for use in shared libraries
+#
+# XXX: in most environments we could probably link_whole pgcommon_shlib
+# against pgcommon_static, instead of compiling twice.
+#
+# For the server build of pgcommon, depend on lwlocknames_h, because at least
+# cryptohash_openssl.c, hmac_openssl.c depend on it. That's arguably a
+# layering violation, but ...
+pgcommon = {}
+pgcommon_variants = {
+ '_srv': internal_lib_args + {
+ 'sources': common_sources + [lwlocknames_h],
+ 'dependencies': [backend_common_code],
+ },
+ '': default_lib_args + {
+ 'sources': common_sources_frontend_static,
+ 'dependencies': [frontend_common_code],
+ },
+ '_shlib': default_lib_args + {
+ 'pic': true,
+ 'sources': common_sources_frontend_shlib,
+ 'dependencies': [frontend_common_code],
+ },
+}
+
+# Build one libpgcommon static library per entry of pgcommon_variants
+# ('_srv', '' and '_shlib'); "name" is the suffix, "opts" the keyword
+# arguments for that variant.
+foreach name, opts : pgcommon_variants
+
+ # Build internal static libraries for sets of files that need to be built
+ # with different cflags
+ cflag_libs = []
+ foreach cflagname, sources : common_sources_cflags
+ # Nothing to build for a group without source files
+ if sources.length() == 0
+ continue
+ endif
+ # The variant's own c_args (if any) plus the extra flags of this group
+ c_args = opts.get('c_args', []) + common_cflags[cflagname]
+ cflag_libs += static_library('libpgcommon@0@_@1@'.format(name, cflagname),
+ c_pch: pch_c_h,
+ include_directories: include_directories('.'),
+ kwargs: opts + {
+ 'sources': sources,
+ 'c_args': c_args,
+ 'build_by_default': false,
+ 'install': false,
+ },
+ )
+ endforeach
+
+ # The variant's main library: links in the per-cflag helper libraries
+ # built above and adds ssl to the variant's dependencies
+ lib = static_library('libpgcommon@0@'.format(name),
+ link_with: cflag_libs,
+ c_pch: pch_c_h,
+ include_directories: include_directories('.'),
+ kwargs: opts + {
+ 'dependencies': opts['dependencies'] + [ssl],
+ }
+ )
+ # Remember the library under its variant suffix
+ pgcommon += {name: lib}
+endforeach
+
+common_srv = pgcommon['_srv']
+common_shlib = pgcommon['_shlib']
+common_static = pgcommon['']
+
+subdir('unicode')
diff --git a/src/common/percentrepl.c b/src/common/percentrepl.c
new file mode 100644
index 0000000..7aa85fd
--- /dev/null
+++ b/src/common/percentrepl.c
@@ -0,0 +1,137 @@
+/*-------------------------------------------------------------------------
+ *
+ * percentrepl.c
+ * Common routines to replace percent placeholders in strings
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/percentrepl.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#include "common/logging.h"
+#endif
+
+#include "common/percentrepl.h"
+#include "lib/stringinfo.h"
+
+/*
+ * replace_percent_placeholders
+ *
+ * Replace percent-letter placeholders in input string with the supplied
+ * values. For example, to replace %f with foo and %b with bar, call
+ *
+ * replace_percent_placeholders(instr, "param_name", "bf", bar, foo);
+ *
+ * The return value is palloc'd.
+ *
+ * "%%" is replaced by a single "%".
+ *
+ * This throws an error for an unsupported placeholder or a "%" at the end of
+ * the input string.
+ *
+ * A value may be NULL. If the corresponding placeholder is found in the
+ * input string, it will be treated as if an unsupported placeholder was used.
+ * This allows callers to share a "letters" specification but vary the
+ * actually supported placeholders at run time.
+ *
+ * This function is meant for cases where all the values are readily
+ * available or cheap to compute and most invocations will use most values
+ * (for example for archive_command). Also, it requires that all values are
+ * strings. It won't be a good match for things like log prefixes or prompts
+ * that use a mix of data types and any invocation will only use a few of the
+ * possible values.
+ *
+ * param_name is the name of the underlying GUC parameter, for error
+ * reporting. At the moment, this function is only used for GUC parameters.
+ * If other kinds of uses were added, the error reporting would need to be
+ * revised.
+ */
+char *
+replace_percent_placeholders(const char *instr, const char *param_name, const char *letters,...)
+{
+	StringInfoData result;
+	const char *sp;
+
+	initStringInfo(&result);
+
+	for (sp = instr; *sp != '\0'; sp++)
+	{
+		bool found;
+		va_list ap;
+		const char *lp;
+
+		/* Anything that is not a "%" is copied through unchanged */
+		if (*sp != '%')
+		{
+			appendStringInfoChar(&result, *sp);
+			continue;
+		}
+
+		/* "%%" collapses to a single "%" */
+		if (sp[1] == '%')
+		{
+			sp++;
+			appendStringInfoChar(&result, *sp);
+			continue;
+		}
+
+		/* A lone "%" at the very end of the string is an error */
+		if (sp[1] == '\0')
+		{
+#ifdef FRONTEND
+			pg_log_error("invalid value for parameter \"%s\": \"%s\"", param_name, instr);
+			pg_log_error_detail("String ends unexpectedly after escape character \"%%\".");
+			exit(1);
+#else
+			ereport(ERROR,
+					errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					errmsg("invalid value for parameter \"%s\": \"%s\"", param_name, instr),
+					errdetail("String ends unexpectedly after escape character \"%%\"."));
+#endif
+			continue;
+		}
+
+		/*
+		 * Otherwise this is a placeholder: move onto its letter and walk
+		 * "letters" in step with the variadic values to find its match.
+		 */
+		sp++;
+		found = false;
+
+		va_start(ap, letters);
+		for (lp = letters; *lp != '\0'; lp++)
+		{
+			char *val = va_arg(ap, char *);
+
+			if (*lp != *sp)
+				continue;
+
+			/* A NULL value leaves "found" unset, so an error is reported */
+			if (val != NULL)
+			{
+				appendStringInfoString(&result, val);
+				found = true;
+			}
+			break;
+		}
+		va_end(ap);
+
+		if (!found)
+		{
+			/* Unknown (or currently unsupported) placeholder */
+#ifdef FRONTEND
+			pg_log_error("invalid value for parameter \"%s\": \"%s\"", param_name, instr);
+			pg_log_error_detail("String contains unexpected placeholder \"%%%c\".", *sp);
+			exit(1);
+#else
+			ereport(ERROR,
+					errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					errmsg("invalid value for parameter \"%s\": \"%s\"", param_name, instr),
+					errdetail("String contains unexpected placeholder \"%%%c\".", *sp));
+#endif
+		}
+	}
+
+	return result.data;
+}
diff --git a/src/common/pg_get_line.c b/src/common/pg_get_line.c
new file mode 100644
index 0000000..3cdf090
--- /dev/null
+++ b/src/common/pg_get_line.c
@@ -0,0 +1,180 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_get_line.c
+ * fgets() with an expansible result buffer
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/pg_get_line.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <setjmp.h>
+
+#include "common/string.h"
+#include "lib/stringinfo.h"
+
+
+/*
+ * pg_get_line()
+ *
+ * This is meant to be equivalent to fgets(), except that instead of
+ * reading into a caller-supplied, fixed-size buffer, it reads into
+ * a palloc'd (in frontend, really malloc'd) string, which is resized
+ * as needed to handle indefinitely long input lines. The caller is
+ * responsible for pfree'ing the result string when appropriate.
+ *
+ * As with fgets(), returns NULL if there is a read error or if no
+ * characters are available before EOF. The caller can distinguish
+ * these cases by checking ferror(stream).
+ *
+ * Since this is meant to be equivalent to fgets(), the trailing newline
+ * (if any) is not stripped. Callers may wish to apply pg_strip_crlf().
+ *
+ * Note that while I/O errors are reflected back to the caller to be
+ * dealt with, an OOM condition for the palloc'd buffer will not be;
+ * there'll be an ereport(ERROR) or exit(1) inside stringinfo.c.
+ *
+ * Also note that the palloc'd buffer is usually a lot longer than
+ * strictly necessary, so it may be inadvisable to use this function
+ * to collect lots of long-lived data. A less memory-hungry option
+ * is to use pg_get_line_buf() or pg_get_line_append() in a loop,
+ * then pstrdup() each line.
+ *
+ * prompt_ctx can optionally be provided to allow this function to be
+ * canceled via an existing SIGINT signal handler that will longjmp to the
+ * specified place only when *(prompt_ctx->enabled) is true. If canceled,
+ * this function returns NULL, and prompt_ctx->canceled is set to true.
+ */
+char *
+pg_get_line(FILE *stream, PromptInterruptContext *prompt_ctx)
+{
+	StringInfoData line;
+	int saved_errno;
+
+	initStringInfo(&line);
+
+	/* Success: hand the buffer over to the caller */
+	if (pg_get_line_append(stream, &line, prompt_ctx))
+		return line.data;
+
+	/*
+	 * Nothing was collected.  Release the buffer, taking care that doing
+	 * so doesn't disturb the errno value the caller may want to inspect.
+	 */
+	saved_errno = errno;
+	pfree(line.data);
+	errno = saved_errno;
+
+	return NULL;
+}
+
+/*
+ * pg_get_line_buf()
+ *
+ * This has similar behavior to pg_get_line(), and thence to fgets(),
+ * except that the collected data is returned in a caller-supplied
+ * StringInfo buffer. This is a convenient API for code that just
+ * wants to read and process one line at a time, without any artificial
+ * limit on line length.
+ *
+ * Returns true if a line was successfully collected (including the
+ * case of a non-newline-terminated line at EOF). Returns false if
+ * there was an I/O error or no data was available before EOF.
+ * (Check ferror(stream) to distinguish these cases.)
+ *
+ * In the false-result case, buf is reset to empty.
+ */
+bool
+pg_get_line_buf(FILE *stream, StringInfo buf)
+{
+ /* We just need to drop any data from the previous call */
+ resetStringInfo(buf);
+ /*
+ * No prompt_ctx is passed, so the read is done without the
+ * longjmp-based cancellation that pg_get_line_append() can offer.
+ */
+ return pg_get_line_append(stream, buf, NULL);
+}
+
+/*
+ * pg_get_line_append()
+ *
+ * This has similar behavior to pg_get_line(), and thence to fgets(),
+ * except that the collected data is appended to whatever is in *buf.
+ * This is useful in preference to pg_get_line_buf() if the caller wants
+ * to merge some lines together, e.g. to implement backslash continuation.
+ *
+ * Returns true if a line was successfully collected (including the
+ * case of a non-newline-terminated line at EOF). Returns false if
+ * there was an I/O error or no data was available before EOF.
+ * (Check ferror(stream) to distinguish these cases.)
+ *
+ * In the false-result case, the contents of *buf are logically unmodified,
+ * though it's possible that the buffer has been resized.
+ *
+ * prompt_ctx can optionally be provided to allow this function to be
+ * canceled via an existing SIGINT signal handler that will longjmp to the
+ * specified place only when *(prompt_ctx->enabled) is true. If canceled,
+ * this function returns false, and prompt_ctx->canceled is set to true.
+ */
+bool
+pg_get_line_append(FILE *stream, StringInfo buf,
+ PromptInterruptContext *prompt_ctx)
+{
+ /* Remember where the buffer ended so a failure can be rolled back */
+ int orig_len = buf->len;
+
+ /*
+ * Establish the jump target, if the caller asked for one. sigsetjmp()
+ * returns zero on this direct call; a nonzero result means control came
+ * back here through a longjmp, which is treated as cancellation.
+ */
+ if (prompt_ctx && sigsetjmp(*((sigjmp_buf *) prompt_ctx->jmpbuf), 1) != 0)
+ {
+ /* Got here with longjmp */
+ prompt_ctx->canceled = true;
+ /* Discard any data we collected before detecting error */
+ buf->len = orig_len;
+ buf->data[orig_len] = '\0';
+ return false;
+ }
+
+ /* Loop until newline or EOF/error */
+ for (;;)
+ {
+ char *res;
+
+ /* Enable longjmp while waiting for input */
+ if (prompt_ctx)
+ *(prompt_ctx->enabled) = true;
+
+ /*
+ * Read some data, appending it to whatever we already have. fgets()
+ * is given exactly the unused space at the end of the buffer.
+ */
+ res = fgets(buf->data + buf->len, buf->maxlen - buf->len, stream);
+
+ /* Disable longjmp again, then break if fgets failed */
+ if (prompt_ctx)
+ *(prompt_ctx->enabled) = false;
+
+ if (res == NULL)
+ break;
+
+ /* Got data, so update buf->len */
+ buf->len += strlen(buf->data + buf->len);
+
+ /* Done if we have collected a newline */
+ if (buf->len > orig_len && buf->data[buf->len - 1] == '\n')
+ return true;
+
+ /* Make some more room in the buffer, and loop to read more data */
+ enlargeStringInfo(buf, 128);
+ }
+
+ /*
+ * Check for I/O errors and EOF. An unchanged length means fgets() hit
+ * EOF before delivering any data.
+ */
+ if (ferror(stream) || buf->len == orig_len)
+ {
+ /* Discard any data we collected before detecting error */
+ buf->len = orig_len;
+ buf->data[orig_len] = '\0';
+ return false;
+ }
+
+ /* No newline at EOF, but we did collect some data */
+ return true;
+}
diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c
new file mode 100644
index 0000000..95ad338
--- /dev/null
+++ b/src/common/pg_lzcompress.c
@@ -0,0 +1,876 @@
+/* ----------
+ * pg_lzcompress.c -
+ *
+ * This is an implementation of LZ compression for PostgreSQL.
+ * It uses a simple history table and generates 2-3 byte tags
+ * capable of backward copy information for 3-273 bytes with
+ * a max offset of 4095.
+ *
+ * Entry routines:
+ *
+ * int32
+ * pglz_compress(const char *source, int32 slen, char *dest,
+ * const PGLZ_Strategy *strategy);
+ *
+ * source is the input data to be compressed.
+ *
+ * slen is the length of the input data.
+ *
+ * dest is the output area for the compressed result.
+ * It must be at least as big as PGLZ_MAX_OUTPUT(slen).
+ *
+ * strategy is a pointer to some information controlling
+ * the compression algorithm. If NULL, the compiled
+ * in default strategy is used.
+ *
+ * The return value is the number of bytes written in the
+ * buffer dest, or -1 if compression fails; in the latter
+ * case the contents of dest are undefined.
+ *
+ * int32
+ * pglz_decompress(const char *source, int32 slen, char *dest,
+ * int32 rawsize, bool check_complete)
+ *
+ * source is the compressed input.
+ *
+ * slen is the length of the compressed input.
+ *
+ * dest is the area where the uncompressed data will be
+ * written to. It is the caller's responsibility to
+ * provide enough space.
+ *
+ * The data is written to dest exactly as it was handed
+ * to pglz_compress(). No terminating zero byte is added.
+ *
+ * rawsize is the length of the uncompressed data.
+ *
+ * check_complete is a flag to let us know if -1 should be
+ * returned in cases where we don't reach the end of the
+ * source or dest buffers, or not. This should be false
+ * if the caller is asking for only a partial result and
+ * true otherwise.
+ *
+ * The return value is the number of bytes written in the
+ * buffer dest, or -1 if decompression fails.
+ *
+ * The decompression algorithm and internal data format:
+ *
+ * It is made with the compressed data itself.
+ *
+ * The data representation is easiest explained by describing
+ * the process of decompression.
+ *
+ * If compressed_size == rawsize, then the data
+ * is stored uncompressed as plain bytes. Thus, the decompressor
+ * simply copies rawsize bytes to the destination.
+ *
+ * Otherwise the first byte tells what to do the next 8 times.
+ * We call this the control byte.
+ *
+ * An unset bit in the control byte means, that one uncompressed
+ * byte follows, which is copied from input to output.
+ *
+ * A set bit in the control byte means, that a tag of 2-3 bytes
+ * follows. A tag contains information to copy some bytes, that
+ * are already in the output buffer, to the current location in
+ * the output. Let's call the three tag bytes T1, T2 and T3. The
+ * position of the data to copy is coded as an offset from the
+ * actual output position.
+ *
+ * The offset is in the upper nibble of T1 and in T2.
+ * The length is in the lower nibble of T1.
+ *
+ * So the 16 bits of a 2 byte tag are coded as
+ *
+ * 7---T1--0 7---T2--0
+ * OOOO LLLL OOOO OOOO
+ *
+ * This limits the offset to 1-4095 (12 bits) and the length
+ * to 3-18 (4 bits) because 3 is always added to it. To emit
+ * a tag of 2 bytes with a length of 2 only saves one control
+ * bit. But we lose one byte in the possible length of a tag.
+ *
+ * In the actual implementation, the 2 byte tag's length is
+ * limited to 3-17, because the value 0xF in the length nibble
+ * has special meaning. It means, that the next following
+ * byte (T3) has to be added to the length value of 18. That
+ * makes total limits of 1-4095 for offset and 3-273 for length.
+ *
+ * Now that we have successfully decoded a tag, we simply copy
+ * <length> bytes of the output that occurred <offset> bytes back
+ * to the current output location. Thus, a
+ * sequence of 200 spaces (think about bpchar fields) could be
+ * coded in 4 bytes. One literal space and a three byte tag to
+ * copy 199 bytes with a -1 offset. Whow - that's a compression
+ * rate of 98%! Well, the implementation needs to save the
+ * original data size too, so we need another 4 bytes for it
+ * and end up with a total compression rate of 96%, what's still
+ * worth a Whow.
+ *
+ * The compression algorithm
+ *
+ * The following uses numbers used in the default strategy.
+ *
+ * The compressor works best for attributes of a size between
+ * 1K and 1M. For smaller items there's not that much chance of
+ * redundancy in the character sequence (except for large areas
+ * of identical bytes like trailing spaces) and for bigger ones
+ * our 4K maximum look-back distance is too small.
+ *
+ * The compressor creates a table for lists of positions.
+ * For each input position (except the last 3), a hash key is
+ * built from the 4 next input bytes and the position remembered
+ * in the appropriate list. Thus, the table points to linked
+ * lists of positions whose strings likely match in at least the
+ * first 4 characters. This is done on the fly while the input
+ * is compressed into the output area. Table entries are only
+ * kept for the last 4096 input positions, since we cannot use
+ * back-pointers larger than that anyway. The size of the hash
+ * table is chosen based on the size of the input - a larger table
+ * has a larger startup cost, as it needs to be initialized to
+ * zero, but reduces the number of hash collisions on long inputs.
+ *
+ * For each byte in the input, its hash key (built from this
+ * byte and the next 3) is used to find the appropriate list
+ * in the table. The lists remember the positions of all bytes
+ * that had the same hash key in the past in increasing backward
+ * offset order. Now for all entries in the used lists, the
+ * match length is computed by comparing the characters from the
+ * entries position with the characters from the actual input
+ * position.
+ *
+ * The compressor starts with a so called "good_match" of 128.
+ * It is a "prefer speed against compression ratio" optimizer.
+ * So if the first entry looked at already has 128 or more
+ * matching characters, the lookup stops and that position is
+ * used for the next tag in the output.
+ *
+ * For each subsequent entry in the history list, the "good_match"
+ * is lowered by 10%. So the compressor will be more happy with
+ * short matches the further it has to go back in the history.
+ * Another "speed against ratio" preference characteristic of
+ * the algorithm.
+ *
+ * Thus there are 3 stop conditions for the lookup of matches:
+ *
+ * - a match >= good_match is found
+ * - there are no more history entries to look at
+ * - the next history entry is already too far back
+ * to be coded into a tag.
+ *
+ * Finally the match algorithm checks that at least a match
+ * of 3 or more bytes has been found, because that is the smallest
+ * amount of copy information to code into a tag. If so, a tag
+ * is emitted and all the input bytes covered by that are just
+ * scanned for the history add's, otherwise a literal character
+ * is emitted and only its history entry added.
+ *
+ * Acknowledgments:
+ *
+ * Many thanks to Adisak Pochanayon, whose article about SLZ
+ * inspired me to write the PostgreSQL compression this way.
+ *
+ * Jan Wieck
+ *
+ * Copyright (c) 1999-2023, PostgreSQL Global Development Group
+ *
+ * src/common/pg_lzcompress.c
+ * ----------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <limits.h>
+
+#include "common/pg_lzcompress.h"
+
+
+/* ----------
+ * Local definitions
+ * ----------
+ */
+#define PGLZ_MAX_HISTORY_LISTS 8192 /* must be power of 2 */
+#define PGLZ_HISTORY_SIZE 4096 /* usable hist_entries[] slots */
+#define PGLZ_MAX_MATCH 273 /* longest match a tag can encode */
+
+
+/* ----------
+ * PGLZ_HistEntry -
+ *
+ * Linked list node for the backward history lookup
+ *
+ * All the entries sharing a hash key are linked in a doubly linked list.
+ * This makes it easy to remove an entry when it's time to recycle it
+ * (because it's more than 4K positions old).
+ * ----------
+ */
+typedef struct PGLZ_HistEntry
+{
+ struct PGLZ_HistEntry *next; /* links for my hash key's list */
+ struct PGLZ_HistEntry *prev; /* NULL while I am the head of my list */
+ int hindex; /* my current hash key, an index into hist_start[] */
+ const char *pos; /* my input position (pointer into the source) */
+} PGLZ_HistEntry;
+
+
+/* ----------
+ * The provided standard strategies; this one is used for strategy == NULL
+ * ----------
+ */
+static const PGLZ_Strategy strategy_default_data = {
+ 32, /* Data chunks less than 32 bytes are not
+ * compressed */
+ INT_MAX, /* No upper limit on what we'll try to
+ * compress */
+ 25, /* Require 25% compression rate, or not worth
+ * it */
+ 1024, /* Give up if no match in the first 1KB of output */
+ 128, /* Stop history lookup if a match of 128 bytes
+ * is found */
+ 10 /* Lower good match size by 10% at every loop
+ * iteration */
+};
+const PGLZ_Strategy *const PGLZ_strategy_default = &strategy_default_data;
+
+
+static const PGLZ_Strategy strategy_always_data = {
+ 0, /* Chunks of any size are compressed */
+ INT_MAX, /* No upper limit on input size either */
+ 0, /* It's enough to save one single byte */
+ INT_MAX, /* Never give up early */
+ 128, /* Stop history lookup if a match of 128 bytes
+ * is found */
+ 6 /* Look harder for a good match: drop by only 6% */
+};
+const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data;
+
+
+/* ----------
+ * Statically allocated work arrays for history (shared by all calls)
+ * ----------
+ */
+static int16 hist_start[PGLZ_MAX_HISTORY_LISTS]; /* per hash slot: entry number of list head, or INVALID_ENTRY */
+static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE + 1]; /* [0] is the invalid-entry sentinel */
+
+/*
+ * Element 0 in hist_entries is unused, and means 'invalid'. Likewise, a
+ * next pointer of INVALID_ENTRY_PTR ends a list; a list head has prev NULL.
+ */
+#define INVALID_ENTRY 0
+#define INVALID_ENTRY_PTR (&hist_entries[INVALID_ENTRY])
+
+/* ----------
+ * pglz_hist_idx -
+ *
+ * Computes the history table slot for the lookup by the next 4
+ * characters in the input (_s = current position, _e = input end); if
+ * fewer than 4 remain, only (_s)[0] is used. _mask is hash size - 1.
+ * NB: because we use the next 4 characters, we are not guaranteed to
+ * find 3-character matches; they very possibly will be in the wrong
+ * hash list. This seems an acceptable tradeoff for spreading out the
+ * hash keys more.
+ * ----------
+ */
+#define pglz_hist_idx(_s,_e, _mask) ( \
+ ((((_e) - (_s)) < 4) ? (int) (_s)[0] : \
+ (((_s)[0] << 6) ^ ((_s)[1] << 4) ^ \
+ ((_s)[2] << 2) ^ (_s)[3])) & (_mask) \
+ )
+
+
+/* ----------
+ * pglz_hist_add -
+ *
+ * Adds a new entry for input position _s to the history table, making
+ * entry _hn of _he the new head of its hash slot's list in _hs.
+ * If _recycle is true, then we are recycling a previously used entry,
+ * and must first delink it from its old hashcode's linked list.
+ *
+ * NOTE: beware of multiple evaluations of macro's arguments, and note that
+ * _hn and _recycle are modified in the macro (_hn wraps back to 1).
+ * ----------
+ */
+#define pglz_hist_add(_hs,_he,_hn,_recycle,_s,_e, _mask) \
+do { \
+ int __hindex = pglz_hist_idx((_s),(_e), (_mask)); \
+ int16 *__myhsp = &(_hs)[__hindex]; \
+ PGLZ_HistEntry *__myhe = &(_he)[_hn]; \
+ if (_recycle) { \
+ if (__myhe->prev == NULL) \
+ (_hs)[__myhe->hindex] = __myhe->next - (_he); \
+ else \
+ __myhe->prev->next = __myhe->next; \
+ if (__myhe->next != NULL) \
+ __myhe->next->prev = __myhe->prev; \
+ } \
+ __myhe->next = &(_he)[*__myhsp]; \
+ __myhe->prev = NULL; \
+ __myhe->hindex = __hindex; \
+ __myhe->pos = (_s); \
+ /* If there was an existing entry in this hash slot, link */ \
+ /* this new entry to it. However, the 0th entry in the */ \
+ /* entries table is unused, so we can freely scribble on it. */ \
+ /* So don't bother checking if the slot was used - we'll */ \
+ /* scribble on the unused entry if it was not, but that's */ \
+ /* harmless. Avoiding the branch in this critical path */ \
+ /* speeds this up a little bit. */ \
+ /* if (*__myhsp != INVALID_ENTRY) */ \
+ (_he)[(*__myhsp)].prev = __myhe; \
+ *__myhsp = _hn; \
+ if (++(_hn) >= PGLZ_HISTORY_SIZE + 1) { \
+ (_hn) = 1; \
+ (_recycle) = true; \
+ } \
+} while (0)
+
+
+/* ----------
+ * pglz_out_ctrl -
+ *
+ * If no control bit is left, stores the old byte and starts a new one.
+ * ----------
+ */
+#define pglz_out_ctrl(__ctrlp,__ctrlb,__ctrl,__buf) \
+do { \
+ if ((__ctrl & 0xff) == 0) \
+ { \
+ *(__ctrlp) = __ctrlb; \
+ __ctrlp = (__buf)++; \
+ __ctrlb = 0; \
+ __ctrl = 1; \
+ } \
+} while (0)
+
+
+/* ----------
+ * pglz_out_literal -
+ *
+ * Outputs a literal byte to the destination buffer, leaving its
+ * control bit unset (only the bit mask _ctrl is advanced).
+ * ----------
+ */
+#define pglz_out_literal(_ctrlp,_ctrlb,_ctrl,_buf,_byte) \
+do { \
+ pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf); \
+ *(_buf)++ = (unsigned char)(_byte); \
+ _ctrl <<= 1; \
+} while (0)
+
+
+/* ----------
+ * pglz_out_tag -
+ *
+ * Outputs a backward reference tag to the destination buffer and sets
+ * the matching control bit: 2 bytes if _len <= 17, else 3 bytes with
+ * _len - 18 in the extra byte. Only the low 12 bits of _off are stored.
+ * ----------
+ */
+#define pglz_out_tag(_ctrlp,_ctrlb,_ctrl,_buf,_len,_off) \
+do { \
+ pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf); \
+ _ctrlb |= _ctrl; \
+ _ctrl <<= 1; \
+ if (_len > 17) \
+ { \
+ (_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | 0x0f); \
+ (_buf)[1] = (unsigned char)(((_off) & 0xff)); \
+ (_buf)[2] = (unsigned char)((_len) - 18); \
+ (_buf) += 3; \
+ } else { \
+ (_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | ((_len) - 3)); \
+ (_buf)[1] = (unsigned char)((_off) & 0xff); \
+ (_buf) += 2; \
+ } \
+} while (0)
+
+
+/* ----------
+ * pglz_find_match -
+ *
+ * Look up the history table for an earlier sequence in the input
+ * buffer that matches the bytes at "input". Returns 1 and sets *lenp
+ * and *offp if a match of 3 or more bytes was found, else returns 0.
+ * ----------
+ */
+static inline int
+pglz_find_match(int16 *hstart, const char *input, const char *end,
+ int *lenp, int *offp, int good_match, int good_drop, int mask)
+{
+ PGLZ_HistEntry *hent;
+ int16 hentno;
+ int32 len = 0; /* length of the best match so far */
+ int32 off = 0; /* backward offset of that match */
+
+ /*
+ * Traverse the linked history list until a good enough match is found.
+ */
+ hentno = hstart[pglz_hist_idx(input, end, mask)];
+ hent = &hist_entries[hentno];
+ while (hent != INVALID_ENTRY_PTR)
+ {
+ const char *ip = input;
+ const char *hp = hent->pos;
+ int32 thisoff;
+ int32 thislen;
+
+ /*
+ * Stop if the offset no longer fits into a tag; later entries are older.
+ */
+ thisoff = ip - hp;
+ if (thisoff >= 0x0fff)
+ break;
+
+ /*
+ * Determine length of match. A better match must be larger than the
+ * best so far. And if we already have a match of 16 or more bytes,
+ * it's worth the call overhead to use memcmp() to check if this match
+ * is equal for the same size. After that we must fallback to
+ * character by character comparison to know the exact position where
+ * the diff occurred.
+ */
+ thislen = 0;
+ if (len >= 16)
+ {
+ if (memcmp(ip, hp, len) == 0)
+ {
+ thislen = len;
+ ip += len;
+ hp += len;
+ while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH)
+ {
+ thislen++;
+ ip++;
+ hp++;
+ }
+ }
+ }
+ else
+ {
+ while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH)
+ {
+ thislen++;
+ ip++;
+ hp++;
+ }
+ }
+
+ /*
+ * Remember this match as the best (if it is)
+ */
+ if (thislen > len)
+ {
+ len = thislen;
+ off = thisoff;
+ }
+
+ /*
+ * Advance to the next history entry
+ */
+ hent = hent->next;
+
+ /*
+ * Be happy with lesser good matches the more entries we visited. But
+ * no point in doing calculation if we're at end of list.
+ */
+ if (hent != INVALID_ENTRY_PTR)
+ {
+ if (len >= good_match)
+ break;
+ good_match -= (good_match * good_drop) / 100;
+ }
+ }
+
+ /*
+ * Return match information only if it results at least in one byte
+ * reduction.
+ */
+ if (len > 2)
+ {
+ *lenp = len;
+ *offp = off;
+ return 1;
+ }
+
+ return 0;
+}
+
+
+/* ----------
+ * pglz_compress -
+ *
+ * Compresses source into dest using strategy (NULL selects the default).
+ * Returns the number of bytes written to dest, or -1 if compression fails.
+ * ----------
+ */
+int32
+pglz_compress(const char *source, int32 slen, char *dest,
+ const PGLZ_Strategy *strategy)
+{
+ unsigned char *bp = (unsigned char *) dest;
+ unsigned char *bstart = bp;
+ int hist_next = 1; /* next hist_entries[] slot to use; 0 is reserved */
+ bool hist_recycle = false; /* set once hist_entries[] has wrapped */
+ const char *dp = source;
+ const char *dend = source + slen;
+ unsigned char ctrl_dummy = 0; /* absorbs the very first control-byte store */
+ unsigned char *ctrlp = &ctrl_dummy;
+ unsigned char ctrlb = 0; /* control byte being assembled */
+ unsigned char ctrl = 0; /* mask of the next control bit; 0 = need new byte */
+ bool found_match = false;
+ int32 match_len;
+ int32 match_off;
+ int32 good_match;
+ int32 good_drop;
+ int32 result_size;
+ int32 result_max;
+ int32 need_rate;
+ int hashsz;
+ int mask;
+
+ /*
+ * Our fallback strategy is the default.
+ */
+ if (strategy == NULL)
+ strategy = PGLZ_strategy_default;
+
+ /*
+ * If the strategy forbids compression (at all or if source chunk size out
+ * of range), fail.
+ */
+ if (strategy->match_size_good <= 0 ||
+ slen < strategy->min_input_size ||
+ slen > strategy->max_input_size)
+ return -1;
+
+ /*
+ * Limit the match parameters to the supported range.
+ */
+ good_match = strategy->match_size_good;
+ if (good_match > PGLZ_MAX_MATCH)
+ good_match = PGLZ_MAX_MATCH;
+ else if (good_match < 17)
+ good_match = 17;
+
+ good_drop = strategy->match_size_drop;
+ if (good_drop < 0)
+ good_drop = 0;
+ else if (good_drop > 100)
+ good_drop = 100;
+
+ need_rate = strategy->min_comp_rate;
+ if (need_rate < 0)
+ need_rate = 0;
+ else if (need_rate > 99)
+ need_rate = 99;
+
+ /*
+ * Compute the maximum result size allowed by the strategy, namely the
+ * input size minus the minimum wanted compression rate. This had better
+ * be <= slen, else we might overrun the provided output buffer.
+ */
+ if (slen > (INT_MAX / 100))
+ {
+ /* Approximate to avoid overflow */
+ result_max = (slen / 100) * (100 - need_rate);
+ }
+ else
+ result_max = (slen * (100 - need_rate)) / 100;
+
+ /*
+ * Experiments suggest that these hash sizes work pretty well. A large
+ * hash table minimizes collision, but has a higher startup cost. For a
+ * small input, the startup cost dominates. The table size must be a power
+ * of two.
+ */
+ if (slen < 128)
+ hashsz = 512;
+ else if (slen < 256)
+ hashsz = 1024;
+ else if (slen < 512)
+ hashsz = 2048;
+ else if (slen < 1024)
+ hashsz = 4096;
+ else
+ hashsz = 8192;
+ mask = hashsz - 1;
+
+ /*
+ * Initialize the history lists to empty. We do not need to zero the
+ * hist_entries[] array; its entries are initialized as they are used.
+ */
+ memset(hist_start, 0, hashsz * sizeof(int16));
+
+ /*
+ * Compress the source directly into the output buffer.
+ */
+ while (dp < dend)
+ {
+ /*
+ * If we already exceeded the maximum result size, fail.
+ *
+ * We check once per loop; since the loop body could emit as many as 4
+ * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better
+ * allow 4 slop bytes.
+ */
+ if (bp - bstart >= result_max)
+ return -1;
+
+ /*
+ * If we've emitted more than first_success_by bytes without finding
+ * anything compressible at all, fail. This lets us fall out
+ * reasonably quickly when looking at incompressible input (such as
+ * pre-compressed data).
+ */
+ if (!found_match && bp - bstart >= strategy->first_success_by)
+ return -1;
+
+ /*
+ * Try to find a match in the history
+ */
+ if (pglz_find_match(hist_start, dp, dend, &match_len,
+ &match_off, good_match, good_drop, mask))
+ {
+ /*
+ * Create the tag and add history entries for all matched
+ * characters.
+ */
+ pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
+ while (match_len--)
+ {
+ pglz_hist_add(hist_start, hist_entries,
+ hist_next, hist_recycle,
+ dp, dend, mask);
+ dp++; /* Do not do this ++ in the line above! */
+ /* The macro would do it four times - Jan. */
+ }
+ found_match = true;
+ }
+ else
+ {
+ /*
+ * No match found. Copy one literal byte.
+ */
+ pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
+ pglz_hist_add(hist_start, hist_entries,
+ hist_next, hist_recycle,
+ dp, dend, mask);
+ dp++; /* Do not do this ++ in the line above! */
+ /* The macro would do it four times - Jan. */
+ }
+ }
+
+ /*
+ * Write out the last control byte and check that we haven't overrun the
+ * output size allowed by the strategy.
+ */
+ *ctrlp = ctrlb;
+ result_size = bp - bstart;
+ if (result_size >= result_max)
+ return -1;
+
+ /* success */
+ return result_size;
+}
+
+
+/* ----------
+ * pglz_decompress -
+ *
+ * Decompresses source into dest. Returns the number of bytes
+ * decompressed into the destination buffer, or -1 if the
+ * compressed data is corrupted.
+ *
+ * If check_complete is true, the data is considered corrupted
+ * if we don't exactly fill the destination buffer. Callers that
+ * are extracting a slice typically can't apply this check.
+ * ----------
+ */
+int32
+pglz_decompress(const char *source, int32 slen, char *dest,
+ int32 rawsize, bool check_complete)
+{
+ const unsigned char *sp;
+ const unsigned char *srcend;
+ unsigned char *dp;
+ unsigned char *destend;
+
+ sp = (const unsigned char *) source;
+ srcend = ((const unsigned char *) source) + slen;
+ dp = (unsigned char *) dest;
+ destend = dp + rawsize;
+
+ while (sp < srcend && dp < destend)
+ {
+ /*
+ * Read one control byte and process the next 8 items (or as many as
+ * remain in the compressed input).
+ */
+ unsigned char ctrl = *sp++;
+ int ctrlc;
+
+ for (ctrlc = 0; ctrlc < 8 && sp < srcend && dp < destend; ctrlc++)
+ {
+ if (ctrl & 1)
+ {
+ /*
+ * Set control bit means we must read a match tag. The match
+ * is coded with two bytes. First byte uses lower nibble to
+ * code length - 3. Higher nibble contains upper 4 bits of the
+ * offset. The next following byte contains the lower 8 bits
+ * of the offset. If the length is coded as 18, another
+ * extension tag byte tells how much longer the match really
+ * was (0-255).
+ */
+ int32 len;
+ int32 off;
+
+ len = (sp[0] & 0x0f) + 3;
+ off = ((sp[0] & 0xf0) << 4) | sp[1];
+ sp += 2;
+ if (len == 18)
+ len += *sp++;
+
+ /*
+ * Check for corrupt data: if we fell off the end of the
+ * source, or if we obtained off = 0, or if off is more than
+ * the distance back to the buffer start, we have problems.
+ * (off = 0 would loop forever below; too large an off would
+ * read before the start of dest.) NOTE(review): the tag bytes
+ * were already fetched above, so on truncated input up to two
+ * bytes past srcend have been read before this check fires.
+ */
+ if (unlikely(sp > srcend || off == 0 ||
+ off > (dp - (unsigned char *) dest)))
+ return -1;
+
+ /*
+ * Don't emit more data than requested.
+ */
+ len = Min(len, destend - dp);
+
+ /*
+ * Now we copy the bytes specified by the tag from OUTPUT to
+ * OUTPUT (copy len bytes from dp - off to dp). The copied
+ * areas could overlap, so to avoid undefined behavior in
+ * memcpy(), be careful to copy only non-overlapping regions.
+ *
+ * Note that we cannot use memmove() instead, since while its
+ * behavior is well-defined, it's also not what we want.
+ */
+ while (off < len)
+ {
+ /*
+ * We can safely copy "off" bytes since that clearly
+ * results in non-overlapping source and destination.
+ */
+ memcpy(dp, dp - off, off);
+ len -= off;
+ dp += off;
+
+ /*----------
+ * This bit is less obvious: we can double "off" after
+ * each such step. Consider this raw input:
+ * 112341234123412341234
+ * This will be encoded as 5 literal bytes "11234" and
+ * then a match tag with length 16 and offset 4. After
+ * memcpy'ing the first 4 bytes, we will have emitted
+ * 112341234
+ * so we can double "off" to 8, then after the next step
+ * we have emitted
+ * 11234123412341234
+ * Then we can double "off" again, after which it is more
+ * than the remaining "len" so we fall out of this loop
+ * and finish with a non-overlapping copy of the
+ * remainder. In general, a match tag with off < len
+ * implies that the decoded data has a repeat length of
+ * "off". We can handle 1, 2, 4, etc repetitions of the
+ * repeated string per memcpy until we get to a situation
+ * where the final copy step is non-overlapping.
+ *
+ * (Another way to understand this is that we are keeping
+ * the copy source point dp - off the same throughout.)
+ *----------
+ */
+ off += off;
+ }
+ memcpy(dp, dp - off, len);
+ dp += len;
+ }
+ else
+ {
+ /*
+ * An unset control bit means LITERAL BYTE. So we just copy
+ * one from INPUT to OUTPUT.
+ */
+ *dp++ = *sp++;
+ }
+
+ /*
+ * Advance the control bit
+ */
+ ctrl >>= 1;
+ }
+ }
+
+ /*
+ * If requested, check we decompressed the right amount.
+ */
+ if (check_complete && (dp != destend || sp != srcend))
+ return -1;
+
+ /*
+ * That's it.
+ */
+ return (char *) dp - dest; /* number of bytes written to dest */
+}
+
+
+/* ----------
+ * pglz_maximum_compressed_size -
+ *
+ * Calculate the maximum number of compressed bytes that can be needed
+ * to decompress the first rawsize bytes of raw data. The result is
+ * capped at total_compressed_size.
+ *
+ * We can't use PGLZ_MAX_OUTPUT for this purpose, because that's used to size
+ * the compression buffer (and abort the compression). It does not really say
+ * what's the maximum compressed size for an input of a given length, and it
+ * may happen that while the whole value is compressible (and thus fits into
+ * PGLZ_MAX_OUTPUT nicely), the prefix is not compressible at all.
+ * ----------
+ */
+int32
+pglz_maximum_compressed_size(int32 rawsize, int32 total_compressed_size)
+{
+ int64 compressed_size;
+
+ /*
+ * pglz uses one control bit per byte, so if the entire desired prefix is
+ * represented as literal bytes, we'll need (rawsize * 9) bits. We care
+ * about bytes though, so be sure to round up not down.
+ *
+ * Use int64 here to prevent overflow during calculation.
+ */
+ compressed_size = ((int64) rawsize * 9 + 7) / 8;
+
+ /*
+ * The above fails to account for a corner case: we could have compressed
+ * data that starts with N-1 or N-2 literal bytes and then has a match tag
+ * of 2 or 3 bytes. It's therefore possible that we need to fetch 1 or 2
+ * more bytes in order to have the whole match tag. (Match tags earlier
+ * in the compressed data don't cause a problem, since they should
+ * represent more decompressed bytes than they occupy themselves.)
+ */
+ compressed_size += 2;
+
+ /*
+ * Maximum compressed size can't be larger than total compressed size.
+ * (This also ensures that our result fits in int32.)
+ */
+ compressed_size = Min(compressed_size, total_compressed_size);
+
+ return (int32) compressed_size;
+}
diff --git a/src/common/pg_prng.c b/src/common/pg_prng.c
new file mode 100644
index 0000000..c7bb92e
--- /dev/null
+++ b/src/common/pg_prng.c
@@ -0,0 +1,282 @@
+/*-------------------------------------------------------------------------
+ *
+ * Pseudo-Random Number Generator
+ *
+ * We use Blackman and Vigna's xoroshiro128** 1.0 algorithm
+ * to have a small, fast PRNG suitable for generating reasonably
+ * good-quality 64-bit data. This should not be considered
+ * cryptographically strong, however.
+ *
+ * About these generators: https://prng.di.unimi.it/
+ * See also https://en.wikipedia.org/wiki/List_of_random_number_generators
+ *
+ * Copyright (c) 2021-2023, PostgreSQL Global Development Group
+ *
+ * src/common/pg_prng.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "c.h"
+
+#include <math.h>
+
+#include "common/pg_prng.h"
+#include "port/pg_bitutils.h"
+
+/* X/Open (XSI) requires <math.h> to provide M_PI, but core POSIX does not */
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+
+/* process-wide state vector; starts out all-zeroes, so seed it before use */
+pg_prng_state pg_global_prng_state;
+
+
+/*
+ * 64-bit rotate left by "bits" (must be 1..63: bits = 0 would shift by 64)
+ */
+static inline uint64
+rotl(uint64 x, int bits)
+{
+ return (x << bits) | (x >> (64 - bits));
+}
+
+/*
+ * The basic xoroshiro128** algorithm.
+ * Generates and returns a 64-bit uniformly distributed number,
+ * updating the state vector for next time.
+ *
+ * Note: the state vector must not be all-zeroes, as that is a fixed point
+ * (the state would stay zero and every returned value would be zero).
+ */
+static uint64
+xoroshiro128ss(pg_prng_state *state)
+{
+ uint64 s0 = state->s0,
+ sx = state->s1 ^ s0,
+ val = rotl(s0 * 5, 7) * 9; /* the output depends on s0 only */
+
+ /* update state */
+ state->s0 = rotl(s0, 24) ^ sx ^ (sx << 16);
+ state->s1 = rotl(sx, 37);
+
+ return val;
+}
+
+/*
+ * We use this generator just to fill the xoroshiro128** state vector
+ * from a 64-bit seed. Each call advances *state and returns a new value.
+ */
+static uint64
+splitmix64(uint64 *state)
+{
+ /* state update */
+ uint64 val = (*state += UINT64CONST(0x9E3779B97f4A7C15));
+
+ /* value extraction */
+ val = (val ^ (val >> 30)) * UINT64CONST(0xBF58476D1CE4E5B9);
+ val = (val ^ (val >> 27)) * UINT64CONST(0x94D049BB133111EB);
+
+ return val ^ (val >> 31);
+}
+
+/*
+ * Initialize the PRNG state from a 64-bit integer (expanded to two state
+ * words by splitmix64), taking care that we don't produce all-zeroes.
+ */
+void
+pg_prng_seed(pg_prng_state *state, uint64 seed)
+{
+ state->s0 = splitmix64(&seed);
+ state->s1 = splitmix64(&seed);
+ /* Let's just make sure we didn't get all-zeroes */
+ (void) pg_prng_seed_check(state);
+}
+
+/*
+ * Initialize the PRNG state from a double in the range [-1.0, 1.0],
+ * taking care that we don't produce all-zeroes.
+ */
+void
+pg_prng_fseed(pg_prng_state *state, double fseed)
+{
+ /* Assume there's about 52 mantissa bits; the sign contributes too. */
+ int64 seed = ((double) ((UINT64CONST(1) << 52) - 1)) * fseed;
+
+ pg_prng_seed(state, (uint64) seed); /* a negative seed wraps to a large uint64 */
+}
+
+/*
+ * Validate a PRNG seed value, replacing an all-zeroes state; always true.
+ */
+bool
+pg_prng_seed_check(pg_prng_state *state)
+{
+ /*
+ * If the seeding mechanism chanced to produce all-zeroes, insert
+ * something nonzero. Anything would do; use Knuth's LCG parameters.
+ */
+ if (unlikely(state->s0 == 0 && state->s1 == 0))
+ {
+ state->s0 = UINT64CONST(0x5851F42D4C957F2D);
+ state->s1 = UINT64CONST(0x14057B7EF767814F);
+ }
+
+ /* As a convenience for the pg_prng_strong_seed macro, return true */
+ return true;
+}
+
+/*
+ * Select a random uint64 uniformly from the range [0, PG_UINT64_MAX];
+ * this is the raw generator output.
+ */
+uint64
+pg_prng_uint64(pg_prng_state *state)
+{
+ return xoroshiro128ss(state);
+}
+
+/*
+ * Select a random uint64 uniformly from the range [rmin, rmax].
+ * If rmax <= rmin, rmin is always produced.
+ */
+uint64
+pg_prng_uint64_range(pg_prng_state *state, uint64 rmin, uint64 rmax)
+{
+ uint64 val;
+
+ if (likely(rmax > rmin))
+ {
+ /*
+ * Use bitmask rejection method to generate an offset in 0..range.
+ * Each generated val is less than twice "range", so on average we
+ * should not have to iterate more than twice.
+ */
+ uint64 range = rmax - rmin; /* nonzero here */
+ uint32 rshift = 63 - pg_leftmost_one_pos64(range);
+
+ do
+ {
+ val = xoroshiro128ss(state) >> rshift;
+ } while (val > range);
+ }
+ else
+ val = 0; /* degenerate range: return rmin itself */
+
+ return rmin + val;
+}
+
+/*
+ * Select a random int64 uniformly from the range [PG_INT64_MIN, PG_INT64_MAX]
+ * by casting the raw 64-bit generator output.
+ */
+int64
+pg_prng_int64(pg_prng_state *state)
+{
+ return (int64) xoroshiro128ss(state);
+}
+
+/*
+ * Select a random int64 uniformly from the range [0, PG_INT64_MAX]
+ * by clearing the top bit of the raw generator output.
+ */
+int64
+pg_prng_int64p(pg_prng_state *state)
+{
+ return (int64) (xoroshiro128ss(state) & UINT64CONST(0x7FFFFFFFFFFFFFFF));
+}
+
+/*
+ * Select a random uint32 uniformly from the range [0, PG_UINT32_MAX].
+ */
+uint32
+pg_prng_uint32(pg_prng_state *state)
+{
+ /*
+ * Although xoroshiro128** is not known to have any weaknesses in
+ * randomness of low-order bits, we prefer to use the upper bits of its
+ * result here and below.
+ */
+ uint64 v = xoroshiro128ss(state);
+
+ return (uint32) (v >> 32); /* upper 32 bits of one 64-bit draw */
+}
+
+/*
+ * Select a random int32 uniformly from the range [PG_INT32_MIN, PG_INT32_MAX].
+ */
+int32
+pg_prng_int32(pg_prng_state *state)
+{
+ uint64 v = xoroshiro128ss(state);
+
+ return (int32) (v >> 32); /* upper 32 bits of one 64-bit draw */
+}
+
+/*
+ * Select a random int32 uniformly from the range [0, PG_INT32_MAX].
+ */
+int32
+pg_prng_int32p(pg_prng_state *state)
+{
+ uint64 v = xoroshiro128ss(state);
+
+ return (int32) (v >> 33); /* upper 31 bits, so never negative */
+}
+
+/*
+ * Select a random double uniformly from the range [0.0, 1.0).
+ *
+ * Note: if you want a result in the range (0.0, 1.0], the standard way
+ * to get that is "1.0 - pg_prng_double(state)".
+ */
+double
+pg_prng_double(pg_prng_state *state)
+{
+ uint64 v = xoroshiro128ss(state);
+
+ /*
+ * As above, assume there's 52 mantissa bits in a double. This result
+ * could round to 1.0 if double's precision is less than that; but we
+ * assume IEEE float arithmetic elsewhere in Postgres, so this seems OK.
+ */
+ return ldexp((double) (v >> (64 - 52)), -52); /* top 52 bits times 2^-52 */
+}
+
+/*
+ * Select a random double from the normal distribution with
+ * mean = 0.0 and stddev = 1.0.
+ *
+ * To get a result from a different normal distribution use
+ * STDDEV * pg_prng_double_normal() + MEAN
+ *
+ * Uses https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
+ */
+double
+pg_prng_double_normal(pg_prng_state *state)
+{
+ double u1,
+ u2,
+ z0;
+
+ /*
+ * pg_prng_double generates [0, 1), but for the basic version of the
+ * Box-Muller transform the two uniformly distributed random numbers are
+ * expected to be in (0, 1]; in particular we'd better not compute log(0).
+ */
+ u1 = 1.0 - pg_prng_double(state);
+ u2 = 1.0 - pg_prng_double(state);
+
+ /* Apply Box-Muller transform; only the sine output is computed and used */
+ z0 = sqrt(-2.0 * log(u1)) * sin(2.0 * M_PI * u2);
+ return z0;
+}
+
+/*
+ * Select a random boolean value (the top bit of one 64-bit draw).
+ */
+bool
+pg_prng_bool(pg_prng_state *state)
+{
+ uint64 v = xoroshiro128ss(state);
+
+ return (bool) (v >> 63);
+}
diff --git a/src/common/pgfnames.c b/src/common/pgfnames.c
new file mode 100644
index 0000000..9d2fe9d
--- /dev/null
+++ b/src/common/pgfnames.c
@@ -0,0 +1,94 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgfnames.c
+ * directory handling functions
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/pgfnames.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <dirent.h>
+
+#ifndef FRONTEND
+#define pg_log_warning(...) elog(WARNING, __VA_ARGS__)
+#else
+#include "common/logging.h"
+#endif
+
+/*
+ * pgfnames
+ *
+ * Build a NULL-terminated array holding the name of every entry of the
+ * directory "path" other than "." and "..".  If the directory cannot be
+ * opened, a warning is logged and NULL is returned.  Read and close
+ * failures are logged too, but whatever was collected is still returned.
+ *
+ * The array and the names are allocated here; the caller must hand the
+ * result to pgfnames_cleanup later to release them.
+ */
+char **
+pgfnames(const char *path)
+{
+    DIR        *dirp;
+    char      **names;
+    int         count = 0;
+    int         capacity = 200; /* enough for many small dbs */
+
+    dirp = opendir(path);
+    if (dirp == NULL)
+    {
+        pg_log_warning("could not open directory \"%s\": %m", path);
+        return NULL;
+    }
+
+    names = (char **) palloc(capacity * sizeof(char *));
+
+    for (;;)
+    {
+        struct dirent *entry;
+
+        /* readdir signals errors only through errno, so clear it first */
+        errno = 0;
+        entry = readdir(dirp);
+        if (entry == NULL)
+            break;
+
+        if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
+            continue;
+
+        /* always keep one slot in reserve for the terminating NULL */
+        if (count + 1 >= capacity)
+        {
+            capacity *= 2;
+            names = (char **) repalloc(names, capacity * sizeof(char *));
+        }
+        names[count++] = pstrdup(entry->d_name);
+    }
+
+    if (errno)
+        pg_log_warning("could not read directory \"%s\": %m", path);
+
+    names[count] = NULL;
+
+    if (closedir(dirp))
+        pg_log_warning("could not close directory \"%s\": %m", path);
+
+    return names;
+}
+
+
+/*
+ * pgfnames_cleanup
+ *
+ * Release a NULL-terminated name array: every name first, then the
+ * array itself.
+ */
+void
+pgfnames_cleanup(char **filenames)
+{
+    int         i;
+
+    for (i = 0; filenames[i] != NULL; i++)
+        pfree(filenames[i]);
+
+    pfree(filenames);
+}
diff --git a/src/common/protocol_openssl.c b/src/common/protocol_openssl.c
new file mode 100644
index 0000000..089cbd3
--- /dev/null
+++ b/src/common/protocol_openssl.c
@@ -0,0 +1,117 @@
+/*-------------------------------------------------------------------------
+ *
+ * protocol_openssl.c
+ * OpenSSL functionality shared between frontend and backend
+ *
+ * This should only be used if code is compiled with OpenSSL support.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/protocol_openssl.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/openssl.h"
+
+/*
+ * Replacements for APIs introduced in OpenSSL 1.1.0.
+ */
+#ifndef SSL_CTX_set_min_proto_version
+
+/*
+ * OpenSSL versions that support TLS 1.3 shouldn't get here because they
+ * already have these functions. So we don't have to keep updating the below
+ * code for every new TLS version, and eventually it can go away. But let's
+ * just check this to make sure ...
+ */
+#ifdef TLS1_3_VERSION
+#error OpenSSL version mismatch
+#endif
+
+int
+SSL_CTX_set_min_proto_version(SSL_CTX *ctx, int version)
+{
+ int ssl_options = SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3; /* SSLv2 and SSLv3 are always disabled */
+
+ if (version > TLS1_VERSION)
+ ssl_options |= SSL_OP_NO_TLSv1;
+
+ /*
+ * This function stands in for the OpenSSL 1.1.0 API of the same name: it
+ * sets an SSL_OP_NO_* option for every protocol older than "version", and
+ * returns 1 on success or 0 on failure.
+ *
+ * Some OpenSSL versions define TLS*_VERSION macros but not the
+ * corresponding SSL_OP_NO_* macro, so in those cases we have to return
+ * unsuccessfully here.  When that happens, ctx is left untouched because
+ * the options are only applied at the very end.
+ */
+#ifdef TLS1_1_VERSION
+ if (version > TLS1_1_VERSION)
+ {
+#ifdef SSL_OP_NO_TLSv1_1
+ ssl_options |= SSL_OP_NO_TLSv1_1;
+#else
+ return 0;
+#endif
+ }
+#endif
+#ifdef TLS1_2_VERSION
+ if (version > TLS1_2_VERSION)
+ {
+#ifdef SSL_OP_NO_TLSv1_2
+ ssl_options |= SSL_OP_NO_TLSv1_2;
+#else
+ return 0;
+#endif
+ }
+#endif
+
+ SSL_CTX_set_options(ctx, ssl_options); /* return value is not checked */
+
+ return 1; /* success */
+}
+
+int
+SSL_CTX_set_max_proto_version(SSL_CTX *ctx, int version)
+{
+ int ssl_options = 0; /* nothing is disabled unless a check below says so */
+
+ Assert(version != 0); /* callers must pass an explicit protocol version */
+
+ /*
+ * This function stands in for the OpenSSL 1.1.0 API of the same name: it
+ * sets an SSL_OP_NO_* option for TLS 1.1 and/or TLS 1.2 when they are
+ * newer than "version", and returns 1 on success or 0 on failure.
+ *
+ * Some OpenSSL versions define TLS*_VERSION macros but not the
+ * corresponding SSL_OP_NO_* macro, so in those cases we have to return
+ * unsuccessfully here.  When that happens, ctx is left untouched because
+ * the options are only applied at the very end.
+ */
+#ifdef TLS1_1_VERSION
+ if (version < TLS1_1_VERSION)
+ {
+#ifdef SSL_OP_NO_TLSv1_1
+ ssl_options |= SSL_OP_NO_TLSv1_1;
+#else
+ return 0;
+#endif
+ }
+#endif
+#ifdef TLS1_2_VERSION
+ if (version < TLS1_2_VERSION)
+ {
+#ifdef SSL_OP_NO_TLSv1_2
+ ssl_options |= SSL_OP_NO_TLSv1_2;
+#else
+ return 0;
+#endif
+ }
+#endif
+
+ SSL_CTX_set_options(ctx, ssl_options); /* return value is not checked */
+
+ return 1; /* success */
+}
+
+#endif /* !SSL_CTX_set_min_proto_version */
diff --git a/src/common/psprintf.c b/src/common/psprintf.c
new file mode 100644
index 0000000..c1d2807
--- /dev/null
+++ b/src/common/psprintf.c
@@ -0,0 +1,151 @@
+/*-------------------------------------------------------------------------
+ *
+ * psprintf.c
+ * sprintf into an allocated-on-demand buffer
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/psprintf.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+
+#include "postgres.h"
+
+#include "utils/memutils.h"
+
+#else
+
+#include "postgres_fe.h"
+
+/* It's possible we could use a different value for this in frontend code */
+#define MaxAllocSize ((Size) 0x3fffffff) /* 1 gigabyte - 1 */
+
+#endif
+
+
+/*
+ * psprintf
+ *
+ * Format text according to fmt (an sprintf-style format string) and return
+ * the result in a buffer that is sized on demand.  The buffer comes from
+ * palloc in the backend, or malloc in frontend builds, and it is up to the
+ * caller to free it when no longer needed, if appropriate.
+ *
+ * Failures are never returned to the caller: they are reported via
+ * elog(ERROR) in the backend, or printf-to-stderr-and-exit() in frontend
+ * builds.  One should therefore think twice about using this in libpq.
+ */
+char *
+psprintf(const char *fmt,...)
+{
+    int         save_errno = errno;
+    size_t      bufsize = 128;  /* first guess at the space needed */
+
+    for (;;)
+    {
+        char       *buf;
+        va_list     ap;
+        size_t      needed;
+
+        /* In frontend this maps to malloc with exit-on-error. */
+        buf = (char *) palloc(bufsize);
+
+        /* Restore the entry-time errno so that "%m" formats consistently. */
+        errno = save_errno;
+        va_start(ap, fmt);
+        needed = pvsnprintf(buf, bufsize, fmt, ap);
+        va_end(ap);
+
+        if (needed >= bufsize)
+        {
+            /* Too small: drop this buffer and retry with the suggested size. */
+            pfree(buf);
+            bufsize = needed;
+            continue;
+        }
+
+        return buf;             /* success */
+    }
+}
+
+/*
+ * pvsnprintf
+ *
+ * Attempt to format text data under the control of fmt (an sprintf-style
+ * format string) and insert it into buf (which has length len).
+ *
+ * If successful, return the number of bytes emitted, not counting the
+ * trailing zero byte. This will always be strictly less than len.
+ *
+ * If there's not enough space in buf, return an estimate of the buffer size
+ * needed to succeed (this *must* be more than the given len, else callers
+ * might loop infinitely).
+ *
+ * Other error cases do not return, but exit via elog(ERROR) or exit().
+ * Hence, this shouldn't be used inside libpq.
+ *
+ * Caution: callers must be sure to preserve their entry-time errno
+ * when looping, in case the fmt contains "%m".
+ *
+ * Note that the semantics of the return value are not exactly C99's.
+ * First, we don't promise that the estimated buffer size is exactly right;
+ * callers must be prepared to loop multiple times to get the right size.
+ * (Given a C99-compliant vsnprintf, that won't happen, but it is rumored
+ * that some implementations don't always return the same value ...)
+ * Second, we return the recommended buffer size, not one less than that;
+ * this lets overflow concerns be handled here rather than in the callers.
+ */
+size_t
+pvsnprintf(char *buf, size_t len, const char *fmt, va_list args)
+{
+ int nprinted;
+
+ nprinted = vsnprintf(buf, len, fmt, args);
+
+ /* We assume failure means the fmt is bogus, hence hard failure is OK */
+ if (unlikely(nprinted < 0))
+ {
+#ifndef FRONTEND
+ elog(ERROR, "vsnprintf failed: %m with format string \"%s\"", fmt);
+#else
+ fprintf(stderr, "vsnprintf failed: %s with format string \"%s\"\n",
+ strerror(errno), fmt);
+ exit(EXIT_FAILURE);
+#endif
+ }
+
+ if ((size_t) nprinted < len) /* cast is safe: negative results never get this far */
+ {
+ /* Success. Note nprinted does not include trailing null. */
+ return (size_t) nprinted;
+ }
+
+ /*
+ * We assume a C99-compliant vsnprintf, so believe its estimate of the
+ * required space, and add one for the trailing null. (If it's wrong, the
+ * logic will still work, but we may loop multiple times.)
+ *
+ * Choke if the required space would exceed MaxAllocSize. Note we use
+ * this palloc-oriented overflow limit even when in frontend.
+ */
+ if (unlikely((size_t) nprinted > MaxAllocSize - 1))
+ {
+#ifndef FRONTEND
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("out of memory")));
+#else
+ fprintf(stderr, _("out of memory\n"));
+ exit(EXIT_FAILURE);
+#endif
+ }
+
+ return nprinted + 1; /* cannot overflow: nprinted <= MaxAllocSize - 1 here */
+}
diff --git a/src/common/relpath.c b/src/common/relpath.c
new file mode 100644
index 0000000..87de5f6
--- /dev/null
+++ b/src/common/relpath.c
@@ -0,0 +1,210 @@
+/*-------------------------------------------------------------------------
+ * relpath.c
+ * Shared frontend/backend code to compute pathnames of relation files
+ *
+ * This module also contains some logic associated with fork names.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/relpath.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "catalog/pg_tablespace_d.h"
+#include "common/relpath.h"
+#include "storage/backendid.h"
+
+
+/*
+ * Lookup table of fork name by fork number.
+ *
+ * The array is indexed by fork number; the comment on each entry names the
+ * fork number it belongs to.  The static assertion after the table checks
+ * that it has exactly MAX_FORKNUM + 1 entries.
+ *
+ * If you add a new entry, remember to update the errhint in
+ * forkname_to_number() below, and update the SGML documentation for
+ * pg_relation_size().
+ */
+const char *const forkNames[] = {
+ "main", /* MAIN_FORKNUM */
+ "fsm", /* FSM_FORKNUM */
+ "vm", /* VISIBILITYMAP_FORKNUM */
+ "init" /* INIT_FORKNUM */
+};
+
+StaticAssertDecl(lengthof(forkNames) == (MAX_FORKNUM + 1),
+ "array length mismatch");
+
+/*
+ * forkname_to_number - map a fork name to its fork number
+ *
+ * The name must match an entry of forkNames[] exactly.  When nothing
+ * matches, the backend raises an error, whereas frontend code simply gets
+ * InvalidForkNumber back.
+ */
+ForkNumber
+forkname_to_number(const char *forkName)
+{
+    ForkNumber  candidate = 0;
+
+    while (candidate <= MAX_FORKNUM)
+    {
+        if (strcmp(forkNames[candidate], forkName) == 0)
+            return candidate;
+        candidate++;
+    }
+
+#ifndef FRONTEND
+    ereport(ERROR,
+            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+             errmsg("invalid fork name"),
+             errhint("Valid fork names are \"main\", \"fsm\", "
+                     "\"vm\", and \"init\".")));
+#endif
+
+    return InvalidForkNumber;
+}
+
+/*
+ * forkname_chars
+ *      Helps decide whether a filename could be a relation fork, as
+ *      opposed to an oddly named stray file that somehow ended up in the
+ *      database directory.  If the passed string starts with the name of
+ *      a fork other than the main fork, the length of that name is
+ *      returned and *fork (if not NULL) receives the fork number.
+ *      Otherwise 0 is returned and *fork (if not NULL) is set to
+ *      InvalidForkNumber.
+ *
+ * Note that the present coding assumes that there are no fork names which
+ * are prefixes of other fork names.
+ */
+int
+forkname_chars(const char *str, ForkNumber *fork)
+{
+    ForkNumber  found = InvalidForkNumber;
+    int         matched = 0;
+    ForkNumber  forkNum;
+
+    /* Start at 1: the main fork is deliberately not considered. */
+    for (forkNum = 1; forkNum <= MAX_FORKNUM; forkNum++)
+    {
+        int         len = strlen(forkNames[forkNum]);
+
+        if (strncmp(forkNames[forkNum], str, len) == 0)
+        {
+            found = forkNum;
+            matched = len;
+            break;
+        }
+    }
+
+    if (fork)
+        *fork = found;
+    return matched;
+}
+
+
+/*
+ * GetDatabasePath - build the path of a database's directory
+ *
+ * The returned string is palloc'd.
+ *
+ * XXX this must agree with GetRelationPath()!
+ */
+char *
+GetDatabasePath(Oid dbOid, Oid spcOid)
+{
+    /* Shared system relations live in {datadir}/global */
+    if (spcOid == GLOBALTABLESPACE_OID)
+    {
+        Assert(dbOid == 0);
+        return pstrdup("global");
+    }
+
+    /* The default tablespace is {datadir}/base */
+    if (spcOid == DEFAULTTABLESPACE_OID)
+        return psprintf("base/%u", dbOid);
+
+    /* All other tablespaces are accessed via symlinks */
+    return psprintf("pg_tblspc/%u/%s/%u",
+                    spcOid, TABLESPACE_VERSION_DIRECTORY, dbOid);
+}
+
+/*
+ * GetRelationPath - construct path to a relation's file
+ *
+ * Result is a palloc'd string.
+ *
+ * The path has the form <dir>/<file>.  <dir> is "global" for
+ * GLOBALTABLESPACE_OID, "base/<dbOid>" for DEFAULTTABLESPACE_OID, and
+ * "pg_tblspc/<spcOid>/<TABLESPACE_VERSION_DIRECTORY>/<dbOid>" for any other
+ * tablespace.  <file> is relNumber, prefixed with "t<backendId>_" when
+ * backendId is not InvalidBackendId, and suffixed with "_<fork name>" for
+ * every fork other than MAIN_FORKNUM.
+ *
+ * Note: ideally, backendId would be declared as type BackendId, but relpath.h
+ * would have to include a backend-only header to do that; doesn't seem worth
+ * the trouble considering BackendId is just int anyway.
+ */
+char *
+GetRelationPath(Oid dbOid, Oid spcOid, RelFileNumber relNumber,
+ int backendId, ForkNumber forkNumber)
+{
+ char *path;
+
+ if (spcOid == GLOBALTABLESPACE_OID)
+ {
+ /* Shared system relations live in {datadir}/global */
+ Assert(dbOid == 0);
+ Assert(backendId == InvalidBackendId);
+ if (forkNumber != MAIN_FORKNUM)
+ path = psprintf("global/%u_%s",
+ relNumber, forkNames[forkNumber]);
+ else
+ path = psprintf("global/%u", relNumber);
+ }
+ else if (spcOid == DEFAULTTABLESPACE_OID)
+ {
+ /* The default tablespace is {datadir}/base */
+ if (backendId == InvalidBackendId)
+ {
+ if (forkNumber != MAIN_FORKNUM)
+ path = psprintf("base/%u/%u_%s",
+ dbOid, relNumber,
+ forkNames[forkNumber]);
+ else
+ path = psprintf("base/%u/%u",
+ dbOid, relNumber);
+ }
+ else
+ {
+ if (forkNumber != MAIN_FORKNUM)
+ path = psprintf("base/%u/t%d_%u_%s",
+ dbOid, backendId, relNumber,
+ forkNames[forkNumber]);
+ else
+ path = psprintf("base/%u/t%d_%u",
+ dbOid, backendId, relNumber);
+ }
+ }
+ else
+ {
+ /* All other tablespaces are accessed via symlinks */
+ if (backendId == InvalidBackendId)
+ {
+ if (forkNumber != MAIN_FORKNUM)
+ path = psprintf("pg_tblspc/%u/%s/%u/%u_%s",
+ spcOid, TABLESPACE_VERSION_DIRECTORY,
+ dbOid, relNumber,
+ forkNames[forkNumber]);
+ else
+ path = psprintf("pg_tblspc/%u/%s/%u/%u",
+ spcOid, TABLESPACE_VERSION_DIRECTORY,
+ dbOid, relNumber);
+ }
+ else
+ {
+ if (forkNumber != MAIN_FORKNUM)
+ path = psprintf("pg_tblspc/%u/%s/%u/t%d_%u_%s",
+ spcOid, TABLESPACE_VERSION_DIRECTORY,
+ dbOid, backendId, relNumber,
+ forkNames[forkNumber]);
+ else
+ path = psprintf("pg_tblspc/%u/%s/%u/t%d_%u",
+ spcOid, TABLESPACE_VERSION_DIRECTORY,
+ dbOid, backendId, relNumber);
+ }
+ }
+ return path;
+}
diff --git a/src/common/restricted_token.c b/src/common/restricted_token.c
new file mode 100644
index 0000000..4ae1ed1
--- /dev/null
+++ b/src/common/restricted_token.c
@@ -0,0 +1,174 @@
+/*-------------------------------------------------------------------------
+ *
+ * restricted_token.c
+ * helper routine to ensure restricted token on Windows
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/restricted_token.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#error "This file is not expected to be compiled for backend code"
+#endif
+
+#include "postgres_fe.h"
+
+#include "common/logging.h"
+#include "common/restricted_token.h"
+
+#ifdef WIN32
+
+/* internal vars: restrict_env holds the PG_RESTRICT_EXEC value read by get_restricted_token() */
+char *restrict_env;
+
+/* Windows API define missing from some versions of MingW headers */
+#ifndef DISABLE_MAX_PRIVILEGE
+#define DISABLE_MAX_PRIVILEGE 0x1
+#endif
+
+/*
+ * Create a restricted token and execute the specified process with it.
+ *
+ * cmd is the command line to run.  *processInfo receives the details of
+ * the new process, which is created suspended and then resumed.
+ *
+ * Returns restricted token on success and 0 on failure.  On failure an
+ * error has been logged and every handle and SID acquired here has been
+ * released again.  On the failure paths that perform cleanup, the error
+ * code of the failing call is restored with SetLastError() so the caller
+ * can still report it.
+ *
+ * On any system not containing the required functions, do nothing
+ * but still report an error.
+ */
+HANDLE
+CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION *processInfo)
+{
+    BOOL        b;
+    DWORD       err;
+    STARTUPINFO si;
+    HANDLE      origToken;
+    HANDLE      restrictedToken;
+    SID_IDENTIFIER_AUTHORITY NtAuthority = {SECURITY_NT_AUTHORITY};
+    SID_AND_ATTRIBUTES dropSids[2];
+
+    ZeroMemory(&si, sizeof(si));
+    si.cb = sizeof(si);
+
+    /* Open the current token to use as a base for the restricted one */
+    if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ALL_ACCESS, &origToken))
+    {
+        pg_log_error("could not open process token: error code %lu",
+                     GetLastError());
+        return 0;
+    }
+
+    /* Allocate list of SIDs to remove */
+    ZeroMemory(&dropSids, sizeof(dropSids));
+    if (!AllocateAndInitializeSid(&NtAuthority, 2,
+                                  SECURITY_BUILTIN_DOMAIN_RID, DOMAIN_ALIAS_RID_ADMINS, 0, 0, 0, 0, 0,
+                                  0, &dropSids[0].Sid) ||
+        !AllocateAndInitializeSid(&NtAuthority, 2,
+                                  SECURITY_BUILTIN_DOMAIN_RID, DOMAIN_ALIAS_RID_POWER_USERS, 0, 0, 0, 0, 0,
+                                  0, &dropSids[1].Sid))
+    {
+        err = GetLastError();
+        pg_log_error("could not allocate SIDs: error code %lu",
+                     err);
+
+        /*
+         * The first SID may already exist if it was the second allocation
+         * that failed; the array was zeroed above, so NULL means "never
+         * allocated".
+         */
+        if (dropSids[0].Sid != NULL)
+            FreeSid(dropSids[0].Sid);
+        CloseHandle(origToken);
+        SetLastError(err);
+        return 0;
+    }
+
+    b = CreateRestrictedToken(origToken,
+                              DISABLE_MAX_PRIVILEGE,
+                              sizeof(dropSids) / sizeof(dropSids[0]),
+                              dropSids,
+                              0, NULL,
+                              0, NULL,
+                              &restrictedToken);
+
+    /* Grab the error code before the cleanup calls below can overwrite it */
+    err = b ? ERROR_SUCCESS : GetLastError();
+
+    FreeSid(dropSids[1].Sid);
+    FreeSid(dropSids[0].Sid);
+    CloseHandle(origToken);
+
+    if (!b)
+    {
+        pg_log_error("could not create restricted token: error code %lu", err);
+        SetLastError(err);
+        return 0;
+    }
+
+#ifndef __CYGWIN__
+    AddUserToTokenDacl(restrictedToken);
+#endif
+
+    if (!CreateProcessAsUser(restrictedToken,
+                             NULL,
+                             cmd,
+                             NULL,
+                             NULL,
+                             TRUE,
+                             CREATE_SUSPENDED,
+                             NULL,
+                             NULL,
+                             &si,
+                             processInfo))
+
+    {
+        err = GetLastError();
+        pg_log_error("could not start process for command \"%s\": error code %lu", cmd, err);
+
+        /* Nobody else knows about the token, so don't leak its handle */
+        CloseHandle(restrictedToken);
+        SetLastError(err);
+        return 0;
+    }
+
+    ResumeThread(processInfo->hThread);
+    return restrictedToken;
+}
+#endif
+
+/*
+ * On Windows make sure that we are running with a restricted token,
+ * On other platforms do nothing.
+ *
+ * The environment variable PG_RESTRICT_EXEC marks a process that has
+ * already been re-executed.  If it is unset or not "1", this function sets
+ * it to "1", re-runs the current command line via CreateRestrictedProcess(),
+ * waits for that child and exits with the child's exit code.  If the
+ * re-execution fails, an error is logged and the function returns normally.
+ */
+void
+get_restricted_token(void)
+{
+#ifdef WIN32
+ HANDLE restrictedToken;
+
+ /*
+ * Before we execute another program, make sure that we are running with a
+ * restricted token. If not, re-execute ourselves with one.
+ */
+
+ if ((restrict_env = getenv("PG_RESTRICT_EXEC")) == NULL
+ || strcmp(restrict_env, "1") != 0)
+ {
+ PROCESS_INFORMATION pi;
+ char *cmdline;
+
+ ZeroMemory(&pi, sizeof(pi));
+
+ cmdline = pg_strdup(GetCommandLine());
+
+ setenv("PG_RESTRICT_EXEC", "1", 1); /* set before the child is created, so the child does not re-execute again */
+
+ if ((restrictedToken = CreateRestrictedProcess(cmdline, &pi)) == 0)
+ {
+ pg_log_error("could not re-execute with restricted token: error code %lu", GetLastError());
+ }
+ else
+ {
+ /*
+ * Successfully re-executed. Now wait for child process to capture
+ * the exit code.
+ */
+ DWORD x;
+
+ CloseHandle(restrictedToken);
+ CloseHandle(pi.hThread);
+ WaitForSingleObject(pi.hProcess, INFINITE);
+
+ if (!GetExitCodeProcess(pi.hProcess, &x))
+ pg_fatal("could not get exit code from subprocess: error code %lu", GetLastError());
+ exit(x); /* pass the child's exit code on as our own */
+ }
+ pg_free(cmdline); /* reached only when the re-execution failed */
+ }
+#endif
+}
diff --git a/src/common/rmtree.c b/src/common/rmtree.c
new file mode 100644
index 0000000..cd99d3f
--- /dev/null
+++ b/src/common/rmtree.c
@@ -0,0 +1,130 @@
+/*-------------------------------------------------------------------------
+ *
+ * rmtree.c
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/rmtree.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "common/file_utils.h"
+
+#ifndef FRONTEND
+#include "storage/fd.h"
+#define pg_log_warning(...) elog(WARNING, __VA_ARGS__)
+#define LOG_LEVEL WARNING
+#define OPENDIR(x) AllocateDir(x)
+#define CLOSEDIR(x) FreeDir(x)
+#else
+#include "common/logging.h"
+#define LOG_LEVEL PG_LOG_WARNING
+#define OPENDIR(x) opendir(x)
+#define CLOSEDIR(x) closedir(x)
+#endif
+
+/*
+ * rmtree
+ *
+ * Delete a directory tree recursively.
+ * Assumes path points to a valid directory.
+ * Deletes everything under path.
+ * If rmtopdir is true deletes the directory too.
+ * Returns true if successful, false if there was any problem.
+ * (The details of the problem are reported already, so caller
+ * doesn't really have to say anything more, but most do.)
+ *
+ * Problems with individual entries do not stop the traversal: a warning
+ * is logged, the result becomes false, and the remaining entries are
+ * still processed.
+ */
+bool
+rmtree(const char *path, bool rmtopdir)
+{
+    char        pathbuf[MAXPGPATH];
+    DIR        *dir;
+    struct dirent *de;
+    bool        result = true;
+    size_t      dirnames_size = 0;
+    size_t      dirnames_capacity = 8;
+    char      **dirnames;
+
+    dir = OPENDIR(path);
+    if (dir == NULL)
+    {
+        pg_log_warning("could not open directory \"%s\": %m", path);
+        return false;
+    }
+
+    /*
+     * Allocate the list of subdirectories only once the directory is known
+     * to be open, so that the early return above has nothing to leak.
+     */
+    dirnames = palloc(sizeof(char *) * dirnames_capacity);
+
+    /* readdir signals errors only through errno, so clear it before each call */
+    while (errno = 0, (de = readdir(dir)))
+    {
+        if (strcmp(de->d_name, ".") == 0 ||
+            strcmp(de->d_name, "..") == 0)
+            continue;
+        snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name);
+        switch (get_dirent_type(pathbuf, de, false, LOG_LEVEL))
+        {
+            case PGFILETYPE_ERROR:
+                /* already logged, press on */
+                break;
+            case PGFILETYPE_DIR:
+
+                /*
+                 * Defer recursion until after we've closed this directory, to
+                 * avoid using more than one file descriptor at a time.
+                 */
+                if (dirnames_size == dirnames_capacity)
+                {
+                    dirnames = repalloc(dirnames,
+                                        sizeof(char *) * dirnames_capacity * 2);
+                    dirnames_capacity *= 2;
+                }
+                dirnames[dirnames_size++] = pstrdup(pathbuf);
+                break;
+            default:
+                /* a file that has vanished in the meantime is not an error */
+                if (unlink(pathbuf) != 0 && errno != ENOENT)
+                {
+                    pg_log_warning("could not remove file \"%s\": %m", pathbuf);
+                    result = false;
+                }
+                break;
+        }
+    }
+
+    if (errno != 0)
+    {
+        pg_log_warning("could not read directory \"%s\": %m", path);
+        result = false;
+    }
+
+    CLOSEDIR(dir);
+
+    /* Now recurse into the subdirectories we found. */
+    for (size_t i = 0; i < dirnames_size; ++i)
+    {
+        if (!rmtree(dirnames[i], true))
+            result = false;
+        pfree(dirnames[i]);
+    }
+
+    if (rmtopdir)
+    {
+        if (rmdir(path) != 0)
+        {
+            pg_log_warning("could not remove directory \"%s\": %m", path);
+            result = false;
+        }
+    }
+
+    pfree(dirnames);
+
+    return result;
+}
diff --git a/src/common/ryu_common.h b/src/common/ryu_common.h
new file mode 100644
index 0000000..ad850ac
--- /dev/null
+++ b/src/common/ryu_common.h
@@ -0,0 +1,133 @@
+/*---------------------------------------------------------------------------
+ *
+ * Common routines for Ryu floating-point output.
+ *
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/ryu_common.h
+ *
+ * This is a modification of code taken from github.com/ulfjack/ryu under the
+ * terms of the Boost license (not the Apache license). The original copyright
+ * notice follows:
+ *
+ * Copyright 2018 Ulf Adams
+ *
+ * The contents of this file may be used under the terms of the Apache
+ * License, Version 2.0.
+ *
+ * (See accompanying file LICENSE-Apache or copy at
+ * http://www.apache.org/licenses/LICENSE-2.0)
+ *
+ * Alternatively, the contents of this file may be used under the terms of the
+ * Boost Software License, Version 1.0.
+ *
+ * (See accompanying file LICENSE-Boost or copy at
+ * https://www.boost.org/LICENSE_1_0.txt)
+ *
+ * Unless required by applicable law or agreed to in writing, this software is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
+ *
+ *---------------------------------------------------------------------------
+ */
+#ifndef RYU_COMMON_H
+#define RYU_COMMON_H
+
+/*
+ * Upstream Ryu's output is always the shortest possible. But we adjust that
+ * slightly to improve portability: we avoid outputting the exact midpoint
+ * value between two representable floats, since that relies on the reader
+ * getting the round-to-even rule correct, which seems to be the common
+ * failure mode.
+ *
+ * Defining this to 1 would restore the upstream behavior.
+ */
+#define STRICTLY_SHORTEST 0
+
+#if SIZEOF_SIZE_T < 8
+#define RYU_32_BIT_PLATFORM
+#endif
+
+/* Returns 1 when e == 0, and ceil(log_2(5^e)) otherwise. */
+static inline uint32
+pow5bits(const int32 e)
+{
+    uint32      scaled;
+
+    /*
+     * The approximation is good up to the point where the 32-bit
+     * multiplication overflows, which happens at e = 3529.
+     *
+     * Were the multiplication done in 64 bits, it would fail at 5^4004
+     * which is just greater than 2^9297.
+     */
+    Assert(e >= 0);
+    Assert(e <= 3528);
+
+    scaled = ((uint32) e) * 1217359;
+    return (scaled >> 19) + 1;
+}
+
+/* Returns floor(log_10(2^e)). */
+static inline int32
+log10Pow2(const int32 e)
+{
+    uint32      scaled;
+
+    /*
+     * The approximation first goes wrong for 2^1651, which is just
+     * greater than 10^297.
+     */
+    Assert(e >= 0);
+    Assert(e <= 1650);
+
+    scaled = ((uint32) e) * 78913;
+    return (int32) (scaled >> 18);
+}
+
+/* Returns floor(log_10(5^e)). */
+static inline int32
+log10Pow5(const int32 e)
+{
+    uint32      scaled;
+
+    /*
+     * The approximation first goes wrong for 5^2621, which is just
+     * greater than 10^1832.
+     */
+    Assert(e >= 0);
+    Assert(e <= 2620);
+
+    scaled = ((uint32) e) * 732923;
+    return (int32) (scaled >> 20);
+}
+
+/*
+ * Write the text for a special value into result and return the number of
+ * bytes written; no terminating zero byte is added.  A set mantissa flag
+ * yields "NaN" (without sign); otherwise an optional '-' is followed by
+ * "Infinity" when the exponent flag is set, or by "0" when it is not.
+ */
+static inline int
+copy_special_str(char *const result, const bool sign, const bool exponent, const bool mantissa)
+{
+    int         pos = 0;
+
+    if (mantissa)
+    {
+        memcpy(result, "NaN", 3);
+        return 3;
+    }
+
+    if (sign)
+        result[pos++] = '-';
+
+    if (exponent)
+    {
+        memcpy(result + pos, "Infinity", 8);
+        return pos + 8;
+    }
+
+    result[pos] = '0';
+    return pos + 1;
+}
+
+/* Return the object representation of a float as a 32-bit integer. */
+static inline uint32
+float_to_bits(const float f)
+{
+    uint32      raw = 0;
+
+    /* Copy the bytes rather than casting pointers between the two types. */
+    memcpy(&raw, &f, sizeof(f));
+    return raw;
+}
+
+/* Return the object representation of a double as a 64-bit integer. */
+static inline uint64
+double_to_bits(const double d)
+{
+    uint64      raw = 0;
+
+    /* Copy the bytes rather than casting pointers between the two types. */
+    memcpy(&raw, &d, sizeof(d));
+    return raw;
+}
+
+#endif /* RYU_COMMON_H */
diff --git a/src/common/saslprep.c b/src/common/saslprep.c
new file mode 100644
index 0000000..3cf4988
--- /dev/null
+++ b/src/common/saslprep.c
@@ -0,0 +1,1245 @@
+/*-------------------------------------------------------------------------
+ * saslprep.c
+ * SASLprep normalization, for SCRAM authentication
+ *
+ * The SASLprep algorithm is used to process a user-supplied password into
+ * canonical form. For more details, see:
+ *
+ * [RFC3454] Preparation of Internationalized Strings ("stringprep"),
+ * http://www.ietf.org/rfc/rfc3454.txt
+ *
+ * [RFC4013] SASLprep: Stringprep Profile for User Names and Passwords
+ * http://www.ietf.org/rfc/rfc4013.txt
+ *
+ *
+ * Portions Copyright (c) 2017-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/saslprep.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/saslprep.h"
+#include "common/string.h"
+#include "common/unicode_norm.h"
+#include "mb/pg_wchar.h"
+
+/*
+ * Memory management wrappers.  Backend builds go through palloc/pfree;
+ * frontend builds use plain malloc/free, in which case an allocation
+ * failure is reported to the caller as SASLPREP_OOM.
+ */
+#ifdef FRONTEND
+#define STRDUP(s) strdup(s)
+#define ALLOC(size) malloc(size)
+#define FREE(ptr) free(ptr)
+#else
+#define STRDUP(s) pstrdup(s)
+#define ALLOC(size) palloc(size)
+#define FREE(ptr) pfree(ptr)
+#endif
+
+/* Prototypes for local functions */
+/* bsearch() comparator: codepoint key against a [first, last] range */
+static int codepoint_range_cmp(const void *a, const void *b);
+/* range-table membership test; mapsize counts pg_wchar elements, not pairs */
+static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize);
+/* character count of a UTF-8 string, or -1 if it is not valid UTF-8 */
+static int pg_utf8_string_len(const char *source);
+
+/*
+ * Stringprep Mapping Tables.
+ *
+ * The stringprep specification includes a number of tables of Unicode
+ * codepoints, used in different parts of the algorithm. They are below,
+ * as arrays of codepoint ranges. Each range is a pair of codepoints,
+ * for the first and last codepoint included in the range (inclusive!).
+ */
+
+/*
+ * C.1.2 Non-ASCII space characters
+ *
+ * These are all mapped to the ASCII space character (U+0020).
+ */
+static const pg_wchar non_ascii_space_ranges[] =
+{
+ 0x00A0, 0x00A0,
+ 0x1680, 0x1680,
+ 0x2000, 0x200B,
+ 0x202F, 0x202F,
+ 0x205F, 0x205F,
+ 0x3000, 0x3000
+};
+
+/*
+ * B.1 Commonly mapped to nothing
+ *
+ * If any of these appear in the input, they are removed.
+ */
+static const pg_wchar commonly_mapped_to_nothing_ranges[] =
+{
+ 0x00AD, 0x00AD,
+ 0x034F, 0x034F,
+ 0x1806, 0x1806,
+ 0x180B, 0x180D,
+ 0x200B, 0x200D,
+ 0x2060, 0x2060,
+ 0xFE00, 0xFE0F,
+ 0xFEFF, 0xFEFF
+};
+
+/*
+ * prohibited_output_ranges is a union of all the characters from
+ * the following tables:
+ *
+ * C.1.2 Non-ASCII space characters
+ * C.2.1 ASCII control characters
+ * C.2.2 Non-ASCII control characters
+ * C.3 Private Use characters
+ * C.4 Non-character code points
+ * C.5 Surrogate code points
+ * C.6 Inappropriate for plain text characters
+ * C.7 Inappropriate for canonical representation characters
+ * C.8 Change display properties or deprecated characters
+ * C.9 Tagging characters
+ *
+ * These are the tables that are listed as "prohibited output"
+ * characters in the SASLprep profile.
+ *
+ * The comment after each code range indicates which source table
+ * the code came from. Note that there is some overlap in the source
+ * tables, so one code might originate from multiple source tables.
+ * Adjacent ranges have also been merged together, to save space.
+ */
+static const pg_wchar prohibited_output_ranges[] =
+{
+ 0x0000, 0x001F, /* C.2.1 */
+ 0x007F, 0x00A0, /* C.1.2, C.2.1, C.2.2 */
+ 0x0340, 0x0341, /* C.8 */
+ 0x06DD, 0x06DD, /* C.2.2 */
+ 0x070F, 0x070F, /* C.2.2 */
+ 0x1680, 0x1680, /* C.1.2 */
+ 0x180E, 0x180E, /* C.2.2 */
+ 0x2000, 0x200F, /* C.1.2, C.2.2, C.8 */
+ 0x2028, 0x202F, /* C.1.2, C.2.2, C.8 */
+ 0x205F, 0x2063, /* C.1.2, C.2.2 */
+ 0x206A, 0x206F, /* C.2.2, C.8 */
+ 0x2FF0, 0x2FFB, /* C.7 */
+ 0x3000, 0x3000, /* C.1.2 */
+ 0xD800, 0xF8FF, /* C.3, C.5 */
+ 0xFDD0, 0xFDEF, /* C.4 */
+ 0xFEFF, 0xFEFF, /* C.2.2 */
+ 0xFFF9, 0xFFFF, /* C.2.2, C.4, C.6 */
+ 0x1D173, 0x1D17A, /* C.2.2 */
+ 0x1FFFE, 0x1FFFF, /* C.4 */
+ 0x2FFFE, 0x2FFFF, /* C.4 */
+ 0x3FFFE, 0x3FFFF, /* C.4 */
+ 0x4FFFE, 0x4FFFF, /* C.4 */
+ 0x5FFFE, 0x5FFFF, /* C.4 */
+ 0x6FFFE, 0x6FFFF, /* C.4 */
+ 0x7FFFE, 0x7FFFF, /* C.4 */
+ 0x8FFFE, 0x8FFFF, /* C.4 */
+ 0x9FFFE, 0x9FFFF, /* C.4 */
+ 0xAFFFE, 0xAFFFF, /* C.4 */
+ 0xBFFFE, 0xBFFFF, /* C.4 */
+ 0xCFFFE, 0xCFFFF, /* C.4 */
+ 0xDFFFE, 0xDFFFF, /* C.4 */
+ 0xE0001, 0xE0001, /* C.9 */
+ 0xE0020, 0xE007F, /* C.9 */
+ 0xEFFFE, 0xEFFFF, /* C.4 */
+ 0xF0000, 0xFFFFF, /* C.3, C.4 */
+ 0x100000, 0x10FFFF /* C.3, C.4 */
+};
+
+/* A.1 Unassigned code points in Unicode 3.2 */
+static const pg_wchar unassigned_codepoint_ranges[] =
+{
+ 0x0221, 0x0221,
+ 0x0234, 0x024F,
+ 0x02AE, 0x02AF,
+ 0x02EF, 0x02FF,
+ 0x0350, 0x035F,
+ 0x0370, 0x0373,
+ 0x0376, 0x0379,
+ 0x037B, 0x037D,
+ 0x037F, 0x0383,
+ 0x038B, 0x038B,
+ 0x038D, 0x038D,
+ 0x03A2, 0x03A2,
+ 0x03CF, 0x03CF,
+ 0x03F7, 0x03FF,
+ 0x0487, 0x0487,
+ 0x04CF, 0x04CF,
+ 0x04F6, 0x04F7,
+ 0x04FA, 0x04FF,
+ 0x0510, 0x0530,
+ 0x0557, 0x0558,
+ 0x0560, 0x0560,
+ 0x0588, 0x0588,
+ 0x058B, 0x0590,
+ 0x05A2, 0x05A2,
+ 0x05BA, 0x05BA,
+ 0x05C5, 0x05CF,
+ 0x05EB, 0x05EF,
+ 0x05F5, 0x060B,
+ 0x060D, 0x061A,
+ 0x061C, 0x061E,
+ 0x0620, 0x0620,
+ 0x063B, 0x063F,
+ 0x0656, 0x065F,
+ 0x06EE, 0x06EF,
+ 0x06FF, 0x06FF,
+ 0x070E, 0x070E,
+ 0x072D, 0x072F,
+ 0x074B, 0x077F,
+ 0x07B2, 0x0900,
+ 0x0904, 0x0904,
+ 0x093A, 0x093B,
+ 0x094E, 0x094F,
+ 0x0955, 0x0957,
+ 0x0971, 0x0980,
+ 0x0984, 0x0984,
+ 0x098D, 0x098E,
+ 0x0991, 0x0992,
+ 0x09A9, 0x09A9,
+ 0x09B1, 0x09B1,
+ 0x09B3, 0x09B5,
+ 0x09BA, 0x09BB,
+ 0x09BD, 0x09BD,
+ 0x09C5, 0x09C6,
+ 0x09C9, 0x09CA,
+ 0x09CE, 0x09D6,
+ 0x09D8, 0x09DB,
+ 0x09DE, 0x09DE,
+ 0x09E4, 0x09E5,
+ 0x09FB, 0x0A01,
+ 0x0A03, 0x0A04,
+ 0x0A0B, 0x0A0E,
+ 0x0A11, 0x0A12,
+ 0x0A29, 0x0A29,
+ 0x0A31, 0x0A31,
+ 0x0A34, 0x0A34,
+ 0x0A37, 0x0A37,
+ 0x0A3A, 0x0A3B,
+ 0x0A3D, 0x0A3D,
+ 0x0A43, 0x0A46,
+ 0x0A49, 0x0A4A,
+ 0x0A4E, 0x0A58,
+ 0x0A5D, 0x0A5D,
+ 0x0A5F, 0x0A65,
+ 0x0A75, 0x0A80,
+ 0x0A84, 0x0A84,
+ 0x0A8C, 0x0A8C,
+ 0x0A8E, 0x0A8E,
+ 0x0A92, 0x0A92,
+ 0x0AA9, 0x0AA9,
+ 0x0AB1, 0x0AB1,
+ 0x0AB4, 0x0AB4,
+ 0x0ABA, 0x0ABB,
+ 0x0AC6, 0x0AC6,
+ 0x0ACA, 0x0ACA,
+ 0x0ACE, 0x0ACF,
+ 0x0AD1, 0x0ADF,
+ 0x0AE1, 0x0AE5,
+ 0x0AF0, 0x0B00,
+ 0x0B04, 0x0B04,
+ 0x0B0D, 0x0B0E,
+ 0x0B11, 0x0B12,
+ 0x0B29, 0x0B29,
+ 0x0B31, 0x0B31,
+ 0x0B34, 0x0B35,
+ 0x0B3A, 0x0B3B,
+ 0x0B44, 0x0B46,
+ 0x0B49, 0x0B4A,
+ 0x0B4E, 0x0B55,
+ 0x0B58, 0x0B5B,
+ 0x0B5E, 0x0B5E,
+ 0x0B62, 0x0B65,
+ 0x0B71, 0x0B81,
+ 0x0B84, 0x0B84,
+ 0x0B8B, 0x0B8D,
+ 0x0B91, 0x0B91,
+ 0x0B96, 0x0B98,
+ 0x0B9B, 0x0B9B,
+ 0x0B9D, 0x0B9D,
+ 0x0BA0, 0x0BA2,
+ 0x0BA5, 0x0BA7,
+ 0x0BAB, 0x0BAD,
+ 0x0BB6, 0x0BB6,
+ 0x0BBA, 0x0BBD,
+ 0x0BC3, 0x0BC5,
+ 0x0BC9, 0x0BC9,
+ 0x0BCE, 0x0BD6,
+ 0x0BD8, 0x0BE6,
+ 0x0BF3, 0x0C00,
+ 0x0C04, 0x0C04,
+ 0x0C0D, 0x0C0D,
+ 0x0C11, 0x0C11,
+ 0x0C29, 0x0C29,
+ 0x0C34, 0x0C34,
+ 0x0C3A, 0x0C3D,
+ 0x0C45, 0x0C45,
+ 0x0C49, 0x0C49,
+ 0x0C4E, 0x0C54,
+ 0x0C57, 0x0C5F,
+ 0x0C62, 0x0C65,
+ 0x0C70, 0x0C81,
+ 0x0C84, 0x0C84,
+ 0x0C8D, 0x0C8D,
+ 0x0C91, 0x0C91,
+ 0x0CA9, 0x0CA9,
+ 0x0CB4, 0x0CB4,
+ 0x0CBA, 0x0CBD,
+ 0x0CC5, 0x0CC5,
+ 0x0CC9, 0x0CC9,
+ 0x0CCE, 0x0CD4,
+ 0x0CD7, 0x0CDD,
+ 0x0CDF, 0x0CDF,
+ 0x0CE2, 0x0CE5,
+ 0x0CF0, 0x0D01,
+ 0x0D04, 0x0D04,
+ 0x0D0D, 0x0D0D,
+ 0x0D11, 0x0D11,
+ 0x0D29, 0x0D29,
+ 0x0D3A, 0x0D3D,
+ 0x0D44, 0x0D45,
+ 0x0D49, 0x0D49,
+ 0x0D4E, 0x0D56,
+ 0x0D58, 0x0D5F,
+ 0x0D62, 0x0D65,
+ 0x0D70, 0x0D81,
+ 0x0D84, 0x0D84,
+ 0x0D97, 0x0D99,
+ 0x0DB2, 0x0DB2,
+ 0x0DBC, 0x0DBC,
+ 0x0DBE, 0x0DBF,
+ 0x0DC7, 0x0DC9,
+ 0x0DCB, 0x0DCE,
+ 0x0DD5, 0x0DD5,
+ 0x0DD7, 0x0DD7,
+ 0x0DE0, 0x0DF1,
+ 0x0DF5, 0x0E00,
+ 0x0E3B, 0x0E3E,
+ 0x0E5C, 0x0E80,
+ 0x0E83, 0x0E83,
+ 0x0E85, 0x0E86,
+ 0x0E89, 0x0E89,
+ 0x0E8B, 0x0E8C,
+ 0x0E8E, 0x0E93,
+ 0x0E98, 0x0E98,
+ 0x0EA0, 0x0EA0,
+ 0x0EA4, 0x0EA4,
+ 0x0EA6, 0x0EA6,
+ 0x0EA8, 0x0EA9,
+ 0x0EAC, 0x0EAC,
+ 0x0EBA, 0x0EBA,
+ 0x0EBE, 0x0EBF,
+ 0x0EC5, 0x0EC5,
+ 0x0EC7, 0x0EC7,
+ 0x0ECE, 0x0ECF,
+ 0x0EDA, 0x0EDB,
+ 0x0EDE, 0x0EFF,
+ 0x0F48, 0x0F48,
+ 0x0F6B, 0x0F70,
+ 0x0F8C, 0x0F8F,
+ 0x0F98, 0x0F98,
+ 0x0FBD, 0x0FBD,
+ 0x0FCD, 0x0FCE,
+ 0x0FD0, 0x0FFF,
+ 0x1022, 0x1022,
+ 0x1028, 0x1028,
+ 0x102B, 0x102B,
+ 0x1033, 0x1035,
+ 0x103A, 0x103F,
+ 0x105A, 0x109F,
+ 0x10C6, 0x10CF,
+ 0x10F9, 0x10FA,
+ 0x10FC, 0x10FF,
+ 0x115A, 0x115E,
+ 0x11A3, 0x11A7,
+ 0x11FA, 0x11FF,
+ 0x1207, 0x1207,
+ 0x1247, 0x1247,
+ 0x1249, 0x1249,
+ 0x124E, 0x124F,
+ 0x1257, 0x1257,
+ 0x1259, 0x1259,
+ 0x125E, 0x125F,
+ 0x1287, 0x1287,
+ 0x1289, 0x1289,
+ 0x128E, 0x128F,
+ 0x12AF, 0x12AF,
+ 0x12B1, 0x12B1,
+ 0x12B6, 0x12B7,
+ 0x12BF, 0x12BF,
+ 0x12C1, 0x12C1,
+ 0x12C6, 0x12C7,
+ 0x12CF, 0x12CF,
+ 0x12D7, 0x12D7,
+ 0x12EF, 0x12EF,
+ 0x130F, 0x130F,
+ 0x1311, 0x1311,
+ 0x1316, 0x1317,
+ 0x131F, 0x131F,
+ 0x1347, 0x1347,
+ 0x135B, 0x1360,
+ 0x137D, 0x139F,
+ 0x13F5, 0x1400,
+ 0x1677, 0x167F,
+ 0x169D, 0x169F,
+ 0x16F1, 0x16FF,
+ 0x170D, 0x170D,
+ 0x1715, 0x171F,
+ 0x1737, 0x173F,
+ 0x1754, 0x175F,
+ 0x176D, 0x176D,
+ 0x1771, 0x1771,
+ 0x1774, 0x177F,
+ 0x17DD, 0x17DF,
+ 0x17EA, 0x17FF,
+ 0x180F, 0x180F,
+ 0x181A, 0x181F,
+ 0x1878, 0x187F,
+ 0x18AA, 0x1DFF,
+ 0x1E9C, 0x1E9F,
+ 0x1EFA, 0x1EFF,
+ 0x1F16, 0x1F17,
+ 0x1F1E, 0x1F1F,
+ 0x1F46, 0x1F47,
+ 0x1F4E, 0x1F4F,
+ 0x1F58, 0x1F58,
+ 0x1F5A, 0x1F5A,
+ 0x1F5C, 0x1F5C,
+ 0x1F5E, 0x1F5E,
+ 0x1F7E, 0x1F7F,
+ 0x1FB5, 0x1FB5,
+ 0x1FC5, 0x1FC5,
+ 0x1FD4, 0x1FD5,
+ 0x1FDC, 0x1FDC,
+ 0x1FF0, 0x1FF1,
+ 0x1FF5, 0x1FF5,
+ 0x1FFF, 0x1FFF,
+ 0x2053, 0x2056,
+ 0x2058, 0x205E,
+ 0x2064, 0x2069,
+ 0x2072, 0x2073,
+ 0x208F, 0x209F,
+ 0x20B2, 0x20CF,
+ 0x20EB, 0x20FF,
+ 0x213B, 0x213C,
+ 0x214C, 0x2152,
+ 0x2184, 0x218F,
+ 0x23CF, 0x23FF,
+ 0x2427, 0x243F,
+ 0x244B, 0x245F,
+ 0x24FF, 0x24FF,
+ 0x2614, 0x2615,
+ 0x2618, 0x2618,
+ 0x267E, 0x267F,
+ 0x268A, 0x2700,
+ 0x2705, 0x2705,
+ 0x270A, 0x270B,
+ 0x2728, 0x2728,
+ 0x274C, 0x274C,
+ 0x274E, 0x274E,
+ 0x2753, 0x2755,
+ 0x2757, 0x2757,
+ 0x275F, 0x2760,
+ 0x2795, 0x2797,
+ 0x27B0, 0x27B0,
+ 0x27BF, 0x27CF,
+ 0x27EC, 0x27EF,
+ 0x2B00, 0x2E7F,
+ 0x2E9A, 0x2E9A,
+ 0x2EF4, 0x2EFF,
+ 0x2FD6, 0x2FEF,
+ 0x2FFC, 0x2FFF,
+ 0x3040, 0x3040,
+ 0x3097, 0x3098,
+ 0x3100, 0x3104,
+ 0x312D, 0x3130,
+ 0x318F, 0x318F,
+ 0x31B8, 0x31EF,
+ 0x321D, 0x321F,
+ 0x3244, 0x3250,
+ 0x327C, 0x327E,
+ 0x32CC, 0x32CF,
+ 0x32FF, 0x32FF,
+ 0x3377, 0x337A,
+ 0x33DE, 0x33DF,
+ 0x33FF, 0x33FF,
+ 0x4DB6, 0x4DFF,
+ 0x9FA6, 0x9FFF,
+ 0xA48D, 0xA48F,
+ 0xA4C7, 0xABFF,
+ 0xD7A4, 0xD7FF,
+ 0xFA2E, 0xFA2F,
+ 0xFA6B, 0xFAFF,
+ 0xFB07, 0xFB12,
+ 0xFB18, 0xFB1C,
+ 0xFB37, 0xFB37,
+ 0xFB3D, 0xFB3D,
+ 0xFB3F, 0xFB3F,
+ 0xFB42, 0xFB42,
+ 0xFB45, 0xFB45,
+ 0xFBB2, 0xFBD2,
+ 0xFD40, 0xFD4F,
+ 0xFD90, 0xFD91,
+ 0xFDC8, 0xFDCF,
+ 0xFDFD, 0xFDFF,
+ 0xFE10, 0xFE1F,
+ 0xFE24, 0xFE2F,
+ 0xFE47, 0xFE48,
+ 0xFE53, 0xFE53,
+ 0xFE67, 0xFE67,
+ 0xFE6C, 0xFE6F,
+ 0xFE75, 0xFE75,
+ 0xFEFD, 0xFEFE,
+ 0xFF00, 0xFF00,
+ 0xFFBF, 0xFFC1,
+ 0xFFC8, 0xFFC9,
+ 0xFFD0, 0xFFD1,
+ 0xFFD8, 0xFFD9,
+ 0xFFDD, 0xFFDF,
+ 0xFFE7, 0xFFE7,
+ 0xFFEF, 0xFFF8,
+ 0x10000, 0x102FF,
+ 0x1031F, 0x1031F,
+ 0x10324, 0x1032F,
+ 0x1034B, 0x103FF,
+ 0x10426, 0x10427,
+ 0x1044E, 0x1CFFF,
+ 0x1D0F6, 0x1D0FF,
+ 0x1D127, 0x1D129,
+ 0x1D1DE, 0x1D3FF,
+ 0x1D455, 0x1D455,
+ 0x1D49D, 0x1D49D,
+ 0x1D4A0, 0x1D4A1,
+ 0x1D4A3, 0x1D4A4,
+ 0x1D4A7, 0x1D4A8,
+ 0x1D4AD, 0x1D4AD,
+ 0x1D4BA, 0x1D4BA,
+ 0x1D4BC, 0x1D4BC,
+ 0x1D4C1, 0x1D4C1,
+ 0x1D4C4, 0x1D4C4,
+ 0x1D506, 0x1D506,
+ 0x1D50B, 0x1D50C,
+ 0x1D515, 0x1D515,
+ 0x1D51D, 0x1D51D,
+ 0x1D53A, 0x1D53A,
+ 0x1D53F, 0x1D53F,
+ 0x1D545, 0x1D545,
+ 0x1D547, 0x1D549,
+ 0x1D551, 0x1D551,
+ 0x1D6A4, 0x1D6A7,
+ 0x1D7CA, 0x1D7CD,
+ 0x1D800, 0x1FFFD,
+ 0x2A6D7, 0x2F7FF,
+ 0x2FA1E, 0x2FFFD,
+ 0x30000, 0x3FFFD,
+ 0x40000, 0x4FFFD,
+ 0x50000, 0x5FFFD,
+ 0x60000, 0x6FFFD,
+ 0x70000, 0x7FFFD,
+ 0x80000, 0x8FFFD,
+ 0x90000, 0x9FFFD,
+ 0xA0000, 0xAFFFD,
+ 0xB0000, 0xBFFFD,
+ 0xC0000, 0xCFFFD,
+ 0xD0000, 0xDFFFD,
+ 0xE0000, 0xE0000,
+ 0xE0002, 0xE001F,
+ 0xE0080, 0xEFFFD
+};
+
+/* D.1 Characters with bidirectional property "R" or "AL" */
+static const pg_wchar RandALCat_codepoint_ranges[] =
+{
+ 0x05BE, 0x05BE,
+ 0x05C0, 0x05C0,
+ 0x05C3, 0x05C3,
+ 0x05D0, 0x05EA,
+ 0x05F0, 0x05F4,
+ 0x061B, 0x061B,
+ 0x061F, 0x061F,
+ 0x0621, 0x063A,
+ 0x0640, 0x064A,
+ 0x066D, 0x066F,
+ 0x0671, 0x06D5,
+ 0x06DD, 0x06DD,
+ 0x06E5, 0x06E6,
+ 0x06FA, 0x06FE,
+ 0x0700, 0x070D,
+ 0x0710, 0x0710,
+ 0x0712, 0x072C,
+ 0x0780, 0x07A5,
+ 0x07B1, 0x07B1,
+ 0x200F, 0x200F,
+ 0xFB1D, 0xFB1D,
+ 0xFB1F, 0xFB28,
+ 0xFB2A, 0xFB36,
+ 0xFB38, 0xFB3C,
+ 0xFB3E, 0xFB3E,
+ 0xFB40, 0xFB41,
+ 0xFB43, 0xFB44,
+ 0xFB46, 0xFBB1,
+ 0xFBD3, 0xFD3D,
+ 0xFD50, 0xFD8F,
+ 0xFD92, 0xFDC7,
+ 0xFDF0, 0xFDFC,
+ 0xFE70, 0xFE74,
+ 0xFE76, 0xFEFC
+};
+
+/* D.2 Characters with bidirectional property "L" */
+static const pg_wchar LCat_codepoint_ranges[] =
+{
+ 0x0041, 0x005A,
+ 0x0061, 0x007A,
+ 0x00AA, 0x00AA,
+ 0x00B5, 0x00B5,
+ 0x00BA, 0x00BA,
+ 0x00C0, 0x00D6,
+ 0x00D8, 0x00F6,
+ 0x00F8, 0x0220,
+ 0x0222, 0x0233,
+ 0x0250, 0x02AD,
+ 0x02B0, 0x02B8,
+ 0x02BB, 0x02C1,
+ 0x02D0, 0x02D1,
+ 0x02E0, 0x02E4,
+ 0x02EE, 0x02EE,
+ 0x037A, 0x037A,
+ 0x0386, 0x0386,
+ 0x0388, 0x038A,
+ 0x038C, 0x038C,
+ 0x038E, 0x03A1,
+ 0x03A3, 0x03CE,
+ 0x03D0, 0x03F5,
+ 0x0400, 0x0482,
+ 0x048A, 0x04CE,
+ 0x04D0, 0x04F5,
+ 0x04F8, 0x04F9,
+ 0x0500, 0x050F,
+ 0x0531, 0x0556,
+ 0x0559, 0x055F,
+ 0x0561, 0x0587,
+ 0x0589, 0x0589,
+ 0x0903, 0x0903,
+ 0x0905, 0x0939,
+ 0x093D, 0x0940,
+ 0x0949, 0x094C,
+ 0x0950, 0x0950,
+ 0x0958, 0x0961,
+ 0x0964, 0x0970,
+ 0x0982, 0x0983,
+ 0x0985, 0x098C,
+ 0x098F, 0x0990,
+ 0x0993, 0x09A8,
+ 0x09AA, 0x09B0,
+ 0x09B2, 0x09B2,
+ 0x09B6, 0x09B9,
+ 0x09BE, 0x09C0,
+ 0x09C7, 0x09C8,
+ 0x09CB, 0x09CC,
+ 0x09D7, 0x09D7,
+ 0x09DC, 0x09DD,
+ 0x09DF, 0x09E1,
+ 0x09E6, 0x09F1,
+ 0x09F4, 0x09FA,
+ 0x0A05, 0x0A0A,
+ 0x0A0F, 0x0A10,
+ 0x0A13, 0x0A28,
+ 0x0A2A, 0x0A30,
+ 0x0A32, 0x0A33,
+ 0x0A35, 0x0A36,
+ 0x0A38, 0x0A39,
+ 0x0A3E, 0x0A40,
+ 0x0A59, 0x0A5C,
+ 0x0A5E, 0x0A5E,
+ 0x0A66, 0x0A6F,
+ 0x0A72, 0x0A74,
+ 0x0A83, 0x0A83,
+ 0x0A85, 0x0A8B,
+ 0x0A8D, 0x0A8D,
+ 0x0A8F, 0x0A91,
+ 0x0A93, 0x0AA8,
+ 0x0AAA, 0x0AB0,
+ 0x0AB2, 0x0AB3,
+ 0x0AB5, 0x0AB9,
+ 0x0ABD, 0x0AC0,
+ 0x0AC9, 0x0AC9,
+ 0x0ACB, 0x0ACC,
+ 0x0AD0, 0x0AD0,
+ 0x0AE0, 0x0AE0,
+ 0x0AE6, 0x0AEF,
+ 0x0B02, 0x0B03,
+ 0x0B05, 0x0B0C,
+ 0x0B0F, 0x0B10,
+ 0x0B13, 0x0B28,
+ 0x0B2A, 0x0B30,
+ 0x0B32, 0x0B33,
+ 0x0B36, 0x0B39,
+ 0x0B3D, 0x0B3E,
+ 0x0B40, 0x0B40,
+ 0x0B47, 0x0B48,
+ 0x0B4B, 0x0B4C,
+ 0x0B57, 0x0B57,
+ 0x0B5C, 0x0B5D,
+ 0x0B5F, 0x0B61,
+ 0x0B66, 0x0B70,
+ 0x0B83, 0x0B83,
+ 0x0B85, 0x0B8A,
+ 0x0B8E, 0x0B90,
+ 0x0B92, 0x0B95,
+ 0x0B99, 0x0B9A,
+ 0x0B9C, 0x0B9C,
+ 0x0B9E, 0x0B9F,
+ 0x0BA3, 0x0BA4,
+ 0x0BA8, 0x0BAA,
+ 0x0BAE, 0x0BB5,
+ 0x0BB7, 0x0BB9,
+ 0x0BBE, 0x0BBF,
+ 0x0BC1, 0x0BC2,
+ 0x0BC6, 0x0BC8,
+ 0x0BCA, 0x0BCC,
+ 0x0BD7, 0x0BD7,
+ 0x0BE7, 0x0BF2,
+ 0x0C01, 0x0C03,
+ 0x0C05, 0x0C0C,
+ 0x0C0E, 0x0C10,
+ 0x0C12, 0x0C28,
+ 0x0C2A, 0x0C33,
+ 0x0C35, 0x0C39,
+ 0x0C41, 0x0C44,
+ 0x0C60, 0x0C61,
+ 0x0C66, 0x0C6F,
+ 0x0C82, 0x0C83,
+ 0x0C85, 0x0C8C,
+ 0x0C8E, 0x0C90,
+ 0x0C92, 0x0CA8,
+ 0x0CAA, 0x0CB3,
+ 0x0CB5, 0x0CB9,
+ 0x0CBE, 0x0CBE,
+ 0x0CC0, 0x0CC4,
+ 0x0CC7, 0x0CC8,
+ 0x0CCA, 0x0CCB,
+ 0x0CD5, 0x0CD6,
+ 0x0CDE, 0x0CDE,
+ 0x0CE0, 0x0CE1,
+ 0x0CE6, 0x0CEF,
+ 0x0D02, 0x0D03,
+ 0x0D05, 0x0D0C,
+ 0x0D0E, 0x0D10,
+ 0x0D12, 0x0D28,
+ 0x0D2A, 0x0D39,
+ 0x0D3E, 0x0D40,
+ 0x0D46, 0x0D48,
+ 0x0D4A, 0x0D4C,
+ 0x0D57, 0x0D57,
+ 0x0D60, 0x0D61,
+ 0x0D66, 0x0D6F,
+ 0x0D82, 0x0D83,
+ 0x0D85, 0x0D96,
+ 0x0D9A, 0x0DB1,
+ 0x0DB3, 0x0DBB,
+ 0x0DBD, 0x0DBD,
+ 0x0DC0, 0x0DC6,
+ 0x0DCF, 0x0DD1,
+ 0x0DD8, 0x0DDF,
+ 0x0DF2, 0x0DF4,
+ 0x0E01, 0x0E30,
+ 0x0E32, 0x0E33,
+ 0x0E40, 0x0E46,
+ 0x0E4F, 0x0E5B,
+ 0x0E81, 0x0E82,
+ 0x0E84, 0x0E84,
+ 0x0E87, 0x0E88,
+ 0x0E8A, 0x0E8A,
+ 0x0E8D, 0x0E8D,
+ 0x0E94, 0x0E97,
+ 0x0E99, 0x0E9F,
+ 0x0EA1, 0x0EA3,
+ 0x0EA5, 0x0EA5,
+ 0x0EA7, 0x0EA7,
+ 0x0EAA, 0x0EAB,
+ 0x0EAD, 0x0EB0,
+ 0x0EB2, 0x0EB3,
+ 0x0EBD, 0x0EBD,
+ 0x0EC0, 0x0EC4,
+ 0x0EC6, 0x0EC6,
+ 0x0ED0, 0x0ED9,
+ 0x0EDC, 0x0EDD,
+ 0x0F00, 0x0F17,
+ 0x0F1A, 0x0F34,
+ 0x0F36, 0x0F36,
+ 0x0F38, 0x0F38,
+ 0x0F3E, 0x0F47,
+ 0x0F49, 0x0F6A,
+ 0x0F7F, 0x0F7F,
+ 0x0F85, 0x0F85,
+ 0x0F88, 0x0F8B,
+ 0x0FBE, 0x0FC5,
+ 0x0FC7, 0x0FCC,
+ 0x0FCF, 0x0FCF,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102A,
+ 0x102C, 0x102C,
+ 0x1031, 0x1031,
+ 0x1038, 0x1038,
+ 0x1040, 0x1057,
+ 0x10A0, 0x10C5,
+ 0x10D0, 0x10F8,
+ 0x10FB, 0x10FB,
+ 0x1100, 0x1159,
+ 0x115F, 0x11A2,
+ 0x11A8, 0x11F9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124A, 0x124D,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125A, 0x125D,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128A, 0x128D,
+ 0x1290, 0x12AE,
+ 0x12B0, 0x12B0,
+ 0x12B2, 0x12B5,
+ 0x12B8, 0x12BE,
+ 0x12C0, 0x12C0,
+ 0x12C2, 0x12C5,
+ 0x12C8, 0x12CE,
+ 0x12D0, 0x12D6,
+ 0x12D8, 0x12EE,
+ 0x12F0, 0x130E,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131E,
+ 0x1320, 0x1346,
+ 0x1348, 0x135A,
+ 0x1361, 0x137C,
+ 0x13A0, 0x13F4,
+ 0x1401, 0x1676,
+ 0x1681, 0x169A,
+ 0x16A0, 0x16F0,
+ 0x1700, 0x170C,
+ 0x170E, 0x1711,
+ 0x1720, 0x1731,
+ 0x1735, 0x1736,
+ 0x1740, 0x1751,
+ 0x1760, 0x176C,
+ 0x176E, 0x1770,
+ 0x1780, 0x17B6,
+ 0x17BE, 0x17C5,
+ 0x17C7, 0x17C8,
+ 0x17D4, 0x17DA,
+ 0x17DC, 0x17DC,
+ 0x17E0, 0x17E9,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18A8,
+ 0x1E00, 0x1E9B,
+ 0x1EA0, 0x1EF9,
+ 0x1F00, 0x1F15,
+ 0x1F18, 0x1F1D,
+ 0x1F20, 0x1F45,
+ 0x1F48, 0x1F4D,
+ 0x1F50, 0x1F57,
+ 0x1F59, 0x1F59,
+ 0x1F5B, 0x1F5B,
+ 0x1F5D, 0x1F5D,
+ 0x1F5F, 0x1F7D,
+ 0x1F80, 0x1FB4,
+ 0x1FB6, 0x1FBC,
+ 0x1FBE, 0x1FBE,
+ 0x1FC2, 0x1FC4,
+ 0x1FC6, 0x1FCC,
+ 0x1FD0, 0x1FD3,
+ 0x1FD6, 0x1FDB,
+ 0x1FE0, 0x1FEC,
+ 0x1FF2, 0x1FF4,
+ 0x1FF6, 0x1FFC,
+ 0x200E, 0x200E,
+ 0x2071, 0x2071,
+ 0x207F, 0x207F,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210A, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211D,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212A, 0x212D,
+ 0x212F, 0x2131,
+ 0x2133, 0x2139,
+ 0x213D, 0x213F,
+ 0x2145, 0x2149,
+ 0x2160, 0x2183,
+ 0x2336, 0x237A,
+ 0x2395, 0x2395,
+ 0x249C, 0x24E9,
+ 0x3005, 0x3007,
+ 0x3021, 0x3029,
+ 0x3031, 0x3035,
+ 0x3038, 0x303C,
+ 0x3041, 0x3096,
+ 0x309D, 0x309F,
+ 0x30A1, 0x30FA,
+ 0x30FC, 0x30FF,
+ 0x3105, 0x312C,
+ 0x3131, 0x318E,
+ 0x3190, 0x31B7,
+ 0x31F0, 0x321C,
+ 0x3220, 0x3243,
+ 0x3260, 0x327B,
+ 0x327F, 0x32B0,
+ 0x32C0, 0x32CB,
+ 0x32D0, 0x32FE,
+ 0x3300, 0x3376,
+ 0x337B, 0x33DD,
+ 0x33E0, 0x33FE,
+ 0x3400, 0x4DB5,
+ 0x4E00, 0x9FA5,
+ 0xA000, 0xA48C,
+ 0xAC00, 0xD7A3,
+ 0xD800, 0xFA2D,
+ 0xFA30, 0xFA6A,
+ 0xFB00, 0xFB06,
+ 0xFB13, 0xFB17,
+ 0xFF21, 0xFF3A,
+ 0xFF41, 0xFF5A,
+ 0xFF66, 0xFFBE,
+ 0xFFC2, 0xFFC7,
+ 0xFFCA, 0xFFCF,
+ 0xFFD2, 0xFFD7,
+ 0xFFDA, 0xFFDC,
+ 0x10300, 0x1031E,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034A,
+ 0x10400, 0x10425,
+ 0x10428, 0x1044D,
+ 0x1D000, 0x1D0F5,
+ 0x1D100, 0x1D126,
+ 0x1D12A, 0x1D166,
+ 0x1D16A, 0x1D172,
+ 0x1D183, 0x1D184,
+ 0x1D18C, 0x1D1A9,
+ 0x1D1AE, 0x1D1DD,
+ 0x1D400, 0x1D454,
+ 0x1D456, 0x1D49C,
+ 0x1D49E, 0x1D49F,
+ 0x1D4A2, 0x1D4A2,
+ 0x1D4A5, 0x1D4A6,
+ 0x1D4A9, 0x1D4AC,
+ 0x1D4AE, 0x1D4B9,
+ 0x1D4BB, 0x1D4BB,
+ 0x1D4BD, 0x1D4C0,
+ 0x1D4C2, 0x1D4C3,
+ 0x1D4C5, 0x1D505,
+ 0x1D507, 0x1D50A,
+ 0x1D50D, 0x1D514,
+ 0x1D516, 0x1D51C,
+ 0x1D51E, 0x1D539,
+ 0x1D53B, 0x1D53E,
+ 0x1D540, 0x1D544,
+ 0x1D546, 0x1D546,
+ 0x1D54A, 0x1D550,
+ 0x1D552, 0x1D6A3,
+ 0x1D6A8, 0x1D7C9,
+ 0x20000, 0x2A6D6,
+ 0x2F800, 0x2FA1D,
+ 0xF0000, 0xFFFFD,
+ 0x100000, 0x10FFFD
+};
+
+/* End of stringprep tables */
+
+
+/*
+ * Is the given Unicode codepoint in the given table of ranges?
+ *
+ * "map" is handed to lengthof() to obtain the element count that
+ * is_code_in_table() expects.  NOTE(review): lengthof() is defined
+ * elsewhere; presumably "map" must be an array object rather than a
+ * pointer for the count to be right -- confirm.
+ */
+#define IS_CODE_IN_TABLE(code, map) is_code_in_table(code, map, lengthof(map))
+
+/*
+ * bsearch() comparator.  "a" points to the codepoint being looked up and
+ * "b" to a two-element [first, last] range.  Yields 0 when the codepoint
+ * lies inside the range (bounds included), otherwise -1 or 1 depending on
+ * which side of the range it falls.
+ */
+static int
+codepoint_range_cmp(const void *a, const void *b)
+{
+	const pg_wchar code = *(const pg_wchar *) a;
+	const pg_wchar *bounds = (const pg_wchar *) b;
+
+	if (code < bounds[0])
+		return -1;				/* below the lower bound */
+
+	/* above the upper bound, or else within the range */
+	return (code > bounds[1]) ? 1 : 0;
+}
+
+/*
+ * Test whether "code" falls inside any range of "map".
+ *
+ * "map" holds mapsize pg_wchar values laid out as consecutive
+ * [first, last] pairs.  The lookup is a binary search over those pairs, so
+ * they have to be in ascending order.
+ */
+static bool
+is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize)
+{
+	const int	nranges = mapsize / 2;
+
+	Assert(mapsize % 2 == 0);
+
+	/* Cheap rejection of anything outside the table's overall span */
+	if (code < map[0] || code > map[mapsize - 1])
+		return false;
+
+	return bsearch(&code, map, nranges, sizeof(pg_wchar) * 2,
+				   codepoint_range_cmp) != NULL;
+}
+
+/*
+ * Count the characters in a null-terminated UTF-8 string.
+ *
+ * Each sequence is validated with pg_utf8_islegal() as it is stepped over;
+ * -1 is returned as soon as one fails that check.
+ */
+static int
+pg_utf8_string_len(const char *source)
+{
+	const unsigned char *cur;
+	int			nchars = 0;
+
+	for (cur = (const unsigned char *) source; *cur != '\0'; nchars++)
+	{
+		int			seqlen = pg_utf_mblen(cur);
+
+		if (!pg_utf8_islegal(cur, seqlen))
+			return -1;
+
+		cur += seqlen;
+	}
+
+	return nchars;
+}
+
+
+/*
+ * pg_saslprep - Apply SASLprep normalization to a password.
+ *
+ * SASLprep is defined on UTF-8 input, but PostgreSQL supports many
+ * encodings, so the input is not blindly assumed to be UTF-8.  It is
+ * verified first, and SASLPREP_INVALID_UTF8 is returned if it does not
+ * look like valid UTF-8.
+ *
+ * SASLPREP_PROHIBITED is returned if the string contains prohibited or
+ * unassigned characters, violates the bidirectional-string rules, or is
+ * empty once the mapping step has been applied.
+ *
+ * On success, SASLPREP_SUCCESS is returned and *output points to the
+ * normalized, null-terminated string.  On every failure *output is NULL.
+ *
+ * In frontend, the normalized string is malloc'd and the caller is
+ * responsible for freeing it; a failed allocation yields SASLPREP_OOM.
+ * In backend, the normalized string is palloc'd instead, and a failed
+ * allocation leads to ereport(ERROR).
+ *
+ * NOTE(review): the prohibited-character and bidi checks (steps 3 and 4
+ * below) examine the mapped input array, not the array produced by the
+ * normalization step -- confirm that this is intended.
+ */
+pg_saslprep_rc
+pg_saslprep(const char *input, char **output)
+{
+	pg_saslprep_rc rc;
+	pg_wchar   *input_chars = NULL;
+	pg_wchar   *output_chars = NULL;
+	int			input_size;
+	char	   *result;
+	int			result_size;
+	int			kept;
+	int			i;
+	bool		has_RandALCat;
+	unsigned char *p;
+	pg_wchar   *wp;
+
+	/* Callers must see *output == NULL on every failure path */
+	*output = NULL;
+
+	/*
+	 * Fast path: a pure-ASCII string needs no processing at all, so just
+	 * hand back a copy.  Nothing else has been allocated yet, hence a failed
+	 * copy can be reported directly.
+	 */
+	if (pg_is_ascii(input))
+	{
+		*output = STRDUP(input);
+		return (*output != NULL) ? SASLPREP_SUCCESS : SASLPREP_OOM;
+	}
+
+	/*
+	 * Decode the UTF-8 input into an array of Unicode codepoints.  Counting
+	 * the characters also verifies that the input is legal UTF-8.
+	 */
+	input_size = pg_utf8_string_len(input);
+	if (input_size < 0)
+		return SASLPREP_INVALID_UTF8;
+
+	input_chars = ALLOC((input_size + 1) * sizeof(pg_wchar));
+	if (input_chars == NULL)
+		goto oom;
+
+	p = (unsigned char *) input;
+	for (i = 0; i < input_size; i++)
+	{
+		input_chars[i] = utf8_to_unicode(p);
+		p += pg_utf_mblen(p);
+	}
+	input_chars[input_size] = (pg_wchar) '\0';
+
+	/*
+	 * The numbered steps that follow are those of [RFC3454], Section "2.
+	 * Preparation Overview".
+	 */
+
+	/*
+	 * 1) Map -- Replace each character that has a mapping with its mapping.
+	 * The array is compacted in place.  The space table is consulted first,
+	 * so a codepoint listed in both tables (U+200B) becomes a space rather
+	 * than being dropped.
+	 */
+	kept = 0;
+	for (i = 0; i < input_size; i++)
+	{
+		pg_wchar	code = input_chars[i];
+
+		if (IS_CODE_IN_TABLE(code, non_ascii_space_ranges))
+			code = 0x0020;
+		else if (IS_CODE_IN_TABLE(code, commonly_mapped_to_nothing_ranges))
+			continue;			/* map to nothing */
+
+		input_chars[kept++] = code;
+	}
+	input_chars[kept] = (pg_wchar) '\0';
+	input_size = kept;
+
+	/* don't allow empty password */
+	if (input_size == 0)
+		goto prohibited;
+
+	/*
+	 * 2) Normalize -- Apply Unicode normalization (form NFKC) to the result
+	 * of step 1.
+	 */
+	output_chars = unicode_normalize(UNICODE_NFKC, input_chars);
+	if (output_chars == NULL)
+		goto oom;
+
+	/*
+	 * 3) Prohibit -- Reject the string if it contains any character that is
+	 * not allowed in the output, or any unassigned codepoint.
+	 */
+	for (i = 0; i < input_size; i++)
+	{
+		pg_wchar	code = input_chars[i];
+
+		if (IS_CODE_IN_TABLE(code, prohibited_output_ranges) ||
+			IS_CODE_IN_TABLE(code, unassigned_codepoint_ranges))
+			goto prohibited;
+	}
+
+	/*
+	 * 4) Check bidi -- If the string has any right-to-left characters, make
+	 * sure the whole string meets the requirements for bidirectional
+	 * strings, and reject it otherwise.
+	 *
+	 * [RFC3454], Section "6. Bidirectional Characters" spells out the
+	 * requirements:
+	 *
+	 * "In any profile that specifies bidirectional character handling, all
+	 * three of the following requirements MUST be met:
+	 *
+	 * 1) The characters in section 5.8 MUST be prohibited.
+	 *
+	 * 2) If a string contains any RandALCat character, the string MUST NOT
+	 * contain any LCat character.
+	 *
+	 * 3) If a string contains any RandALCat character, a RandALCat character
+	 * MUST be the first character of the string, and a RandALCat character
+	 * MUST be the last character of the string."
+	 */
+	has_RandALCat = false;
+	for (i = 0; i < input_size && !has_RandALCat; i++)
+		has_RandALCat = IS_CODE_IN_TABLE(input_chars[i],
+										 RandALCat_codepoint_ranges);
+
+	if (has_RandALCat)
+	{
+		pg_wchar	first = input_chars[0];
+		pg_wchar	last = input_chars[input_size - 1];
+
+		/* requirement 2: no LCat character anywhere */
+		for (i = 0; i < input_size; i++)
+		{
+			if (IS_CODE_IN_TABLE(input_chars[i], LCat_codepoint_ranges))
+				goto prohibited;
+		}
+
+		/* requirement 3: RandALCat at both ends */
+		if (!(IS_CODE_IN_TABLE(first, RandALCat_codepoint_ranges) &&
+			  IS_CODE_IN_TABLE(last, RandALCat_codepoint_ranges)))
+			goto prohibited;
+	}
+
+	/*
+	 * Finally, encode the normalized codepoints back into UTF-8.  A first
+	 * pass measures the encoded length, a second pass writes the bytes.
+	 */
+	result_size = 0;
+	for (wp = output_chars; *wp != 0; wp++)
+	{
+		unsigned char scratch[4];
+
+		unicode_to_utf8(*wp, scratch);
+		result_size += pg_utf_mblen(scratch);
+	}
+
+	result = ALLOC(result_size + 1);
+	if (result == NULL)
+		goto oom;
+
+	/*
+	 * Nothing below this point can fail, so the failure exits never have to
+	 * free "result".
+	 */
+	p = (unsigned char *) result;
+	for (wp = output_chars; *wp != 0; wp++)
+	{
+		unicode_to_utf8(*wp, p);
+		p += pg_utf_mblen(p);
+	}
+	Assert((char *) p == result + result_size);
+	*p = '\0';
+
+	*output = result;
+	rc = SASLPREP_SUCCESS;
+	goto cleanup;
+
+prohibited:
+	rc = SASLPREP_PROHIBITED;
+	goto cleanup;
+
+oom:
+	rc = SASLPREP_OOM;
+
+cleanup:
+	/* Release the work arrays; either may still be unallocated */
+	if (input_chars)
+		FREE(input_chars);
+	if (output_chars)
+		FREE(output_chars);
+
+	return rc;
+}
diff --git a/src/common/scram-common.c b/src/common/scram-common.c
new file mode 100644
index 0000000..6448564
--- /dev/null
+++ b/src/common/scram-common.c
@@ -0,0 +1,330 @@
+/*-------------------------------------------------------------------------
+ * scram-common.c
+ * Shared frontend/backend code for SCRAM authentication
+ *
+ * This contains the common low-level functions needed in both frontend and
+ * backend, for implementing the Salted Challenge Response Authentication
+ * Mechanism (SCRAM), per IETF's RFC 5802.
+ *
+ * Portions Copyright (c) 2017-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/scram-common.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/base64.h"
+#include "common/hmac.h"
+#include "common/scram-common.h"
+#ifndef FRONTEND
+#include "miscadmin.h"
+#endif
+#include "port/pg_bswap.h"
+
+/*
+ * Calculate SaltedPassword.
+ *
+ * The password should already be normalized by SASLprep.  key_length bytes
+ * are written to "result".
+ *
+ * Returns 0 on success.  On failure, returns -1 with *errstr pointing to a
+ * message about the error details.
+ */
+int
+scram_SaltedPassword(const char *password,
+					 pg_cryptohash_type hash_type, int key_length,
+					 const char *salt, int saltlen, int iterations,
+					 uint8 *result, const char **errstr)
+{
+	int			pwlen = strlen(password);
+	uint32		block_index = pg_hton32(1);
+	int			round;
+	int			k;
+	uint8		cur[SCRAM_MAX_KEY_LEN];
+	uint8		prev[SCRAM_MAX_KEY_LEN];
+	pg_hmac_ctx *ctx = pg_hmac_create(hash_type);
+
+	if (ctx == NULL)
+	{
+		*errstr = pg_hmac_error(NULL);	/* returns OOM */
+		return -1;
+	}
+
+	/*
+	 * Iterate the HMAC calculation, keyed with the password, starting from
+	 * the given salt.  This is essentially PBKDF2 (see RFC2898) with HMAC()
+	 * as the pseudorandom function.
+	 */
+
+	/* First round: HMAC over the salt followed by the big-endian value 1 */
+	if (pg_hmac_init(ctx, (uint8 *) password, pwlen) < 0 ||
+		pg_hmac_update(ctx, (uint8 *) salt, saltlen) < 0 ||
+		pg_hmac_update(ctx, (uint8 *) &block_index, sizeof(uint32)) < 0 ||
+		pg_hmac_final(ctx, prev, key_length) < 0)
+		goto fail;
+
+	memcpy(result, prev, key_length);
+
+	/* Remaining rounds: HMAC the previous output and XOR it into result */
+	for (round = 2; round <= iterations; round++)
+	{
+#ifndef FRONTEND
+
+		/*
+		 * The iteration count could be set to a large value, so make sure
+		 * the loop can be interrupted.
+		 */
+		CHECK_FOR_INTERRUPTS();
+#endif
+
+		if (pg_hmac_init(ctx, (uint8 *) password, pwlen) < 0 ||
+			pg_hmac_update(ctx, (uint8 *) prev, key_length) < 0 ||
+			pg_hmac_final(ctx, cur, key_length) < 0)
+			goto fail;
+
+		for (k = 0; k < key_length; k++)
+		{
+			result[k] ^= cur[k];
+			prev[k] = cur[k];
+		}
+	}
+
+	pg_hmac_free(ctx);
+	return 0;
+
+fail:
+	/* Fetch the message before the context is released */
+	*errstr = pg_hmac_error(ctx);
+	pg_hmac_free(ctx);
+	return -1;
+}
+
+
+/*
+ * Hash the first key_length bytes of "input" with the given hash type and
+ * write the digest to "result" (key_length is also passed as the size of
+ * the output buffer).
+ *
+ * Returns 0 on success.  On failure, returns -1 with *errstr pointing to a
+ * message about the error details.
+ */
+int
+scram_H(const uint8 *input, pg_cryptohash_type hash_type, int key_length,
+		uint8 *result, const char **errstr)
+{
+	pg_cryptohash_ctx *hash_ctx = pg_cryptohash_create(hash_type);
+	int			rc = 0;
+
+	if (hash_ctx == NULL)
+	{
+		*errstr = pg_cryptohash_error(NULL);	/* returns OOM */
+		return -1;
+	}
+
+	if (pg_cryptohash_init(hash_ctx) < 0 ||
+		pg_cryptohash_update(hash_ctx, input, key_length) < 0 ||
+		pg_cryptohash_final(hash_ctx, result, key_length) < 0)
+	{
+		/* Fetch the message before the context is released */
+		*errstr = pg_cryptohash_error(hash_ctx);
+		rc = -1;
+	}
+
+	pg_cryptohash_free(hash_ctx);
+	return rc;
+}
+
+/*
+ * Calculate ClientKey: the HMAC, keyed with the first key_length bytes of
+ * salted_password, of the literal string "Client Key".
+ *
+ * Returns 0 on success.  On failure, returns -1 with *errstr pointing to a
+ * message about the error details.
+ */
+int
+scram_ClientKey(const uint8 *salted_password,
+				pg_cryptohash_type hash_type, int key_length,
+				uint8 *result, const char **errstr)
+{
+	pg_hmac_ctx *hmac = pg_hmac_create(hash_type);
+	int			rc = 0;
+
+	if (hmac == NULL)
+	{
+		*errstr = pg_hmac_error(NULL);	/* returns OOM */
+		return -1;
+	}
+
+	if (pg_hmac_init(hmac, salted_password, key_length) < 0 ||
+		pg_hmac_update(hmac, (uint8 *) "Client Key", strlen("Client Key")) < 0 ||
+		pg_hmac_final(hmac, result, key_length) < 0)
+	{
+		/* Fetch the message before the context is released */
+		*errstr = pg_hmac_error(hmac);
+		rc = -1;
+	}
+
+	pg_hmac_free(hmac);
+	return rc;
+}
+
+/*
+ * Calculate ServerKey: the HMAC, keyed with the first key_length bytes of
+ * salted_password, of the literal string "Server Key".
+ *
+ * Returns 0 on success.  On failure, returns -1 with *errstr pointing to a
+ * message about the error details.
+ */
+int
+scram_ServerKey(const uint8 *salted_password,
+				pg_cryptohash_type hash_type, int key_length,
+				uint8 *result, const char **errstr)
+{
+	pg_hmac_ctx *hmac = pg_hmac_create(hash_type);
+	int			rc = 0;
+
+	if (hmac == NULL)
+	{
+		*errstr = pg_hmac_error(NULL);	/* returns OOM */
+		return -1;
+	}
+
+	if (pg_hmac_init(hmac, salted_password, key_length) < 0 ||
+		pg_hmac_update(hmac, (uint8 *) "Server Key", strlen("Server Key")) < 0 ||
+		pg_hmac_final(hmac, result, key_length) < 0)
+	{
+		/* Fetch the message before the context is released */
+		*errstr = pg_hmac_error(hmac);
+		rc = -1;
+	}
+
+	pg_hmac_free(hmac);
+	return rc;
+}
+
+
+/*
+ * Construct a SCRAM secret, for storing in pg_authid.rolpassword.
+ *
+ * The password should already have been processed with SASLprep, if
+ * necessary!
+ *
+ * "iterations" must be greater than zero and hash_type must be PG_SHA256;
+ * both are asserted below, and no default iteration count is substituted
+ * here.  The result is palloc'd (backend) or malloc'd (frontend), so the
+ * caller is responsible for freeing it.
+ *
+ * On error in frontend, returns NULL and sets *errstr to point to a message
+ * about the error details.  In backend, errors are raised with elog(ERROR)
+ * instead.
+ */
+char *
+scram_build_secret(pg_cryptohash_type hash_type, int key_length,
+				   const char *salt, int saltlen, int iterations,
+				   const char *password, const char **errstr)
+{
+	uint8		salted_password[SCRAM_MAX_KEY_LEN];
+	uint8		stored_key[SCRAM_MAX_KEY_LEN];
+	uint8		server_key[SCRAM_MAX_KEY_LEN];
+	char	   *secret;
+	char	   *cursor;
+	int			bufsize;
+	int			salt_b64_len;
+	int			stored_b64_len;
+	int			server_b64_len;
+	int			written;
+
+	/* Only this hash method is supported currently */
+	Assert(hash_type == PG_SHA256);
+
+	Assert(iterations > 0);
+
+	/*
+	 * Calculate StoredKey and ServerKey.  StoredKey is obtained by computing
+	 * ClientKey into stored_key and then hashing that buffer in place.
+	 */
+	if (scram_SaltedPassword(password, hash_type, key_length,
+							 salt, saltlen, iterations,
+							 salted_password, errstr) < 0 ||
+		scram_ClientKey(salted_password, hash_type, key_length,
+						stored_key, errstr) < 0 ||
+		scram_H(stored_key, hash_type, key_length,
+				stored_key, errstr) < 0 ||
+		scram_ServerKey(salted_password, hash_type, key_length,
+						server_key, errstr) < 0)
+	{
+		/* the failing step has already filled in *errstr */
+#ifdef FRONTEND
+		return NULL;
+#else
+		elog(ERROR, "could not calculate stored key and server key: %s",
+			 *errstr);
+#endif
+	}
+
+	/*----------
+	 * The format is:
+	 * SCRAM-SHA-256$<iteration count>:<salt>$<StoredKey>:<ServerKey>
+	 *----------
+	 */
+	salt_b64_len = pg_b64_enc_len(saltlen);
+	stored_b64_len = pg_b64_enc_len(key_length);
+	server_b64_len = pg_b64_enc_len(key_length);
+
+	bufsize = strlen("SCRAM-SHA-256") + 1	/* method name and '$' */
+		+ 10 + 1				/* iteration count and ':' */
+		+ salt_b64_len + 1		/* Base64-encoded salt and '$' */
+		+ stored_b64_len + 1	/* Base64-encoded StoredKey and ':' */
+		+ server_b64_len + 1;	/* Base64-encoded ServerKey and terminator */
+
+#ifdef FRONTEND
+	secret = malloc(bufsize);
+	if (!secret)
+	{
+		*errstr = _("out of memory");
+		return NULL;
+	}
+#else
+	secret = palloc(bufsize);
+#endif
+
+	cursor = secret + sprintf(secret, "SCRAM-SHA-256$%d:", iterations);
+
+	/* salt */
+	written = pg_b64_encode(salt, saltlen, cursor, salt_b64_len);
+	if (written < 0)
+	{
+		*errstr = _("could not encode salt");
+		goto encode_failed;
+	}
+	cursor += written;
+	*cursor++ = '$';
+
+	/* stored key */
+	written = pg_b64_encode((char *) stored_key, key_length, cursor,
+							stored_b64_len);
+	if (written < 0)
+	{
+		*errstr = _("could not encode stored key");
+		goto encode_failed;
+	}
+	cursor += written;
+	*cursor++ = ':';
+
+	/* server key */
+	written = pg_b64_encode((char *) server_key, key_length, cursor,
+							server_b64_len);
+	if (written < 0)
+	{
+		*errstr = _("could not encode server key");
+		goto encode_failed;
+	}
+	cursor += written;
+	*cursor++ = '\0';
+
+	Assert(cursor - secret <= bufsize);
+
+	return secret;
+
+encode_failed:
+	/* *errstr was set by the step that failed */
+#ifdef FRONTEND
+	free(secret);
+	return NULL;
+#else
+	elog(ERROR, "%s", *errstr);
+	return NULL;				/* keep compiler quiet */
+#endif
+}
diff --git a/src/common/sha1.c b/src/common/sha1.c
new file mode 100644
index 0000000..29dc55e
--- /dev/null
+++ b/src/common/sha1.c
@@ -0,0 +1,369 @@
+/*-------------------------------------------------------------------------
+ *
+ * sha1.c
+ * Implements the SHA1 Secure Hash Algorithm
+ *
+ * Fallback implementation of SHA1, as specified in RFC 3174.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/sha1.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* $KAME: sha1.c,v 1.3 2000/02/22 14:01:18 itojun Exp $ */
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * FIPS pub 180-1: Secure Hash Algorithm (SHA-1)
+ * based on: http://www.itl.nist.gov/fipspubs/fip180-1.htm
+ * implemented by Jun-ichiro itojun Itoh <itojun@itojun.org>
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <sys/param.h>
+
+#include "sha1_int.h"
+
+/*
+ * Round constants, one per group of 20 rounds.  The table is never written,
+ * so it is const-qualified; its name avoids the underscore-plus-uppercase
+ * form, which C reserves for the implementation.
+ */
+static const uint32 sha1_round_constants[] = {0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6};
+
+/* Round constant for round t; sha1_step() uses t in 0..79. */
+#define K(t) sha1_round_constants[(t) / 20]
+
+/*
+ * Per-round boolean functions.  sha1_step() uses F0 in rounds 0-19, F1 in
+ * rounds 20-39, F2 in rounds 40-59 and F3 in rounds 60-79.  F0 takes each
+ * bit from c where b is set and from d otherwise; F1 and F3 are the same
+ * three-way XOR; F2 is the bitwise majority of its arguments.
+ */
+#define F0(b, c, d) (((b) & (c)) | ((~(b)) & (d)))
+#define F1(b, c, d) (((b) ^ (c)) ^ (d))
+#define F2(b, c, d) (((b) & (c)) | ((b) & (d)) | ((c) & (d)))
+#define F3(b, c, d) (((b) ^ (c)) ^ (d))
+
+/* Rotate x left by n bits; written for a 32-bit x and 0 < n < 32. */
+#define S(n, x) (((x) << (n)) | ((x) >> (32 - (n))))
+
+/*
+ * Accessors into the context.  All of them expect a variable named "ctx"
+ * (a pg_sha1_ctx pointer) in the scope where they are expanded.
+ *
+ * H(n)   - n-th 32-bit word of the hash state
+ * COUNT  - ctx->count, the fill position inside the 64-byte block
+ * BCOUNT - ctx->c.b64[0] divided by 8; not referenced elsewhere in sha1.c
+ * W(n)   - n-th 32-bit word of the block buffer
+ */
+#define H(n) (ctx->h.b32[(n)])
+#define COUNT (ctx->count)
+#define BCOUNT (ctx->c.b64[0] / 8)
+#define W(n) (ctx->m.b32[(n)])
+
+/*
+ * Append one byte to the block buffer at position COUNT, advance COUNT
+ * modulo 64, and run sha1_step() when the advance wraps COUNT back to 0,
+ * i.e. when the block has just been filled.
+ */
+#define PUTPAD(x) \
+do { \
+ ctx->m.b8[(COUNT % 64)] = (x); \
+ COUNT++; \
+ COUNT %= 64; \
+ if (COUNT % 64 == 0) \
+ sha1_step(ctx); \
+} while (0)
+
+/*
+ * sha1_step
+ *
+ * Run the 80 SHA-1 rounds over the 64-byte block held in ctx->m, add the
+ * result into the hash state ctx->h, and zero the block buffer afterwards.
+ */
+static void
+sha1_step(pg_sha1_ctx *ctx)
+{
+	uint32		a,
+				b,
+				c,
+				d,
+				e;
+	uint32		tmp;
+	size_t		t,
+				s;
+
+#ifndef WORDS_BIGENDIAN
+	uint8		raw[64];
+	size_t		i;
+
+	/*
+	 * Reverse the bytes of each 32-bit word so that W(n) yields the block's
+	 * words as big-endian values on a little-endian host.
+	 */
+	memmove(raw, &ctx->m.b8[0], 64);
+	for (i = 0; i < 64; i += 4)
+	{
+		ctx->m.b8[i] = raw[i + 3];
+		ctx->m.b8[i + 1] = raw[i + 2];
+		ctx->m.b8[i + 2] = raw[i + 1];
+		ctx->m.b8[i + 3] = raw[i];
+	}
+#endif
+
+	a = H(0);
+	b = H(1);
+	c = H(2);
+	d = H(3);
+	e = H(4);
+
+	for (t = 0; t < 80; t++)
+	{
+		uint32		f;
+
+		s = t & 0x0f;
+
+		/* Rounds 0-15 use the block words as is; later ones expand in place. */
+		if (t >= 16)
+			W(s) = S(1, W((s + 13) & 0x0f) ^ W((s + 8) & 0x0f) ^ W((s + 2) & 0x0f) ^ W(s));
+
+		/* Pick the boolean function for this group of 20 rounds. */
+		if (t < 20)
+			f = F0(b, c, d);
+		else if (t < 40)
+			f = F1(b, c, d);
+		else if (t < 60)
+			f = F2(b, c, d);
+		else
+			f = F3(b, c, d);
+
+		tmp = S(5, a) + f + e + W(s) + K(t);
+		e = d;
+		d = c;
+		c = S(30, b);
+		b = a;
+		a = tmp;
+	}
+
+	H(0) = H(0) + a;
+	H(1) = H(1) + b;
+	H(2) = H(2) + c;
+	H(3) = H(3) + d;
+	H(4) = H(4) + e;
+
+	memset(&ctx->m.b8[0], 0, 64);
+}
+
+/*
+ * sha1_pad
+ *
+ * Append the closing padding to the buffered message: a 0x80 byte, zero
+ * bytes up to the last eight bytes of a block, then the eight bytes of
+ * ctx->c emitted most significant byte first.  Whenever a block becomes
+ * full along the way it is processed by sha1_step().
+ */
+static void
+sha1_pad(pg_sha1_ctx *ctx)
+{
+	size_t		fill;			/* offset of the first free byte in the block */
+	size_t		room;			/* bytes left in the block from that offset */
+	int			i;
+
+	PUTPAD(0x80);
+
+	fill = COUNT % 64;
+	room = 64 - fill;
+	if (room < 8)
+	{
+		/* The length field does not fit: zero-fill and flush this block. */
+		memset(&ctx->m.b8[fill], 0, room);
+		COUNT += room;
+		COUNT %= 64;
+		sha1_step(ctx);
+		fill = COUNT % 64;		/* should be 0 */
+		room = 64 - fill;		/* should be 64 */
+	}
+
+	/* Zero everything in front of the trailing 8-byte length field. */
+	memset(&ctx->m.b8[fill], 0, room - 8);
+	COUNT += (room - 8);
+	COUNT %= 64;
+
+#ifdef WORDS_BIGENDIAN
+	for (i = 0; i < 8; i++)
+		PUTPAD(ctx->c.b8[i]);
+#else
+	for (i = 7; i >= 0; i--)
+		PUTPAD(ctx->c.b8[i]);
+#endif
+}
+
+/*
+ * sha1_result
+ *
+ * Write the 20 bytes of hash state to digest0, emitting each 32-bit state
+ * word most significant byte first.
+ */
+static void
+sha1_result(uint8 *digest0, pg_sha1_ctx *ctx)
+{
+#ifdef WORDS_BIGENDIAN
+	memmove(digest0, &ctx->h.b8[0], 20);
+#else
+	size_t		word;
+
+	/* Reverse the bytes inside each of the five state words. */
+	for (word = 0; word < 20; word += 4)
+	{
+		digest0[word] = ctx->h.b8[word + 3];
+		digest0[word + 1] = ctx->h.b8[word + 2];
+		digest0[word + 2] = ctx->h.b8[word + 1];
+		digest0[word + 3] = ctx->h.b8[word];
+	}
+#endif
+}
+
+/* External routines for this SHA1 implementation */
+
+/*
+ * pg_sha1_init
+ *
+ * Reset a SHA1 context: clear every field, then load the five initial
+ * hash words.
+ */
+void
+pg_sha1_init(pg_sha1_ctx *ctx)
+{
+	static const uint32 initial_state[5] = {
+		0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
+	};
+	int			i;
+
+	memset(ctx, 0, sizeof(*ctx));
+	for (i = 0; i < 5; i++)
+		H(i) = initial_state[i];
+}
+
+/*
+ * pg_sha1_update
+ *
+ * Feed len bytes from data into a SHA1 context.  Input is copied into the
+ * 64-byte block buffer; each time the buffer fills, sha1_step() consumes it.
+ * The length counter in ctx->c grows by eight per byte copied.
+ */
+void
+pg_sha1_update(pg_sha1_ctx *ctx, const uint8 *data, size_t len)
+{
+	size_t		done;
+
+	for (done = 0; done < len;)
+	{
+		size_t		fill = COUNT % 64;
+		size_t		chunk = 64 - fill;
+
+		/* Never copy more than what is left of the input. */
+		if (chunk > len - done)
+			chunk = len - done;
+
+		memmove(&ctx->m.b8[fill], &data[done], chunk);
+		COUNT += chunk;
+		COUNT %= 64;
+		ctx->c.b64[0] += chunk * 8;
+		if (COUNT % 64 == 0)
+			sha1_step(ctx);
+		done += chunk;
+	}
+}
+
+/*
+ * pg_sha1_final
+ *
+ * Finalize a SHA1 context: sha1_pad() appends the padding and length and
+ * processes the remaining block(s), then sha1_result() writes the 20-byte
+ * hash state to dest.  The context is not cleared here.
+ */
+void
+pg_sha1_final(pg_sha1_ctx *ctx, uint8 *dest)
+{
+ sha1_pad(ctx);
+ sha1_result(dest, ctx);
+}
diff --git a/src/common/sha1_int.h b/src/common/sha1_int.h
new file mode 100644
index 0000000..0ec2c69
--- /dev/null
+++ b/src/common/sha1_int.h
@@ -0,0 +1,81 @@
+/*-------------------------------------------------------------------------
+ *
+ * sha1_int.h
+ * Internal headers for fallback implementation of SHA1
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/sha1_int.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* $KAME: sha1.h,v 1.4 2000/02/22 14:01:18 itojun Exp $ */
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * FIPS pub 180-1: Secure Hash Algorithm (SHA-1)
+ * based on: http://www.itl.nist.gov/fipspubs/fip180-1.htm
+ * implemented by Jun-ichiro itojun Itoh <itojun@itojun.org>
+ */
+
+#ifndef PG_SHA1_INT_H
+#define PG_SHA1_INT_H
+
+#include "common/sha1.h"
+
+/*
+ * Working state of the fallback SHA1 implementation.  Each union offers a
+ * byte view and a word view of the same storage.
+ */
+typedef struct
+{
+ /* hash state; sha1.c reads and writes it as five 32-bit words */
+ union
+ {
+ uint8 b8[20];
+ uint32 b32[5];
+ } h;
+ /* message length counter; pg_sha1_update() adds 8 per input byte */
+ union
+ {
+ uint8 b8[8];
+ uint64 b64[1];
+ } c;
+ /* current 64-byte message block, also used as the 16-word schedule */
+ union
+ {
+ uint8 b8[64];
+ uint32 b32[16];
+ } m;
+ /* fill position inside m, kept modulo 64 by sha1.c */
+ uint8 count;
+} pg_sha1_ctx;
+
+/*
+ * Interface routines for SHA1: pg_sha1_init() resets a context,
+ * pg_sha1_update() feeds it len bytes of data, and pg_sha1_final() pads the
+ * message and writes the 20-byte digest to dest.
+ */
+extern void pg_sha1_init(pg_sha1_ctx *ctx);
+extern void pg_sha1_update(pg_sha1_ctx *ctx, const uint8 *data, size_t len);
+extern void pg_sha1_final(pg_sha1_ctx *ctx, uint8 *dest);
+
+#endif /* PG_SHA1_INT_H */
diff --git a/src/common/sha2.c b/src/common/sha2.c
new file mode 100644
index 0000000..89b28b8
--- /dev/null
+++ b/src/common/sha2.c
@@ -0,0 +1,1017 @@
+/*-------------------------------------------------------------------------
+ *
+ * sha2.c
+ * SHA functions for SHA-224, SHA-256, SHA-384 and SHA-512.
+ *
+ * This includes the fallback implementation for SHA2 cryptographic
+ * hashes.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/sha2.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* $OpenBSD: sha2.c,v 1.6 2004/05/03 02:57:36 millert Exp $ */
+/*
+ * FILE: sha2.c
+ * AUTHOR: Aaron D. Gifford <me@aarongifford.com>
+ *
+ * Copyright (c) 2000-2001, Aaron D. Gifford
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the copyright holder nor the names of contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTOR(S) ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTOR(S) BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $From: sha2.c,v 1.1 2001/11/08 00:01:51 adg Exp adg $
+ */
+
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "sha2_int.h"
+
+/*
+ * In backend, use palloc/pfree to ease the error handling. In frontend,
+ * use malloc to be able to return a failure status back to the caller.
+ *
+ * NOTE(review): confirm that ALLOC/FREE still have callers in this file.
+ */
+#ifndef FRONTEND
+#define ALLOC(size) palloc(size)
+#define FREE(ptr) pfree(ptr)
+#else
+#define ALLOC(size) malloc(size)
+#define FREE(ptr) free(ptr)
+#endif
+
+/*
+ * UNROLLED TRANSFORM LOOP NOTE:
+ * You can define SHA2_UNROLL_TRANSFORM to use the unrolled transform
+ * loop version for the hash transform rounds (defined using macros
+ * later in this file). Either define on the command line, for example:
+ *
+ * cc -DSHA2_UNROLL_TRANSFORM -o sha2 sha2.c sha2prog.c
+ *
+ * or define below:
+ *
+ * #define SHA2_UNROLL_TRANSFORM
+ *
+ */
+
+/*** SHA-256/384/512 Various Length Definitions ***********************/
+#define PG_SHA256_SHORT_BLOCK_LENGTH (PG_SHA256_BLOCK_LENGTH - 8)
+#define PG_SHA384_SHORT_BLOCK_LENGTH (PG_SHA384_BLOCK_LENGTH - 16)
+#define PG_SHA512_SHORT_BLOCK_LENGTH (PG_SHA512_BLOCK_LENGTH - 16)
+
+/*** ENDIAN REVERSAL MACROS *******************************************/
+/*
+ * REVERSE32(w,x) stores in x the 32-bit value w with its four bytes in
+ * reverse order: the two 16-bit halves are exchanged, then the two bytes
+ * inside each half.  REVERSE64(w,x) does the same for the eight bytes of a
+ * 64-bit value: 32-bit halves, then adjacent bytes, then adjacent 16-bit
+ * pairs.
+ *
+ * w is copied into a temporary before x is assigned, so w and x may be the
+ * same lvalue.  Both macros expand to a brace block (a statement, not an
+ * expression) and exist only when WORDS_BIGENDIAN is not defined.
+ */
+#ifndef WORDS_BIGENDIAN
+#define REVERSE32(w,x) { \
+ uint32 tmp = (w); \
+ tmp = (tmp >> 16) | (tmp << 16); \
+ (x) = ((tmp & 0xff00ff00UL) >> 8) | ((tmp & 0x00ff00ffUL) << 8); \
+}
+#define REVERSE64(w,x) { \
+ uint64 tmp = (w); \
+ tmp = (tmp >> 32) | (tmp << 32); \
+ tmp = ((tmp & 0xff00ff00ff00ff00ULL) >> 8) | \
+ ((tmp & 0x00ff00ff00ff00ffULL) << 8); \
+ (x) = ((tmp & 0xffff0000ffff0000ULL) >> 16) | \
+ ((tmp & 0x0000ffff0000ffffULL) << 16); \
+}
+#endif /* not bigendian */
+
+/*
+ * Macro for incrementally adding the unsigned 64-bit integer n to the
+ * unsigned 128-bit integer (represented using a two-element array of
+ * 64-bit words):
+ *
+ * w[0] is the low word and w[1] the high word.  A carry out of the low
+ * word is detected by the sum having wrapped around to a value smaller
+ * than n, in which case the high word is incremented.  n is evaluated
+ * more than once.
+ */
+#define ADDINC128(w,n) { \
+ (w)[0] += (uint64)(n); \
+ if ((w)[0] < (n)) { \
+ (w)[1]++; \
+ } \
+}
+
+/*** THE SIX LOGICAL FUNCTIONS ****************************************/
+/*
+ * Bit shifting and rotation (used by the six SHA-XYZ logical functions):
+ *
+ * NOTE: The naming of R and S appears backwards here (R is a SHIFT and
+ * S is a ROTATION) because the SHA-256/384/512 description document
+ * (see http://www.iwar.org.uk/comsec/resources/cipher/sha256-384-512.pdf)
+ * uses this same "backwards" definition.
+ */
+/*
+ * All macros below evaluate their arguments more than once, so arguments
+ * must be free of side effects.
+ */
+/* Shift-right (used in SHA-256, SHA-384, and SHA-512): */
+#define R(b,x) ((x) >> (b))
+/* 32-bit Rotate-right (used in SHA-256): */
+#define S32(b,x) (((x) >> (b)) | ((x) << (32 - (b))))
+/* 64-bit Rotate-right (used in SHA-384 and SHA-512): */
+#define S64(b,x) (((x) >> (b)) | ((x) << (64 - (b))))
+
+/*
+ * Two of six logical functions used in SHA-256, SHA-384, and SHA-512:
+ * Ch takes each bit from y where x is set and from z otherwise; Maj is the
+ * bitwise majority of x, y and z.
+ */
+#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
+#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+
+/*
+ * Four of six logical functions used in SHA-256.  The upper-case Sigma
+ * forms combine three rotations; the lower-case forms combine two
+ * rotations and one shift.
+ */
+#define Sigma0_256(x) (S32(2, (x)) ^ S32(13, (x)) ^ S32(22, (x)))
+#define Sigma1_256(x) (S32(6, (x)) ^ S32(11, (x)) ^ S32(25, (x)))
+#define sigma0_256(x) (S32(7, (x)) ^ S32(18, (x)) ^ R(3 , (x)))
+#define sigma1_256(x) (S32(17, (x)) ^ S32(19, (x)) ^ R(10, (x)))
+
+/* Four of six logical functions used in SHA-384 and SHA-512 (64-bit forms): */
+#define Sigma0_512(x) (S64(28, (x)) ^ S64(34, (x)) ^ S64(39, (x)))
+#define Sigma1_512(x) (S64(14, (x)) ^ S64(18, (x)) ^ S64(41, (x)))
+#define sigma0_512(x) (S64( 1, (x)) ^ S64( 8, (x)) ^ R( 7, (x)))
+#define sigma1_512(x) (S64(19, (x)) ^ S64(61, (x)) ^ R( 6, (x)))
+
+/*** INTERNAL FUNCTION PROTOTYPES *************************************/
+/* NOTE: These should not be accessed directly from outside this
+ * library -- they are intended for private internal visibility/use
+ * only.
+ *
+ * All three are declared static, so they are visible only in this file.
+ * The Transform routines take the context and a pointer to one input block.
+ */
+static void SHA512_Last(pg_sha512_ctx *context);
+static void SHA256_Transform(pg_sha256_ctx *context, const uint8 *data);
+static void SHA512_Transform(pg_sha512_ctx *context, const uint8 *data);
+
+/*** SHA-XYZ INITIAL HASH VALUES AND CONSTANTS ************************/
+/*
+ * Hash constant words K for SHA-256:
+ * SHA256_Transform() adds K256[j] in round j, for j in 0..63.
+ */
+static const uint32 K256[64] = {
+ 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
+ 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL,
+ 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL,
+ 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL,
+ 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
+ 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL,
+ 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL,
+ 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL,
+ 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL,
+ 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
+ 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL,
+ 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL,
+ 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL,
+ 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL,
+ 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
+ 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
+};
+
+/*
+ * Initial hash value H for SHA-224:
+ * eight 32-bit words, the same layout as sha256_initial_hash_value.
+ */
+static const uint32 sha224_initial_hash_value[8] = {
+ 0xc1059ed8UL,
+ 0x367cd507UL,
+ 0x3070dd17UL,
+ 0xf70e5939UL,
+ 0xffc00b31UL,
+ 0x68581511UL,
+ 0x64f98fa7UL,
+ 0xbefa4fa4UL
+};
+
+/*
+ * Initial hash value H for SHA-256:
+ * pg_sha256_init() copies these eight words into context->state.
+ */
+static const uint32 sha256_initial_hash_value[8] = {
+ 0x6a09e667UL,
+ 0xbb67ae85UL,
+ 0x3c6ef372UL,
+ 0xa54ff53aUL,
+ 0x510e527fUL,
+ 0x9b05688cUL,
+ 0x1f83d9abUL,
+ 0x5be0cd19UL
+};
+
+/*
+ * Hash constant words K for SHA-384 and SHA-512:
+ * SHA512_Transform() adds K512[j] in round j, for j in 0..79.
+ */
+static const uint64 K512[80] = {
+ 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
+ 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
+ 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
+ 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
+ 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
+ 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
+ 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
+ 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
+ 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
+ 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
+ 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
+ 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
+ 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
+ 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
+ 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
+ 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
+ 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
+ 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
+ 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
+ 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
+ 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
+ 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
+ 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
+ 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
+ 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
+ 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
+ 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
+ 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
+ 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
+ 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
+ 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
+ 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
+ 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
+ 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
+ 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
+ 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
+ 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
+ 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
+ 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
+ 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
+};
+
+/*
+ * Initial hash value H for SHA-384:
+ * eight 64-bit words, the same layout as sha512_initial_hash_value.
+ */
+static const uint64 sha384_initial_hash_value[8] = {
+ 0xcbbb9d5dc1059ed8ULL,
+ 0x629a292a367cd507ULL,
+ 0x9159015a3070dd17ULL,
+ 0x152fecd8f70e5939ULL,
+ 0x67332667ffc00b31ULL,
+ 0x8eb44a8768581511ULL,
+ 0xdb0c2e0d64f98fa7ULL,
+ 0x47b5481dbefa4fa4ULL
+};
+
+/*
+ * Initial hash value H for SHA-512:
+ * pg_sha512_init() copies these eight words into context->state.
+ */
+static const uint64 sha512_initial_hash_value[8] = {
+ 0x6a09e667f3bcc908ULL,
+ 0xbb67ae8584caa73bULL,
+ 0x3c6ef372fe94f82bULL,
+ 0xa54ff53a5f1d36f1ULL,
+ 0x510e527fade682d1ULL,
+ 0x9b05688c2b3e6c1fULL,
+ 0x1f83d9abfb41bd6bULL,
+ 0x5be0cd19137e2179ULL
+};
+
+
+/*** SHA-256: *********************************************************/
+/*
+ * pg_sha256_init
+ *
+ * Prepare a SHA-256 context for a new message: load the initial hash words
+ * into state, zero the block buffer and reset the bit counter.  A NULL
+ * context is ignored.
+ */
+void
+pg_sha256_init(pg_sha256_ctx *context)
+{
+	if (context != NULL)
+	{
+		context->bitcount = 0;
+		memset(context->buffer, 0, PG_SHA256_BLOCK_LENGTH);
+		memcpy(context->state, sha256_initial_hash_value, PG_SHA256_DIGEST_LENGTH);
+	}
+}
+
+#ifdef SHA2_UNROLL_TRANSFORM
+
+/* Unrolled SHA-256 round macros: */
+
+/*
+ * One of rounds 0..15: assemble the next 32-bit word from four bytes of
+ * "data" (first byte most significant), store it in W256[j], and apply one
+ * round.  Instead of shuffling the eight working variables after each
+ * round, the caller rotates the order in which it passes them.  Uses and
+ * updates the locals data, j, T1 and W256 of SHA256_Transform().
+ */
+#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h) do { \
+ W256[j] = (uint32)data[3] | ((uint32)data[2] << 8) | \
+ ((uint32)data[1] << 16) | ((uint32)data[0] << 24); \
+ data += 4; \
+ T1 = (h) + Sigma1_256((e)) + Ch((e), (f), (g)) + K256[j] + W256[j]; \
+ (d) += T1; \
+ (h) = T1 + Sigma0_256((a)) + Maj((a), (b), (c)); \
+ j++; \
+} while(0)
+
+/*
+ * One of rounds 16..63: compute the next schedule word in place in the
+ * 16-entry circular array W256 (indices taken modulo 16), then apply one
+ * round as above.  Uses the locals j, s0, s1, T1 and W256.
+ */
+#define ROUND256(a,b,c,d,e,f,g,h) do { \
+ s0 = W256[(j+1)&0x0f]; \
+ s0 = sigma0_256(s0); \
+ s1 = W256[(j+14)&0x0f]; \
+ s1 = sigma1_256(s1); \
+ T1 = (h) + Sigma1_256((e)) + Ch((e), (f), (g)) + K256[j] + \
+ (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); \
+ (d) += T1; \
+ (h) = T1 + Sigma0_256((a)) + Maj((a), (b), (c)); \
+ j++; \
+} while(0)
+
+/*
+ * SHA256_Transform (unrolled variant, built when SHA2_UNROLL_TRANSFORM is
+ * defined)
+ *
+ * Process one block of sixteen 32-bit words read from "data" and add the
+ * result into context->state.  context->buffer is used as the message
+ * schedule and is overwritten.
+ */
+static void
+SHA256_Transform(pg_sha256_ctx *context, const uint8 *data)
+{
+ uint32 a,
+ b,
+ c,
+ d,
+ e,
+ f,
+ g,
+ h,
+ s0,
+ s1;
+ uint32 T1,
+ *W256;
+ int j;
+
+ /* The context's block buffer doubles as the 16-word schedule. */
+ W256 = (uint32 *) context->buffer;
+
+ /* Initialize registers with the prev. intermediate value */
+ a = context->state[0];
+ b = context->state[1];
+ c = context->state[2];
+ d = context->state[3];
+ e = context->state[4];
+ f = context->state[5];
+ g = context->state[6];
+ h = context->state[7];
+
+ j = 0;
+ do
+ {
+ /* Rounds 0 to 15 (unrolled): */
+ /* Eight rounds per pass; each macro call increments j. */
+ ROUND256_0_TO_15(a, b, c, d, e, f, g, h);
+ ROUND256_0_TO_15(h, a, b, c, d, e, f, g);
+ ROUND256_0_TO_15(g, h, a, b, c, d, e, f);
+ ROUND256_0_TO_15(f, g, h, a, b, c, d, e);
+ ROUND256_0_TO_15(e, f, g, h, a, b, c, d);
+ ROUND256_0_TO_15(d, e, f, g, h, a, b, c);
+ ROUND256_0_TO_15(c, d, e, f, g, h, a, b);
+ ROUND256_0_TO_15(b, c, d, e, f, g, h, a);
+ } while (j < 16);
+
+ /* Now for the remaining rounds to 64: */
+ do
+ {
+ ROUND256(a, b, c, d, e, f, g, h);
+ ROUND256(h, a, b, c, d, e, f, g);
+ ROUND256(g, h, a, b, c, d, e, f);
+ ROUND256(f, g, h, a, b, c, d, e);
+ ROUND256(e, f, g, h, a, b, c, d);
+ ROUND256(d, e, f, g, h, a, b, c);
+ ROUND256(c, d, e, f, g, h, a, b);
+ ROUND256(b, c, d, e, f, g, h, a);
+ } while (j < 64);
+
+ /* Compute the current intermediate hash value */
+ context->state[0] += a;
+ context->state[1] += b;
+ context->state[2] += c;
+ context->state[3] += d;
+ context->state[4] += e;
+ context->state[5] += f;
+ context->state[6] += g;
+ context->state[7] += h;
+
+ /* Clean up */
+ /* Zero the working variables (s0 and s1 are not included). */
+ a = b = c = d = e = f = g = h = T1 = 0;
+}
+#else /* SHA2_UNROLL_TRANSFORM */
+
+/*
+ * SHA256_Transform
+ *
+ * Process one block of sixteen 32-bit words read from "data" (first byte of
+ * each word most significant) and add the result into context->state.
+ * context->buffer is used as the 16-word circular message schedule and is
+ * overwritten; callers in this file also pass context->buffer as "data".
+ */
+static void
+SHA256_Transform(pg_sha256_ctx *context, const uint8 *data)
+{
+	uint32		a,
+				b,
+				c,
+				d,
+				e,
+				f,
+				g,
+				h;
+	uint32		s0,
+				s1;
+	uint32		T1,
+				T2;
+	uint32	   *W256 = (uint32 *) context->buffer;
+	int			j;
+
+	/* Start from the previous intermediate hash value. */
+	a = context->state[0];
+	b = context->state[1];
+	c = context->state[2];
+	d = context->state[3];
+	e = context->state[4];
+	f = context->state[5];
+	g = context->state[6];
+	h = context->state[7];
+
+	/* Rounds 0..15: schedule words come straight from the input block. */
+	for (j = 0; j < 16; j++)
+	{
+		W256[j] = ((uint32) data[0] << 24) | ((uint32) data[1] << 16) |
+			((uint32) data[2] << 8) | (uint32) data[3];
+		data += 4;
+
+		T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + W256[j];
+		T2 = Sigma0_256(a) + Maj(a, b, c);
+		h = g;
+		g = f;
+		f = e;
+		e = d + T1;
+		d = c;
+		c = b;
+		b = a;
+		a = T1 + T2;
+	}
+
+	/* Rounds 16..63: schedule words are expanded in place, modulo 16. */
+	for (j = 16; j < 64; j++)
+	{
+		s0 = sigma0_256(W256[(j + 1) & 0x0f]);
+		s1 = sigma1_256(W256[(j + 14) & 0x0f]);
+		W256[j & 0x0f] += s1 + W256[(j + 9) & 0x0f] + s0;
+
+		T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + W256[j & 0x0f];
+		T2 = Sigma0_256(a) + Maj(a, b, c);
+		h = g;
+		g = f;
+		f = e;
+		e = d + T1;
+		d = c;
+		c = b;
+		b = a;
+		a = T1 + T2;
+	}
+
+	/* Fold the working variables into the intermediate hash value. */
+	context->state[0] += a;
+	context->state[1] += b;
+	context->state[2] += c;
+	context->state[3] += d;
+	context->state[4] += e;
+	context->state[5] += f;
+	context->state[6] += g;
+	context->state[7] += h;
+
+	/* Clean up */
+	a = b = c = d = e = f = g = h = T1 = T2 = 0;
+}
+#endif /* SHA2_UNROLL_TRANSFORM */
+
+/*
+ * pg_sha256_update
+ *
+ * Feed len bytes from data into a SHA-256 context.  A partially filled
+ * block buffer is topped up first; complete blocks are then transformed
+ * directly from the caller's memory; any tail is kept in the buffer for the
+ * next call.  context->bitcount grows by eight per byte consumed.
+ */
+void
+pg_sha256_update(pg_sha256_ctx *context, const uint8 *data, size_t len)
+{
+	size_t		buffered;
+
+	/* Calling with no data is valid (we do nothing) */
+	if (len == 0)
+		return;
+
+	buffered = (context->bitcount >> 3) % PG_SHA256_BLOCK_LENGTH;
+	if (buffered > 0)
+	{
+		size_t		room = PG_SHA256_BLOCK_LENGTH - buffered;
+
+		if (len < room)
+		{
+			/* Not enough input to complete the block: just stash it. */
+			memcpy(&context->buffer[buffered], data, len);
+			context->bitcount += len << 3;
+			return;
+		}
+
+		/* Complete the buffered block and process it. */
+		memcpy(&context->buffer[buffered], data, room);
+		context->bitcount += room << 3;
+		len -= room;
+		data += room;
+		SHA256_Transform(context, context->buffer);
+	}
+
+	/* Process as many whole blocks as possible straight from the input. */
+	for (; len >= PG_SHA256_BLOCK_LENGTH;
+		 len -= PG_SHA256_BLOCK_LENGTH, data += PG_SHA256_BLOCK_LENGTH)
+	{
+		SHA256_Transform(context, data);
+		context->bitcount += PG_SHA256_BLOCK_LENGTH << 3;
+	}
+
+	/* Keep whatever is left over for later. */
+	if (len > 0)
+	{
+		memcpy(context->buffer, data, len);
+		context->bitcount += len << 3;
+	}
+}
+
+/*
+ * SHA256_Last
+ *
+ * Finish the message held in the context: append the 0x80 marker byte,
+ * zero-fill, place the 64-bit message bit count in the last eight bytes of
+ * the block, and run the final transform.  When the marker leaves fewer
+ * than eight free bytes, the block is zero-filled and transformed first and
+ * the count goes into a fresh block.
+ *
+ * On little-endian builds context->bitcount is byte-reversed in place
+ * before it is stored, so the count ends up most significant byte first;
+ * after this call the field no longer holds the host-order count.
+ */
+static void
+SHA256_Last(pg_sha256_ctx *context)
+{
+	unsigned int usedspace;
+	uint64		bitcount;
+
+	usedspace = (context->bitcount >> 3) % PG_SHA256_BLOCK_LENGTH;
+#ifndef WORDS_BIGENDIAN
+	/* Convert FROM host byte order */
+	REVERSE64(context->bitcount, context->bitcount);
+#endif
+	if (usedspace > 0)
+	{
+		/* Begin padding with a 1 bit: */
+		context->buffer[usedspace++] = 0x80;
+
+		if (usedspace <= PG_SHA256_SHORT_BLOCK_LENGTH)
+		{
+			/* Set-up for the last transform: */
+			memset(&context->buffer[usedspace], 0, PG_SHA256_SHORT_BLOCK_LENGTH - usedspace);
+		}
+		else
+		{
+			if (usedspace < PG_SHA256_BLOCK_LENGTH)
+			{
+				memset(&context->buffer[usedspace], 0, PG_SHA256_BLOCK_LENGTH - usedspace);
+			}
+			/* Do second-to-last transform: */
+			SHA256_Transform(context, context->buffer);
+
+			/* And set-up for the last transform: */
+			memset(context->buffer, 0, PG_SHA256_SHORT_BLOCK_LENGTH);
+		}
+	}
+	else
+	{
+		/* Set-up for the last transform: */
+		memset(context->buffer, 0, PG_SHA256_SHORT_BLOCK_LENGTH);
+
+		/* Begin padding with a 1 bit: */
+		*context->buffer = 0x80;
+	}
+
+	/*
+	 * Set the bit count.  Copy the bytes with memcpy() rather than storing
+	 * through a uint64 pointer cast: the buffer is a byte array, so the cast
+	 * would depend on its alignment and would access it through an
+	 * incompatible type.
+	 */
+	bitcount = context->bitcount;
+	memcpy(&context->buffer[PG_SHA256_SHORT_BLOCK_LENGTH], &bitcount, sizeof(bitcount));
+
+	/* Final transform: */
+	SHA256_Transform(context, context->buffer);
+}
+
+/*
+ * pg_sha256_final
+ *
+ * Finish a SHA-256 computation.  When digest is non-NULL the message is
+ * padded and processed, and PG_SHA256_DIGEST_LENGTH bytes of state are
+ * copied to digest.  In every case the whole context is zeroed afterwards.
+ */
+void
+pg_sha256_final(pg_sha256_ctx *context, uint8 *digest)
+{
+	/* Without an output buffer there is nothing worth computing. */
+	if (digest != NULL)
+	{
+		SHA256_Last(context);
+
+#ifndef WORDS_BIGENDIAN
+		{
+			int			word = 0;
+
+			/*
+			 * Byte-reverse each state word so that the bytes copied below
+			 * come out most significant first.
+			 */
+			while (word < 8)
+			{
+				REVERSE32(context->state[word], context->state[word]);
+				word++;
+			}
+		}
+#endif
+		memcpy(digest, context->state, PG_SHA256_DIGEST_LENGTH);
+	}
+
+	/* Wipe the context, whether or not a digest was produced. */
+	memset(context, 0, sizeof(*context));
+}
+
+
+/*** SHA-512: *********************************************************/
+/*
+ * pg_sha512_init
+ *
+ * Prepare a SHA-512 context for a new message: load the initial hash words
+ * into state, zero the block buffer and reset both halves of the bit
+ * counter.  A NULL context is ignored.
+ */
+void
+pg_sha512_init(pg_sha512_ctx *context)
+{
+	if (context != NULL)
+	{
+		context->bitcount[0] = 0;
+		context->bitcount[1] = 0;
+		memset(context->buffer, 0, PG_SHA512_BLOCK_LENGTH);
+		memcpy(context->state, sha512_initial_hash_value, PG_SHA512_DIGEST_LENGTH);
+	}
+}
+
+#ifdef SHA2_UNROLL_TRANSFORM
+
+/* Unrolled SHA-512 round macros: */
+
+/*
+ * One of rounds 0..15: assemble the next 64-bit word from eight bytes of
+ * "data" (first byte most significant), store it in W512[j], and apply one
+ * round.  Instead of shuffling the eight working variables after each
+ * round, the caller rotates the order in which it passes them.  Uses and
+ * updates the locals data, j, T1 and W512 of SHA512_Transform().
+ */
+#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h) do { \
+ W512[j] = (uint64)data[7] | ((uint64)data[6] << 8) | \
+ ((uint64)data[5] << 16) | ((uint64)data[4] << 24) | \
+ ((uint64)data[3] << 32) | ((uint64)data[2] << 40) | \
+ ((uint64)data[1] << 48) | ((uint64)data[0] << 56); \
+ data += 8; \
+ T1 = (h) + Sigma1_512((e)) + Ch((e), (f), (g)) + K512[j] + W512[j]; \
+ (d) += T1; \
+ (h) = T1 + Sigma0_512((a)) + Maj((a), (b), (c)); \
+ j++; \
+} while(0)
+
+
+/*
+ * One of rounds 16..79: compute the next schedule word in place in the
+ * 16-entry circular array W512 (indices taken modulo 16), then apply one
+ * round as above.  Uses the locals j, s0, s1, T1 and W512.
+ */
+#define ROUND512(a,b,c,d,e,f,g,h) do { \
+ s0 = W512[(j+1)&0x0f]; \
+ s0 = sigma0_512(s0); \
+ s1 = W512[(j+14)&0x0f]; \
+ s1 = sigma1_512(s1); \
+ T1 = (h) + Sigma1_512((e)) + Ch((e), (f), (g)) + K512[j] + \
+ (W512[j&0x0f] += s1 + W512[(j+9)&0x0f] + s0); \
+ (d) += T1; \
+ (h) = T1 + Sigma0_512((a)) + Maj((a), (b), (c)); \
+ j++; \
+} while(0)
+
+/*
+ * SHA512_Transform (unrolled variant, built when SHA2_UNROLL_TRANSFORM is
+ * defined)
+ *
+ * Process one block of sixteen 64-bit words read from "data" and add the
+ * result into context->state.  context->buffer is used as the message
+ * schedule and is overwritten.
+ */
+static void
+SHA512_Transform(pg_sha512_ctx *context, const uint8 *data)
+{
+ uint64 a,
+ b,
+ c,
+ d,
+ e,
+ f,
+ g,
+ h,
+ s0,
+ s1;
+ uint64 T1,
+ *W512 = (uint64 *) context->buffer;
+ int j;
+
+ /* Initialize registers with the prev. intermediate value */
+ a = context->state[0];
+ b = context->state[1];
+ c = context->state[2];
+ d = context->state[3];
+ e = context->state[4];
+ f = context->state[5];
+ g = context->state[6];
+ h = context->state[7];
+
+ j = 0;
+ do
+ {
+ /* Rounds 0 to 15, eight per pass; each macro call increments j. */
+ ROUND512_0_TO_15(a, b, c, d, e, f, g, h);
+ ROUND512_0_TO_15(h, a, b, c, d, e, f, g);
+ ROUND512_0_TO_15(g, h, a, b, c, d, e, f);
+ ROUND512_0_TO_15(f, g, h, a, b, c, d, e);
+ ROUND512_0_TO_15(e, f, g, h, a, b, c, d);
+ ROUND512_0_TO_15(d, e, f, g, h, a, b, c);
+ ROUND512_0_TO_15(c, d, e, f, g, h, a, b);
+ ROUND512_0_TO_15(b, c, d, e, f, g, h, a);
+ } while (j < 16);
+
+ /* Now for the remaining rounds up to 79: */
+ do
+ {
+ ROUND512(a, b, c, d, e, f, g, h);
+ ROUND512(h, a, b, c, d, e, f, g);
+ ROUND512(g, h, a, b, c, d, e, f);
+ ROUND512(f, g, h, a, b, c, d, e);
+ ROUND512(e, f, g, h, a, b, c, d);
+ ROUND512(d, e, f, g, h, a, b, c);
+ ROUND512(c, d, e, f, g, h, a, b);
+ ROUND512(b, c, d, e, f, g, h, a);
+ } while (j < 80);
+
+ /* Compute the current intermediate hash value */
+ context->state[0] += a;
+ context->state[1] += b;
+ context->state[2] += c;
+ context->state[3] += d;
+ context->state[4] += e;
+ context->state[5] += f;
+ context->state[6] += g;
+ context->state[7] += h;
+
+ /* Clean up */
+ /* Zero the working variables (s0 and s1 are not included). */
+ a = b = c = d = e = f = g = h = T1 = 0;
+}
+#else /* SHA2_UNROLL_TRANSFORM */
+
+/*
+ * SHA512_Transform (looped variant, compiled when SHA2_UNROLL_TRANSFORM is
+ * not defined)
+ *
+ * Runs the 80 compression rounds over the 128 bytes at "data" and adds the
+ * resulting working variables into context->state.
+ *
+ * W512 points into context->buffer, so the message schedule is written
+ * over whatever the buffer held.
+ */
+static void
+SHA512_Transform(pg_sha512_ctx *context, const uint8 *data)
+{
+ uint64 a,
+ b,
+ c,
+ d,
+ e,
+ f,
+ g,
+ h,
+ s0,
+ s1;
+ uint64 T1,
+ T2,
+ *W512 = (uint64 *) context->buffer;
+ int j;
+
+ /* Initialize the working variables from the previous intermediate hash */
+ a = context->state[0];
+ b = context->state[1];
+ c = context->state[2];
+ d = context->state[3];
+ e = context->state[4];
+ f = context->state[5];
+ g = context->state[6];
+ h = context->state[7];
+
+ /* Rounds 0..15: each round consumes eight input bytes */
+ j = 0;
+ do
+ {
+ W512[j] = (uint64) data[7] | ((uint64) data[6] << 8) |
+ ((uint64) data[5] << 16) | ((uint64) data[4] << 24) |
+ ((uint64) data[3] << 32) | ((uint64) data[2] << 40) |
+ ((uint64) data[1] << 48) | ((uint64) data[0] << 56);
+ data += 8;
+ /* Apply the SHA-512 compression function to update a..h */
+ T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + W512[j];
+ T2 = Sigma0_512(a) + Maj(a, b, c);
+ h = g;
+ g = f;
+ f = e;
+ e = d + T1;
+ d = c;
+ c = b;
+ b = a;
+ a = T1 + T2;
+
+ j++;
+ } while (j < 16);
+
+ /* Rounds 16..79: schedule words come from the 16-word window in W512 */
+ do
+ {
+ /* Part of the message block expansion: */
+ s0 = W512[(j + 1) & 0x0f];
+ s0 = sigma0_512(s0);
+ s1 = W512[(j + 14) & 0x0f];
+ s1 = sigma1_512(s1);
+
+ /*
+ * Apply the SHA-512 compression function to update a..h; the schedule
+ * slot W512[j & 0x0f] is updated in place as part of the expression.
+ */
+ T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] +
+ (W512[j & 0x0f] += s1 + W512[(j + 9) & 0x0f] + s0);
+ T2 = Sigma0_512(a) + Maj(a, b, c);
+ h = g;
+ g = f;
+ f = e;
+ e = d + T1;
+ d = c;
+ c = b;
+ b = a;
+ a = T1 + T2;
+
+ j++;
+ } while (j < 80);
+
+ /* Compute the current intermediate hash value */
+ context->state[0] += a;
+ context->state[1] += b;
+ context->state[2] += c;
+ context->state[3] += d;
+ context->state[4] += e;
+ context->state[5] += f;
+ context->state[6] += g;
+ context->state[7] += h;
+
+ /* Clean up: zero the working variables (s0 and s1 are not cleared) */
+ a = b = c = d = e = f = g = h = T1 = T2 = 0;
+}
+#endif /* SHA2_UNROLL_TRANSFORM */
+
+void
+pg_sha512_update(pg_sha512_ctx *context, const uint8 *data, size_t len)
+{
+    size_t      buffered;
+
+    /* An empty update is legal and leaves the context untouched. */
+    if (len == 0)
+        return;
+
+    /* Number of bytes already waiting in context->buffer. */
+    buffered = (context->bitcount[0] >> 3) % PG_SHA512_BLOCK_LENGTH;
+    if (buffered > 0)
+    {
+        size_t      room = PG_SHA512_BLOCK_LENGTH - buffered;
+
+        if (len < room)
+        {
+            /* Still short of a full block: stash the input and stop. */
+            memcpy(&context->buffer[buffered], data, len);
+            ADDINC128(context->bitcount, len << 3);
+            return;
+        }
+
+        /* Top the buffer up to a whole block and compress it. */
+        memcpy(&context->buffer[buffered], data, room);
+        ADDINC128(context->bitcount, room << 3);
+        len -= room;
+        data += room;
+        SHA512_Transform(context, context->buffer);
+    }
+
+    /* Compress every complete block directly from the caller's memory. */
+    for (; len >= PG_SHA512_BLOCK_LENGTH;
+         len -= PG_SHA512_BLOCK_LENGTH, data += PG_SHA512_BLOCK_LENGTH)
+    {
+        SHA512_Transform(context, data);
+        ADDINC128(context->bitcount, PG_SHA512_BLOCK_LENGTH << 3);
+    }
+
+    /* Keep any tail shorter than a block for the next call. */
+    if (len > 0)
+    {
+        memcpy(context->buffer, data, len);
+        ADDINC128(context->bitcount, len << 3);
+    }
+}
+
+/*
+ * SHA512_Last
+ *
+ * Pads the buffered input, appends the 128-bit message length and runs the
+ * final compression (two compressions when the padding does not leave room
+ * for the length field).  Shared by the SHA-512 and SHA-384 final routines.
+ *
+ * Note that context->bitcount is byte-swapped in place on builds without
+ * WORDS_BIGENDIAN, so the context must not be updated afterwards.
+ */
+static void
+SHA512_Last(pg_sha512_ctx *context)
+{
+ unsigned int usedspace;
+
+ /* Bytes currently buffered; must be computed before the swap below */
+ usedspace = (context->bitcount[0] >> 3) % PG_SHA512_BLOCK_LENGTH;
+#ifndef WORDS_BIGENDIAN
+ /* Convert FROM host byte order */
+ REVERSE64(context->bitcount[0], context->bitcount[0]);
+ REVERSE64(context->bitcount[1], context->bitcount[1]);
+#endif
+ if (usedspace > 0)
+ {
+ /* Begin padding with a 1 bit: */
+ context->buffer[usedspace++] = 0x80;
+
+ if (usedspace <= PG_SHA512_SHORT_BLOCK_LENGTH)
+ {
+ /* Set-up for the last transform: zero-fill up to the length field */
+ memset(&context->buffer[usedspace], 0, PG_SHA512_SHORT_BLOCK_LENGTH - usedspace);
+ }
+ else
+ {
+ /* No room left for the length: zero-fill the rest of this block */
+ if (usedspace < PG_SHA512_BLOCK_LENGTH)
+ {
+ memset(&context->buffer[usedspace], 0, PG_SHA512_BLOCK_LENGTH - usedspace);
+ }
+ /* Do second-to-last transform: */
+ SHA512_Transform(context, context->buffer);
+
+ /*
+ * And set-up for the last transform.  NOTE(review): this clears all
+ * but the last two bytes of the block; the tail is overwritten by the
+ * length stores below (presumably PG_SHA512_SHORT_BLOCK_LENGTH is
+ * PG_SHA512_BLOCK_LENGTH - 16 -- confirm in common/sha2.h).
+ */
+ memset(context->buffer, 0, PG_SHA512_BLOCK_LENGTH - 2);
+ }
+ }
+ else
+ {
+ /* Prepare for final transform: */
+ memset(context->buffer, 0, PG_SHA512_SHORT_BLOCK_LENGTH);
+
+ /* Begin padding with a 1 bit: */
+ *context->buffer = 0x80;
+ }
+ /*
+ * Store the length of input data (in bits): bitcount[1] first, then
+ * bitcount[0].  NOTE(review): the stores go through a uint64 pointer
+ * cast into the uint8 buffer.
+ */
+ *(uint64 *) &context->buffer[PG_SHA512_SHORT_BLOCK_LENGTH] = context->bitcount[1];
+ *(uint64 *) &context->buffer[PG_SHA512_SHORT_BLOCK_LENGTH + 8] = context->bitcount[0];
+
+ /* Final transform: */
+ SHA512_Transform(context, context->buffer);
+}
+
+void
+pg_sha512_final(pg_sha512_ctx *context, uint8 *digest)
+{
+    /* With a NULL digest only the final wipe of the context happens. */
+    if (digest != NULL)
+    {
+#ifndef WORDS_BIGENDIAN
+        int         word;
+#endif
+
+        /* Pad the message and run the last compression(s). */
+        SHA512_Last(context);
+
+#ifndef WORDS_BIGENDIAN
+        /* Byte-swap all eight state words in place before copying out. */
+        for (word = 7; word >= 0; word--)
+        {
+            REVERSE64(context->state[word], context->state[word]);
+        }
+#endif
+        memcpy(digest, context->state, PG_SHA512_DIGEST_LENGTH);
+    }
+
+    /* Never leave hashing state behind in the caller's context. */
+    memset(context, 0, sizeof(pg_sha512_ctx));
+}
+
+
+/*** SHA-384: *********************************************************/
+void
+pg_sha384_init(pg_sha384_ctx *context)
+{
+    /* A NULL context is silently ignored. */
+    if (context != NULL)
+    {
+        /* Start with an all-zero block buffer and a zero 128-bit counter ... */
+        context->bitcount[1] = 0;
+        context->bitcount[0] = 0;
+        memset(context->buffer, 0, PG_SHA384_BLOCK_LENGTH);
+
+        /*
+         * ... and seed the whole state array (PG_SHA512_DIGEST_LENGTH bytes)
+         * from sha384_initial_hash_value.
+         */
+        memcpy(context->state, sha384_initial_hash_value, PG_SHA512_DIGEST_LENGTH);
+    }
+}
+
+void
+pg_sha384_update(pg_sha384_ctx *context, const uint8 *data, size_t len)
+{
+    /* pg_sha384_ctx is the same struct as pg_sha512_ctx; reuse its update. */
+    pg_sha512_ctx *ctx512 = (pg_sha512_ctx *) context;
+
+    pg_sha512_update(ctx512, data, len);
+}
+
+void
+pg_sha384_final(pg_sha384_ctx *context, uint8 *digest)
+{
+    /* With a NULL digest only the final wipe of the context happens. */
+    if (digest != NULL)
+    {
+#ifndef WORDS_BIGENDIAN
+        int         word;
+#endif
+
+        /* Padding and final compression are shared with SHA-512. */
+        SHA512_Last((pg_sha512_ctx *) context);
+
+#ifndef WORDS_BIGENDIAN
+        /* Byte-swap the first six state words in place before copying out. */
+        for (word = 5; word >= 0; word--)
+        {
+            REVERSE64(context->state[word], context->state[word]);
+        }
+#endif
+        memcpy(digest, context->state, PG_SHA384_DIGEST_LENGTH);
+    }
+
+    /* Never leave hashing state behind in the caller's context. */
+    memset(context, 0, sizeof(pg_sha384_ctx));
+}
+
+/*** SHA-224: *********************************************************/
+void
+pg_sha224_init(pg_sha224_ctx *context)
+{
+    /* A NULL context is silently ignored. */
+    if (context != NULL)
+    {
+        /* Start with an all-zero block buffer and a zero bit counter ... */
+        context->bitcount = 0;
+        memset(context->buffer, 0, PG_SHA256_BLOCK_LENGTH);
+
+        /*
+         * ... and seed the whole state array (PG_SHA256_DIGEST_LENGTH bytes)
+         * from sha224_initial_hash_value.
+         */
+        memcpy(context->state, sha224_initial_hash_value, PG_SHA256_DIGEST_LENGTH);
+    }
+}
+
+void
+pg_sha224_update(pg_sha224_ctx *context, const uint8 *data, size_t len)
+{
+    /* pg_sha224_ctx is the same struct as pg_sha256_ctx; reuse its update. */
+    pg_sha256_ctx *ctx256 = (pg_sha256_ctx *) context;
+
+    pg_sha256_update(ctx256, data, len);
+}
+
+void
+pg_sha224_final(pg_sha224_ctx *context, uint8 *digest)
+{
+    /* With a NULL digest only the final wipe of the context happens. */
+    if (digest != NULL)
+    {
+#ifndef WORDS_BIGENDIAN
+        int         word;
+#endif
+
+        /* Padding and final compression are shared with SHA-256. */
+        SHA256_Last(context);
+
+#ifndef WORDS_BIGENDIAN
+        /* Byte-swap all eight state words in place before copying out. */
+        for (word = 7; word >= 0; word--)
+        {
+            REVERSE32(context->state[word], context->state[word]);
+        }
+#endif
+        /* Only PG_SHA224_DIGEST_LENGTH bytes of the state are output. */
+        memcpy(digest, context->state, PG_SHA224_DIGEST_LENGTH);
+    }
+
+    /* Never leave hashing state behind in the caller's context. */
+    memset(context, 0, sizeof(pg_sha224_ctx));
+}
diff --git a/src/common/sha2_int.h b/src/common/sha2_int.h
new file mode 100644
index 0000000..954e5d7
--- /dev/null
+++ b/src/common/sha2_int.h
@@ -0,0 +1,91 @@
+/*-------------------------------------------------------------------------
+ *
+ * sha2_int.h
+ * Internal headers for fallback implementation of SHA{224,256,384,512}
+ *
+ * Portions Copyright (c) 2016-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/sha2_int.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* $OpenBSD: sha2.h,v 1.2 2004/04/28 23:11:57 millert Exp $ */
+
+/*
+ * FILE: sha2.h
+ * AUTHOR: Aaron D. Gifford <me@aarongifford.com>
+ *
+ * Copyright (c) 2000-2001, Aaron D. Gifford
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the copyright holder nor the names of contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTOR(S) ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTOR(S) BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $From: sha2.h,v 1.1 2001/11/08 00:02:01 adg Exp adg $
+ */
+
+#ifndef PG_SHA2_INT_H
+#define PG_SHA2_INT_H
+
+#include "common/sha2.h"
+
+typedef struct pg_sha256_ctx
+{
+ uint32 state[8]; /* hash state words; copied out as the digest by *_final */
+ uint64 bitcount; /* zeroed by init; presumably bits hashed so far -- confirm in sha2.c */
+ uint8 buffer[PG_SHA256_BLOCK_LENGTH]; /* one-block scratch buffer, zeroed by init */
+} pg_sha256_ctx;
+typedef struct pg_sha512_ctx
+{
+ uint64 state[8]; /* hash state words; copied out as the digest by *_final */
+ uint64 bitcount[2]; /* 128-bit count of message bits; [0] is the low word */
+ uint8 buffer[PG_SHA512_BLOCK_LENGTH]; /* holds a partial input block between updates */
+} pg_sha512_ctx;
+typedef struct pg_sha256_ctx pg_sha224_ctx; /* SHA-224 reuses the SHA-256 context */
+typedef struct pg_sha512_ctx pg_sha384_ctx; /* SHA-384 reuses the SHA-512 context */
+
+/*
+ * Interface routines for SHA224/256/384/512
+ *
+ * Every algorithm exposes the same three calls: *_init resets a context,
+ * *_update feeds len bytes of input into it, and *_final writes the digest
+ * to dest and clears the context.
+ */
+extern void pg_sha224_init(pg_sha224_ctx *ctx);
+extern void pg_sha224_update(pg_sha224_ctx *ctx, const uint8 *input0,
+ size_t len);
+extern void pg_sha224_final(pg_sha224_ctx *ctx, uint8 *dest);
+
+extern void pg_sha256_init(pg_sha256_ctx *ctx);
+extern void pg_sha256_update(pg_sha256_ctx *ctx, const uint8 *input0,
+ size_t len);
+extern void pg_sha256_final(pg_sha256_ctx *ctx, uint8 *dest);
+
+extern void pg_sha384_init(pg_sha384_ctx *ctx);
+extern void pg_sha384_update(pg_sha384_ctx *ctx, const uint8 *input0,
+ size_t len);
+extern void pg_sha384_final(pg_sha384_ctx *ctx, uint8 *dest);
+
+extern void pg_sha512_init(pg_sha512_ctx *ctx);
+extern void pg_sha512_update(pg_sha512_ctx *ctx, const uint8 *input0,
+ size_t len);
+extern void pg_sha512_final(pg_sha512_ctx *ctx, uint8 *dest);
+
+#endif /* PG_SHA2_INT_H */
diff --git a/src/common/sprompt.c b/src/common/sprompt.c
new file mode 100644
index 0000000..201c831
--- /dev/null
+++ b/src/common/sprompt.c
@@ -0,0 +1,181 @@
+/*-------------------------------------------------------------------------
+ *
+ * sprompt.c
+ * simple_prompt() routine
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/sprompt.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "common/fe_memutils.h"
+#include "common/string.h"
+
+#ifdef HAVE_TERMIOS_H
+#include <termios.h>
+#endif
+
+
+/*
+ * simple_prompt
+ *
+ * Generalized function especially intended for reading in usernames and
+ * passwords interactively. Reads from /dev/tty or stdin/stderr.
+ *
+ * prompt: The prompt to print, or NULL if none (automatically localized)
+ * echo: Set to false if you want to hide what is entered (for passwords)
+ *
+ * The input (without trailing newline) is returned as a malloc'd string.
+ * Caller is responsible for freeing it when done.
+ */
+char *
+simple_prompt(const char *prompt, bool echo)
+{
+    char       *answer;
+
+    /* Delegate with no PromptInterruptContext, i.e. no cancel support. */
+    answer = simple_prompt_extended(prompt, echo, NULL);
+
+    return answer;
+}
+
+/*
+ * simple_prompt_extended
+ *
+ * This is the same as simple_prompt(), except that prompt_ctx can
+ * optionally be provided to allow this function to be canceled via an
+ * existing SIGINT signal handler that will longjmp to the specified place
+ * only when *(prompt_ctx->enabled) is true. If canceled, this function
+ * returns an empty string, and prompt_ctx->canceled is set to true.
+ */
+char *
+simple_prompt_extended(const char *prompt, bool echo,
+ PromptInterruptContext *prompt_ctx)
+{
+ char *result;
+ FILE *termin,
+ *termout;
+#if defined(HAVE_TERMIOS_H)
+ struct termios t_orig,
+ t;
+#elif defined(WIN32)
+ HANDLE t = NULL;
+ DWORD t_orig = 0;
+#endif
+
+#ifdef WIN32
+
+ /*
+ * A Windows console has an "input code page" and an "output code page";
+ * these usually match each other, but they rarely match the "Windows ANSI
+ * code page" defined at system boot and expected of "char *" arguments to
+ * Windows API functions. The Microsoft CRT write() implementation
+ * automatically converts text between these code pages when writing to a
+ * console. To identify such file descriptors, it calls GetConsoleMode()
+ * on the underlying HANDLE, which in turn requires GENERIC_READ access on
+ * the HANDLE. Opening termout in mode "w+" allows that detection to
+ * succeed. Otherwise, write() would not recognize the descriptor as a
+ * console, and non-ASCII characters would display incorrectly.
+ *
+ * XXX fgets() still receives text in the console's input code page. This
+ * makes non-ASCII credentials unportable.
+ *
+ * Unintuitively, we also open termin in mode "w+", even though we only
+ * read it; that's needed for SetConsoleMode() to succeed.
+ */
+ termin = fopen("CONIN$", "w+");
+ termout = fopen("CONOUT$", "w+");
+#else
+
+ /*
+ * Do not try to collapse these into one "w+" mode file. Doesn't work on
+ * some platforms (eg, HPUX 10.20).
+ */
+ termin = fopen("/dev/tty", "r");
+ termout = fopen("/dev/tty", "w");
+#endif
+ /*
+ * Fall back to stdin/stderr if either terminal stream could not be
+ * opened (or, on Windows, if OSTYPE says we are running under MSYS).
+ */
+ if (!termin || !termout
+#ifdef WIN32
+
+ /*
+ * Direct console I/O does not work from the MSYS 1.0.10 console. Writes
+ * reach nowhere user-visible; reads block indefinitely. XXX This affects
+ * most Windows terminal environments, including rxvt, mintty, Cygwin
+ * xterm, Cygwin sshd, and PowerShell ISE. Switch to a more-generic test.
+ */
+ || (getenv("OSTYPE") && strcmp(getenv("OSTYPE"), "msys") == 0)
+#endif
+ )
+ {
+ /* Close whichever stream did open, then use both standard streams */
+ if (termin)
+ fclose(termin);
+ if (termout)
+ fclose(termout);
+ termin = stdin;
+ termout = stderr;
+ }
+
+ if (!echo)
+ {
+#if defined(HAVE_TERMIOS_H)
+ /*
+ * disable echo via tcgetattr/tcsetattr, remembering the original
+ * settings in t_orig.  NOTE(review): neither return value is checked.
+ */
+ tcgetattr(fileno(termin), &t);
+ t_orig = t;
+ t.c_lflag &= ~ECHO;
+ tcsetattr(fileno(termin), TCSAFLUSH, &t);
+#elif defined(WIN32)
+ /* need the file's HANDLE to turn echo off */
+ t = (HANDLE) _get_osfhandle(_fileno(termin));
+
+ /* save the old configuration first */
+ GetConsoleMode(t, &t_orig);
+
+ /* set to the new mode */
+ SetConsoleMode(t, ENABLE_LINE_INPUT | ENABLE_PROCESSED_INPUT);
+#endif
+ }
+
+ if (prompt)
+ {
+ /* The prompt is passed through _() before being written out */
+ fputs(_(prompt), termout);
+ fflush(termout);
+ }
+
+ result = pg_get_line(termin, prompt_ctx);
+
+ /* If we failed to read anything, just return an empty string */
+ if (result == NULL)
+ result = pg_strdup("");
+
+ /* strip trailing newline, including \r in case we're on Windows */
+ (void) pg_strip_crlf(result);
+
+ if (!echo)
+ {
+ /* restore previous echo behavior, then echo \n */
+#if defined(HAVE_TERMIOS_H)
+ tcsetattr(fileno(termin), TCSAFLUSH, &t_orig);
+ fputs("\n", termout);
+ fflush(termout);
+#elif defined(WIN32)
+ SetConsoleMode(t, t_orig);
+ fputs("\n", termout);
+ fflush(termout);
+#endif
+ }
+ else if (prompt_ctx && prompt_ctx->canceled)
+ {
+ /* also echo \n if prompt was canceled */
+ fputs("\n", termout);
+ fflush(termout);
+ }
+
+ /*
+ * termin != stdin means both streams were opened above (the fallback
+ * path always replaces them together), so both are closed here.
+ */
+ if (termin != stdin)
+ {
+ fclose(termin);
+ fclose(termout);
+ }
+
+ return result;
+}
diff --git a/src/common/string.c b/src/common/string.c
new file mode 100644
index 0000000..de97413
--- /dev/null
+++ b/src/common/string.c
@@ -0,0 +1,164 @@
+/*-------------------------------------------------------------------------
+ *
+ * string.c
+ * string handling helpers
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/string.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/string.h"
+#include "lib/stringinfo.h"
+
+
+/*
+ * Returns whether the string `str' has the postfix `end'.
+ */
+bool
+pg_str_endswith(const char *str, const char *end)
+{
+    size_t      str_len = strlen(str);
+    size_t      suffix_len = strlen(end);
+    bool        matches = false;
+
+    /* A suffix longer than the string itself can never match. */
+    if (suffix_len <= str_len)
+    {
+        /* Line the suffix up with the tail of str and compare the two. */
+        const char *tail = str + (str_len - suffix_len);
+
+        matches = (strcmp(tail, end) == 0);
+    }
+
+    return matches;
+}
+
+
+/*
+ * strtoint --- just like strtol, but returns int not long
+ */
+int
+strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base)
+{
+    long        parsed = strtol(str, endptr, base);
+    int         narrowed = (int) parsed;
+
+    /* Flag values that do not survive the round trip through int. */
+    if ((long) narrowed != parsed)
+        errno = ERANGE;
+
+    return narrowed;
+}
+
+
+/*
+ * pg_clean_ascii -- Replace any byte outside printable ASCII (32..126) with a "\xXX" string
+ *
+ * Makes a newly allocated copy of the string passed in, which must be
+ * '\0'-terminated. In the backend, additional alloc_flags may be provided and
+ * will be passed as-is to palloc_extended(); in the frontend, alloc_flags is
+ * ignored and the copy is malloc'd.
+ *
+ * This function exists specifically to deal with filtering out
+ * non-ASCII characters in a few places where the client can provide an almost
+ * arbitrary string (and it isn't checked to ensure it's a valid username or
+ * database name or similar) and we don't want to have control characters or other
+ * things ending up in the log file where server admins might end up with a
+ * messed up terminal when looking at them.
+ *
+ * In general, this function should NOT be used; instead, consider how to handle
+ * the string without needing to filter out the non-ASCII characters.
+ *
+ * Ultimately, we'd like to improve the situation to not require replacing all
+ * non-ASCII but perform more intelligent filtering which would allow UTF or
+ * similar, but it's unclear exactly what we should allow, so stick to ASCII only
+ * for now.
+ */
+char *
+pg_clean_ascii(const char *str, int alloc_flags)
+{
+    size_t      outsize;
+    size_t      outpos = 0;
+    size_t      inpos;
+    char       *out;
+
+    /*
+     * Each input byte expands to at most the four characters of "\xXX";
+     * one more byte is reserved for the terminating null.
+     */
+    outsize = strlen(str) * 4 + 1;
+
+#ifdef FRONTEND
+    out = malloc(outsize);
+#else
+    out = palloc_extended(outsize, alloc_flags);
+#endif
+
+    /* A failed allocation is reported to the caller as NULL. */
+    if (out == NULL)
+        return NULL;
+
+    for (inpos = 0; str[inpos] != '\0'; inpos++)
+    {
+        char        ch = str[inpos];
+
+        if (ch >= 32 && ch <= 126)
+        {
+            /* Values 32..126 are copied through unchanged. */
+            Assert(outpos < outsize);
+            out[outpos] = ch;
+            outpos++;
+        }
+        else
+        {
+            /* Everything else is written as a four-character escape. */
+            Assert(outpos < (outsize - 3));
+            snprintf(&out[outpos], outsize - outpos, "\\x%02x", (unsigned char) ch);
+            outpos += 4;
+        }
+    }
+
+    Assert(outpos < outsize);
+    out[outpos] = '\0';
+    return out;
+}
+
+
+/*
+ * pg_is_ascii -- Check if string is made only of ASCII characters
+ */
+bool
+pg_is_ascii(const char *str)
+{
+    const char *cursor;
+
+    /* Walk to the terminator; stop at the first IS_HIGHBIT_SET() hit. */
+    for (cursor = str; *cursor != '\0'; cursor++)
+    {
+        if (IS_HIGHBIT_SET(*cursor))
+            return false;
+    }
+
+    return true;
+}
+
+
+/*
+ * pg_strip_crlf -- Remove any trailing newline and carriage return
+ *
+ * Removes any trailing newline and carriage return characters (\r on
+ * Windows) in the input string, zero-terminating it.
+ *
+ * The passed in string must be zero-terminated. This function returns
+ * the new length of the string.
+ */
+int
+pg_strip_crlf(char *str)
+{
+    int         remaining = strlen(str);
+
+    /* Peel trailing '\n' and '\r' bytes off the end, one at a time. */
+    for (;;)
+    {
+        char        last;
+
+        if (remaining <= 0)
+            break;
+
+        last = str[remaining - 1];
+        if (last != '\n' && last != '\r')
+            break;
+
+        str[--remaining] = '\0';
+    }
+
+    return remaining;
+}
diff --git a/src/common/stringinfo.c b/src/common/stringinfo.c
new file mode 100644
index 0000000..05b22b5
--- /dev/null
+++ b/src/common/stringinfo.c
@@ -0,0 +1,343 @@
+/*-------------------------------------------------------------------------
+ *
+ * stringinfo.c
+ *
+ * StringInfo provides an extensible string data type (currently limited to a
+ * length of 1GB). It can be used to buffer either ordinary C strings
+ * (null-terminated text) or arbitrary binary data. All storage is allocated
+ * with palloc() (falling back to malloc in frontend code).
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/common/stringinfo.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+
+#include "postgres.h"
+#include "utils/memutils.h"
+
+#else
+
+#include "postgres_fe.h"
+
+/* It's possible we could use a different value for this in frontend code */
+#define MaxAllocSize ((Size) 0x3fffffff) /* 1 gigabyte - 1 */
+
+#endif
+
+#include "lib/stringinfo.h"
+
+
+/*
+ * makeStringInfo
+ *
+ * Create an empty 'StringInfoData' & return a pointer to it.
+ */
+StringInfo
+makeStringInfo(void)
+{
+    StringInfo  fresh = (StringInfo) palloc(sizeof(StringInfoData));
+
+    /* Give the new struct a data buffer and mark it empty. */
+    initStringInfo(fresh);
+
+    return fresh;
+}
+
+/*
+ * initStringInfo
+ *
+ * Initialize a StringInfoData struct (with previously undefined contents)
+ * to describe an empty string.
+ */
+void
+initStringInfo(StringInfo str)
+{
+    const int   initial_size = 1024;    /* starting capacity, in bytes */
+
+    str->data = (char *) palloc(initial_size);
+    str->maxlen = initial_size;
+
+    /* Store the terminating null and zero len/cursor. */
+    resetStringInfo(str);
+}
+
+/*
+ * resetStringInfo
+ *
+ * Reset the StringInfo: the data buffer remains valid, but its
+ * previous content, if any, is cleared.
+ */
+void
+resetStringInfo(StringInfo str)
+{
+    /* Truncate to the empty string; the allocation itself is kept. */
+    str->data[0] = '\0';
+
+    /* Nothing is stored and nothing has been consumed yet. */
+    str->len = 0;
+    str->cursor = 0;
+}
+
+/*
+ * appendStringInfo
+ *
+ * Format text data under the control of fmt (an sprintf-style format string)
+ * and append it to whatever is already in str. More space is allocated
+ * to str if necessary. This is sort of like a combination of sprintf and
+ * strcat.
+ */
+void
+appendStringInfo(StringInfo str, const char *fmt,...)
+{
+    int         entry_errno = errno;
+    int         shortfall;
+
+    do
+    {
+        va_list     ap;
+
+        /*
+         * Every attempt needs the caller's errno (the format may contain
+         * "%m") and a freshly started argument list.
+         */
+        errno = entry_errno;
+        va_start(ap, fmt);
+        shortfall = appendStringInfoVA(str, fmt, ap);
+        va_end(ap);
+
+        /* Nonzero means it did not fit: grow by that much and retry. */
+        if (shortfall != 0)
+            enlargeStringInfo(str, shortfall);
+    } while (shortfall != 0);
+}
+
+/*
+ * appendStringInfoVA
+ *
+ * Attempt to format text data under the control of fmt (an sprintf-style
+ * format string) and append it to whatever is already in str. If successful
+ * return zero; if not (because there's not enough space), return an estimate
+ * of the space needed, without modifying str. Typically the caller should
+ * pass the return value to enlargeStringInfo() before trying again; see
+ * appendStringInfo for standard usage pattern.
+ *
+ * Caution: callers must be sure to preserve their entry-time errno
+ * when looping, in case the fmt contains "%m".
+ *
+ * XXX This API is ugly, but there seems no alternative given the C spec's
+ * restrictions on what can portably be done with va_list arguments: you have
+ * to redo va_start before you can rescan the argument list, and we can't do
+ * that from here.
+ */
+int
+appendStringInfoVA(StringInfo str, const char *fmt, va_list args)
+{
+    int         room;
+    size_t      written;
+
+    Assert(str != NULL);
+
+    /*
+     * With fewer than 16 bytes free, skip the formatting attempt and ask
+     * the caller for 32 more bytes; that figure is only a guess, since
+     * nothing has been formatted yet.
+     */
+    room = str->maxlen - str->len;
+    if (room < 16)
+        return 32;
+
+    written = pvsnprintf(str->data + str->len, (size_t) room, fmt, args);
+
+    if (written >= (size_t) room)
+    {
+        /*
+         * It did not fit.  Put the terminator back so str looks untouched
+         * and hand pvsnprintf's size estimate to the caller.  (It fits in
+         * an int because it is no more than MaxAllocSize.)
+         */
+        str->data[str->len] = '\0';
+        return (int) written;
+    }
+
+    /* It fit; "written" does not count the trailing null. */
+    str->len += (int) written;
+    return 0;
+}
+
+/*
+ * appendStringInfoString
+ *
+ * Append a null-terminated string to str.
+ * Like appendStringInfo(str, "%s", s) but faster.
+ */
+void
+appendStringInfoString(StringInfo str, const char *s)
+{
+    /* Measure once, then let the binary-append path do the copy. */
+    size_t      nbytes = strlen(s);
+
+    appendBinaryStringInfo(str, s, nbytes);
+}
+
+/*
+ * appendStringInfoChar
+ *
+ * Append a single byte to str.
+ * Like appendStringInfo(str, "%c", ch) but much faster.
+ */
+void
+appendStringInfoChar(StringInfo str, char ch)
+{
+    int         pos;
+
+    /* Room is needed for ch itself plus the terminating null. */
+    if (str->maxlen <= str->len + 1)
+        enlargeStringInfo(str, 1);
+
+    /* Store the byte, bump the length, and re-terminate. */
+    pos = str->len;
+    str->data[pos] = ch;
+    str->len = pos + 1;
+    str->data[pos + 1] = '\0';
+}
+
+/*
+ * appendStringInfoSpaces
+ *
+ * Append the specified number of spaces to a buffer.
+ */
+void
+appendStringInfoSpaces(StringInfo str, int count)
+{
+    /* Zero or negative counts append nothing. */
+    if (count <= 0)
+        return;
+
+    /* Reserve the space, fill it with blanks, then re-terminate. */
+    enlargeStringInfo(str, count);
+    memset(str->data + str->len, ' ', count);
+    str->len += count;
+    str->data[str->len] = '\0';
+}
+
+/*
+ * appendBinaryStringInfo
+ *
+ * Append arbitrary binary data to a StringInfo, allocating more space
+ * if necessary. Ensures that a trailing null byte is present.
+ */
+void
+appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
+{
+    Assert(str != NULL);
+
+    /* Grow the buffer first if datalen more bytes would not fit. */
+    enlargeStringInfo(str, datalen);
+
+    /* Copy the payload in right after the current contents. */
+    memcpy(&str->data[str->len], data, datalen);
+    str->len += datalen;
+
+    /*
+     * Re-terminate.  The null is probably pointless for true binary data,
+     * but some callers use this routine for text that simply is not
+     * null-terminated on input.
+     */
+    str->data[str->len] = '\0';
+}
+
+/*
+ * appendBinaryStringInfoNT
+ *
+ * Append arbitrary binary data to a StringInfo, allocating more space
+ * if necessary. Does not ensure a trailing null-byte exists.
+ */
+void
+appendBinaryStringInfoNT(StringInfo str, const void *data, int datalen)
+{
+    Assert(str != NULL);
+
+    /* Grow the buffer first if datalen more bytes would not fit. */
+    enlargeStringInfo(str, datalen);
+
+    /* Copy the payload in; no terminating null is written afterwards. */
+    memcpy(&str->data[str->len], data, datalen);
+    str->len += datalen;
+}
+
+/*
+ * enlargeStringInfo
+ *
+ * Make sure there is enough space for 'needed' more bytes
+ * ('needed' does not include the terminating null).
+ *
+ * External callers usually need not concern themselves with this, since
+ * all stringinfo.c routines do it automatically. However, if a caller
+ * knows that a StringInfo will eventually become X bytes large, it
+ * can save some palloc overhead by enlarging the buffer before starting
+ * to store data in it.
+ *
+ * NB: In the backend, because we use repalloc() to enlarge the buffer, the
+ * string buffer will remain allocated in the same memory context that was
+ * current when initStringInfo was called, even if another context is now
+ * current. This is the desired and indeed critical behavior!
+ */
+void
+enlargeStringInfo(StringInfo str, int needed)
+{
+ int newlen;
+
+ /*
+ * Guard against out-of-range "needed" values. Without this, we can get
+ * an overflow or infinite loop in the following.  The backend raises an
+ * ERROR; frontend code prints to stderr and exits.
+ */
+ if (needed < 0) /* should not happen */
+ {
+#ifndef FRONTEND
+ elog(ERROR, "invalid string enlargement request size: %d", needed);
+#else
+ fprintf(stderr, "invalid string enlargement request size: %d\n", needed);
+ exit(EXIT_FAILURE);
+#endif
+ }
+ /* Refuse any request that would push the total to MaxAllocSize or more */
+ if (((Size) needed) >= (MaxAllocSize - (Size) str->len))
+ {
+#ifndef FRONTEND
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("out of memory"),
+ errdetail("Cannot enlarge string buffer containing %d bytes by %d more bytes.",
+ str->len, needed)));
+#else
+ fprintf(stderr,
+ _("out of memory\n\nCannot enlarge string buffer containing %d bytes by %d more bytes.\n"),
+ str->len, needed);
+ exit(EXIT_FAILURE);
+#endif
+ }
+
+ needed += str->len + 1; /* total space required now, including the null */
+
+ /* Because of the above test, we now have needed <= MaxAllocSize */
+
+ if (needed <= str->maxlen)
+ return; /* got enough space already */
+
+ /*
+ * We don't want to allocate just a little more space with each append;
+ * for efficiency, double the buffer size each time it overflows.
+ * Actually, we might need to more than double it if 'needed' is big...
+ */
+ newlen = 2 * str->maxlen;
+ while (needed > newlen)
+ newlen = 2 * newlen;
+
+ /*
+ * Clamp to MaxAllocSize in case we went past it. Note we are assuming
+ * here that MaxAllocSize <= INT_MAX/2, else the above loop could
+ * overflow. We will still have newlen >= needed.
+ */
+ if (newlen > (int) MaxAllocSize)
+ newlen = (int) MaxAllocSize;
+
+ /* Resize in place; only data and maxlen change, len and cursor do not */
+ str->data = (char *) repalloc(str->data, newlen);
+
+ str->maxlen = newlen;
+}
diff --git a/src/common/unicode/.gitignore b/src/common/unicode/.gitignore
new file mode 100644
index 0000000..46243f7
--- /dev/null
+++ b/src/common/unicode/.gitignore
@@ -0,0 +1,9 @@
+/norm_test
+/norm_test_table.h
+
+# Downloaded files
+/CompositionExclusions.txt
+/DerivedNormalizationProps.txt
+/EastAsianWidth.txt
+/NormalizationTest.txt
+/UnicodeData.txt
diff --git a/src/common/unicode/Makefile b/src/common/unicode/Makefile
new file mode 100644
index 0000000..382da47
--- /dev/null
+++ b/src/common/unicode/Makefile
@@ -0,0 +1,72 @@
+#-------------------------------------------------------------------------
+#
+# Makefile
+# Makefile for src/common/unicode
+#
+# IDENTIFICATION
+# src/common/unicode/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/common/unicode
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+override CPPFLAGS := -DFRONTEND -I. $(CPPFLAGS)
+LIBS += $(PTHREAD_LIBS)
+
+# By default, do nothing.
+all:
+
+update-unicode: unicode_norm_table.h unicode_nonspacing_table.h unicode_east_asian_fw_table.h unicode_normprops_table.h unicode_norm_hashfunc.h
+ mv $^ $(top_srcdir)/src/include/common/
+ $(MAKE) normalization-check
+
+# These files are part of the Unicode Character Database. Download
+# them on demand. The dependency on Makefile.global is for
+# UNICODE_VERSION.
+UnicodeData.txt EastAsianWidth.txt DerivedNormalizationProps.txt CompositionExclusions.txt NormalizationTest.txt: $(top_builddir)/src/Makefile.global
+ $(DOWNLOAD) https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/$(@F)
+
+# Generation of conversion tables used for string normalization with
+# UTF-8 strings.
+unicode_norm_hashfunc.h: unicode_norm_table.h
+
+unicode_norm_table.h: generate-unicode_norm_table.pl UnicodeData.txt CompositionExclusions.txt
+ $(PERL) $<
+
+unicode_nonspacing_table.h: generate-unicode_nonspacing_table.pl UnicodeData.txt
+ $(PERL) $^ >$@
+
+unicode_east_asian_fw_table.h: generate-unicode_east_asian_fw_table.pl EastAsianWidth.txt
+ $(PERL) $^ >$@
+
+unicode_normprops_table.h: generate-unicode_normprops_table.pl DerivedNormalizationProps.txt
+ $(PERL) $^ >$@
+
+# Test suite
+normalization-check: norm_test
+ ./norm_test
+
+norm_test: norm_test.o ../unicode_norm.o | submake-common
+
+norm_test.o: norm_test_table.h
+
+.PHONY: submake-common
+
+submake-common:
+ $(MAKE) -C .. all
+
+# Build the C test table from the Unicode normalization test suite.  Use
+# $(PERL), like the other generator rules in this file, so that the
+# interpreter selected by configure is the one that runs the script.
+norm_test_table.h: generate-norm_test_table.pl NormalizationTest.txt
+	$(PERL) $^ $@
+
+.PHONY: normalization-check
+
+
+clean:
+	rm -f $(OBJS) norm_test norm_test.o
+
+# Remove everything the rules in this file can create: all downloaded
+# Unicode data files, and the generated headers, which stay in this
+# directory until update-unicode moves them away.
+distclean: clean
+	rm -f UnicodeData.txt EastAsianWidth.txt DerivedNormalizationProps.txt CompositionExclusions.txt NormalizationTest.txt
+	rm -f norm_test_table.h unicode_norm_table.h unicode_norm_hashfunc.h unicode_nonspacing_table.h unicode_east_asian_fw_table.h unicode_normprops_table.h
+
+maintainer-clean: distclean
diff --git a/src/common/unicode/README b/src/common/unicode/README
new file mode 100644
index 0000000..56956f6
--- /dev/null
+++ b/src/common/unicode/README
@@ -0,0 +1,28 @@
+This directory contains tools to generate the Unicode lookup tables in
+src/include/common/ (unicode_norm_table.h and the related generated
+headers), used for Unicode normalization. The generated .h files are
+included in the source tree, so these tools are normally not needed to build
+PostgreSQL; they are only needed if you have to re-generate the .h files
+from the Unicode data files for some reason, e.g. to update to a new version
+of Unicode.
+
+Generating unicode_norm_table.h
+-------------------------------
+
+Run
+
+ make update-unicode
+
+from the top level of the source tree and commit the result.
+
+Tests
+-----
+
+The Unicode consortium publishes a comprehensive test suite for the
+normalization algorithm, in a file called NormalizationTest.txt. This
+directory also contains a perl script and some C code, to run our
+normalization code with all the test strings in NormalizationTest.txt.
+To download NormalizationTest.txt and run the tests:
+
+ make normalization-check
+
+This is also run as part of the update-unicode target.
diff --git a/src/common/unicode/generate-norm_test_table.pl b/src/common/unicode/generate-norm_test_table.pl
new file mode 100644
index 0000000..3434f7e
--- /dev/null
+++ b/src/common/unicode/generate-norm_test_table.pl
@@ -0,0 +1,106 @@
+#!/usr/bin/perl
+#
+# Read Unicode consortium's normalization test suite, NormalizationTest.txt,
+# and generate a C array from it, for norm_test.c.
+#
+# NormalizationTest.txt is part of the Unicode Character Database.
+#
+# Copyright (c) 2000-2023, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+use File::Basename;
+
+# Exactly two arguments are required: the test suite to read and the header
+# file to write.
+die "Usage: $0 INPUT_FILE OUTPUT_FILE\n" if @ARGV != 2;
+my $input_file = $ARGV[0];
+my $output_file = $ARGV[1];
+
+# Open the input and output files.  Both messages end in a newline so that
+# Perl does not append its own "at SCRIPT line N." suffix to one but not
+# the other.
+open my $INPUT, '<', $input_file
+  or die "Could not open input file $input_file: $!\n";
+open my $OUTPUT, '>', $output_file
+  or die "Could not open output file $output_file: $!\n";
+
+# Print header of output file.
+print $OUTPUT <<HEADER;
+/*-------------------------------------------------------------------------
+ *
+ * norm_test_table.h
+ * Test strings for Unicode normalization.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/common/unicode/norm_test_table.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * File auto-generated by src/common/unicode/generate-norm_test_table.pl, do
+ * not edit. There is deliberately not an #ifndef PG_NORM_TEST_TABLE_H
+ * here.
+ */
+
+typedef struct
+{
+ int linenum;
+ pg_wchar input[50];
+ pg_wchar output[4][50];
+} pg_unicode_test;
+
+/* test table */
+HEADER
+print $OUTPUT
+ "static const pg_unicode_test UnicodeNormalizationTests[] =\n{\n";
+
+# Turn a whitespace-separated list of hexadecimal code points into the body
+# of a C array initializer: each code point becomes "0x<cp>, " and a final
+# 0 null-terminates the array.
+sub codepoint_string_to_hex
+{
+	my ($codepoints) = @_;
+
+	my @items = map { "0x$_, " } split(' ', $codepoints);
+
+	return join('', @items) . '0';
+}
+
+# Process the input file line by line
+my $linenum = 0;
+while (my $line = <$INPUT>)
+{
+	$linenum = $linenum + 1;
+	if ($line =~ /^\s*#/) { next; }    # ignore comments
+
+	if ($line =~ /^@/) { next; }       # ignore @Part0 like headers
+
+	# Ignore blank lines.  Without this, a blank line would be emitted as an
+	# entry whose input array is just { 0 }, which is exactly what the
+	# terminator entry written after this loop looks like; every test after
+	# it would then be silently skipped by the consumer of the table.
+	if ($line !~ /\S/) { next; }
+
+	# Split the line wanted and get the fields needed:
+	#
+	# source; NFC; NFD; NFKC; NFKD
+	my ($source, $nfc, $nfd, $nfkc, $nfkd) = split(';', $line);
+
+	# For the same reason, refuse malformed data lines instead of writing
+	# empty arrays for the missing fields.
+	die "$input_file line $linenum: expected five ';'-separated fields\n"
+	  unless defined $nfkd && $source =~ /\S/;
+
+	my $source_utf8 = codepoint_string_to_hex($source);
+	my $nfc_utf8 = codepoint_string_to_hex($nfc);
+	my $nfd_utf8 = codepoint_string_to_hex($nfd);
+	my $nfkc_utf8 = codepoint_string_to_hex($nfkc);
+	my $nfkd_utf8 = codepoint_string_to_hex($nfkd);
+
+	print $OUTPUT
+	  "\t{ $linenum, { $source_utf8 }, { { $nfc_utf8 }, { $nfd_utf8 }, { $nfkc_utf8 }, { $nfkd_utf8 } } },\n";
+}
+
+# Output terminator entry
+print $OUTPUT "\t{ 0, { 0 }, { { 0 }, { 0 }, { 0 }, { 0 } } }";
+print $OUTPUT "\n};\n";
+
+close $OUTPUT;
+close $INPUT;
diff --git a/src/common/unicode/generate-unicode_east_asian_fw_table.pl b/src/common/unicode/generate-unicode_east_asian_fw_table.pl
new file mode 100644
index 0000000..2b2df37
--- /dev/null
+++ b/src/common/unicode/generate-unicode_east_asian_fw_table.pl
@@ -0,0 +1,76 @@
+#!/usr/bin/perl
+#
+# Generate a sorted list of non-overlapping intervals of East Asian Wide (W)
+# and East Asian Fullwidth (F) characters, using Unicode data files as input.
+# Pass EastAsianWidth.txt as argument. The output is on stdout.
+#
+# Copyright (c) 2019-2023, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+my $range_start = undef;
+my ($first, $last);
+my $prev_last;
+
+print
+ "/* generated by src/common/unicode/generate-unicode_east_asian_fw_table.pl, do not edit */\n\n";
+
+print "static const struct mbinterval east_asian_fw[] = {\n";
+
+# Walk the input line by line and merge adjacent F/W code points into
+# ranges. $range_start is defined only while a wide range is open.
+# $prev_last, maintained by the continue block below, is the last code
+# point covered by the most recently parsed data line.
+foreach my $line (<ARGV>)
+{
+ chomp $line;
+ # strip trailing comments; lines left empty carry no data
+ $line =~ s/\s*#.*$//;
+ next if $line eq '';
+ my ($codepoint, $width) = split ';', $line;
+
+ # the first field is either a single code point or a FIRST..LAST range
+ if ($codepoint =~ /\.\./)
+ {
+ ($first, $last) = split /\.\./, $codepoint;
+ }
+ else
+ {
+ $first = $last = $codepoint;
+ }
+
+ # convert both ends from hexadecimal text to numbers
+ ($first, $last) = map(hex, ($first, $last));
+
+ if ($width eq 'F' || $width eq 'W')
+ {
+ # fullwidth/wide characters
+ if (!defined($range_start))
+ {
+ # save for start of range if one hasn't been started yet
+ $range_start = $first;
+ }
+ elsif ($first != $prev_last + 1)
+ {
+ # ranges aren't contiguous; emit the last and start a new one
+ printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_last;
+ $range_start = $first;
+ }
+ # otherwise this line directly continues the open range
+ }
+ else
+ {
+ # not wide characters, print out previous range if any
+ if (defined($range_start))
+ {
+ printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_last;
+ $range_start = undef;
+ }
+ }
+}
+continue
+{
+ # Runs after every iteration, including those ended by "next"; in that
+ # case $last still holds the value from the previous data line.
+ $prev_last = $last;
+}
+
+# don't forget any ranges at the very end of the database (though there are none
+# as of Unicode 13.0)
+if (defined($range_start))
+{
+ printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_last;
+}
+
+print "};\n";
diff --git a/src/common/unicode/generate-unicode_nonspacing_table.pl b/src/common/unicode/generate-unicode_nonspacing_table.pl
new file mode 100644
index 0000000..ae86e82
--- /dev/null
+++ b/src/common/unicode/generate-unicode_nonspacing_table.pl
@@ -0,0 +1,53 @@
+#!/usr/bin/perl
+#
+# Generate sorted list of non-overlapping intervals of non-spacing
+# characters, using Unicode data files as input. Pass UnicodeData.txt
+# as argument. The output is on stdout.
+#
+# Copyright (c) 2019-2023, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+# $range_start is defined only while a run of non-spacing characters is
+# open; $prev_codepoint is the code point of the previously read line.
+my $range_start = undef;
+my $codepoint;
+my $prev_codepoint;
+
+print
+  "/* generated by src/common/unicode/generate-unicode_nonspacing_table.pl, do not edit */\n\n";
+
+print "static const struct mbinterval nonspacing[] = {\n";
+
+foreach my $line (<ARGV>)
+{
+	chomp $line;
+	my @fields = split ';', $line;
+	$codepoint = hex $fields[0];
+
+	# Me and Mn refer to combining characters
+	# Cf refers to format characters
+	if ($fields[2] eq 'Me' || $fields[2] eq 'Mn' || $fields[2] eq 'Cf')
+	{
+		# non-spacing character, save for start of range
+		if (!defined($range_start))
+		{
+			$range_start = $codepoint;
+		}
+	}
+	else
+	{
+		# not a non-spacing character, print out previous range if any
+		if (defined($range_start))
+		{
+			printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint;
+			$range_start = undef;
+		}
+	}
+}
+continue
+{
+	$prev_codepoint = $codepoint;
+}
+
+# Don't lose a range that is still open when the input ends; the loop above
+# only emits a range once it sees a following character of another category.
+if (defined($range_start))
+{
+	printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint;
+}
+
+print "};\n";
diff --git a/src/common/unicode/generate-unicode_norm_table.pl b/src/common/unicode/generate-unicode_norm_table.pl
new file mode 100644
index 0000000..d591411
--- /dev/null
+++ b/src/common/unicode/generate-unicode_norm_table.pl
@@ -0,0 +1,412 @@
+#!/usr/bin/perl
+#
+# Generate a composition table and its lookup utilities, using Unicode data
+# files as input.
+#
+# Input: UnicodeData.txt and CompositionExclusions.txt
+# Output: unicode_norm_table.h and unicode_norm_hashfunc.h
+#
+# Copyright (c) 2000-2023, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+use FindBin;
+use lib "$FindBin::RealBin/../../tools/";
+use PerfectHash;
+
+my $output_path = '.';
+
+GetOptions('outdir:s' => \$output_path);
+
+my $output_table_file = "$output_path/unicode_norm_table.h";
+my $output_func_file = "$output_path/unicode_norm_hashfunc.h";
+
+
+my $FH;
+
+# Read list of codes that should be excluded from re-composition.
+my @composition_exclusion_codes = ();
+open($FH, '<', "$output_path/CompositionExclusions.txt")
+ or die "Could not open $output_path/CompositionExclusions.txt: $!.";
+while (my $line = <$FH>)
+{
+ if ($line =~ /^([[:xdigit:]]+)/)
+ {
+ push @composition_exclusion_codes, $1;
+ }
+}
+close $FH;
+
+# Read entries from UnicodeData.txt into a list, and a hash table. We need
+# three fields from each row: the codepoint, canonical combining class,
+# and character decomposition mapping
+my @characters = ();
+my %character_hash = ();
+open($FH, '<', "$output_path/UnicodeData.txt")
+ or die "Could not open $output_path/UnicodeData.txt: $!.";
+while (my $line = <$FH>)
+{
+
+ # Split the line wanted and get the fields needed:
+ # - Unicode code value
+ # - Canonical Combining Class
+ # - Character Decomposition Mapping
+ my @elts = split(';', $line);
+ my $code = $elts[0];
+ my $class = $elts[3];
+ my $decomp = $elts[5];
+
+ # Skip codepoints above U+10FFFF. They cannot be represented in 4 bytes
+ # in UTF-8, and PostgreSQL doesn't support UTF-8 characters longer than
+ # 4 bytes. (This is just pro forma, as there aren't any such entries in
+ # the data file, currently.)
+ next if hex($code) > 0x10FFFF;
+
+ # Skip characters with no decompositions and a class of 0, to reduce the
+ # table size.
+ next if $class eq '0' && $decomp eq '';
+
+ my %char_entry = (code => $code, class => $class, decomp => $decomp);
+ push(@characters, \%char_entry);
+ $character_hash{$code} = \%char_entry;
+}
+close $FH;
+
+my $num_characters = scalar @characters;
+
+# Start writing out the output files
+open my $OT, '>', $output_table_file
+ or die "Could not open output file $output_table_file: $!\n";
+open my $OF, '>', $output_func_file
+ or die "Could not open output file $output_func_file: $!\n";
+
+print $OT <<HEADER;
+/*-------------------------------------------------------------------------
+ *
+ * unicode_norm_table.h
+ * Composition table used for Unicode normalization
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/common/unicode_norm_table.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * File auto-generated by src/common/unicode/generate-unicode_norm_table.pl,
+ * do not edit. There is deliberately not an #ifndef PG_UNICODE_NORM_TABLE_H
+ * here.
+ */
+typedef struct
+{
+ uint32 codepoint; /* Unicode codepoint */
+ uint8 comb_class; /* combining class of character */
+ uint8 dec_size_flags; /* size and flags of decomposition code list */
+ uint16 dec_index; /* index into UnicodeDecomp_codepoints, or the
+ * decomposition itself if DECOMP_INLINE */
+} pg_unicode_decomposition;
+
+#define DECOMP_NO_COMPOSE 0x80 /* don't use for re-composition */
+#define DECOMP_INLINE 0x40 /* decomposition is stored inline in
+ * dec_index */
+#define DECOMP_COMPAT 0x20 /* compatibility mapping */
+
+#define DECOMPOSITION_SIZE(x) ((x)->dec_size_flags & 0x1F)
+#define DECOMPOSITION_NO_COMPOSE(x) (((x)->dec_size_flags & (DECOMP_NO_COMPOSE | DECOMP_COMPAT)) != 0)
+#define DECOMPOSITION_IS_INLINE(x) (((x)->dec_size_flags & DECOMP_INLINE) != 0)
+#define DECOMPOSITION_IS_COMPAT(x) (((x)->dec_size_flags & DECOMP_COMPAT) != 0)
+
+/* Table of Unicode codepoints and their decompositions */
+static const pg_unicode_decomposition UnicodeDecompMain[$num_characters] =
+{
+HEADER
+
+print $OF <<HEADER;
+/*-------------------------------------------------------------------------
+ *
+ * unicode_norm_hashfunc.h
+ * Perfect hash functions used for Unicode normalization
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/common/unicode_norm_hashfunc.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * File auto-generated by src/common/unicode/generate-unicode_norm_table.pl,
+ * do not edit. There is deliberately not an #ifndef PG_UNICODE_NORM_HASHFUNC_H
+ * here.
+ */
+
+#include "common/unicode_norm_table.h"
+
+/* Typedef for perfect hash functions */
+typedef int (*cp_hash_func) (const void *key);
+
+/* Information for lookups with perfect hash functions */
+typedef struct
+{
+ const pg_unicode_decomposition *decomps;
+ cp_hash_func hash;
+ int num_decomps;
+} pg_unicode_decompinfo;
+
+typedef struct
+{
+ const uint16 *inverse_lookup;
+ cp_hash_func hash;
+ int num_recomps;
+} pg_unicode_recompinfo;
+
+HEADER
+
+my $decomp_index = 0;
+my $decomp_string = "";
+my @dec_cp_packed;
+my $main_index = 0;
+my @rec_info;
+
+my $last_code = $characters[-1]->{code};
+foreach my $char (@characters)
+{
+ my $code = $char->{code};
+ my $class = $char->{class};
+ my $decomp = $char->{decomp};
+
+ # Save the code point bytes as a string in network order.
+ push @dec_cp_packed, pack('N', hex($char->{code}));
+
+ # The character decomposition mapping field in UnicodeData.txt is a list
+ # of unicode codepoints, separated by space. But it can be prefixed with
+ # so-called compatibility formatting tag, like "<compat>", or "<font>".
+ # The entries with compatibility formatting tags should not be used for
+ # re-composing characters during normalization, so flag them in the table.
+ # (The tag doesn't matter, only whether there is a tag or not)
+ my $compat = 0;
+ if ($decomp =~ /\<.*\>/)
+ {
+ $compat = 1;
+ $decomp =~ s/\<[^][]*\>//g;
+ }
+ my @decomp_elts = split(" ", $decomp);
+
+ # Decomposition size
+ # Print size of decomposition
+ my $decomp_size = scalar(@decomp_elts);
+ die if $decomp_size > 0x1F; # to not overrun bitmask
+
+ my $first_decomp = shift @decomp_elts;
+
+ my $flags = "";
+ my $comment = "";
+
+ if ($compat)
+ {
+ $flags .= " | DECOMP_COMPAT";
+ }
+
+ if ($decomp_size == 2)
+ {
+ # Should this be used for recomposition?
+ if ( $character_hash{$first_decomp}
+ && $character_hash{$first_decomp}->{class} != 0)
+ {
+ $flags .= " | DECOMP_NO_COMPOSE";
+ $comment = "non-starter decomposition";
+ }
+ else
+ {
+ foreach my $lcode (@composition_exclusion_codes)
+ {
+ if ($lcode eq $code)
+ {
+ $flags .= " | DECOMP_NO_COMPOSE";
+ $comment = "in exclusion list";
+ last;
+ }
+ }
+ }
+
+ # Save info for recomposeable codepoints.
+ # Note that this MUST match the macro DECOMPOSITION_NO_COMPOSE in C
+ # above! See also the inverse lookup in recompose_code() found in
+ # src/common/unicode_norm.c.
+ if (!($flags =~ /DECOMP_COMPAT/ || $flags =~ /DECOMP_NO_COMPOSE/))
+ {
+ push @rec_info,
+ {
+ code => $code,
+ main_index => $main_index,
+ first => $first_decomp,
+ second => $decomp_elts[0]
+ };
+ }
+ }
+
+ if ($decomp_size == 0)
+ {
+ print $OT "\t{0x$code, $class, 0$flags, 0}";
+ }
+ elsif ($decomp_size == 1 && length($first_decomp) <= 4)
+ {
+
+ # The decomposition consists of a single codepoint, and it fits
+ # in a uint16, so we can store it "inline" in the main table.
+ $flags .= " | DECOMP_INLINE";
+ print $OT "\t{0x$code, $class, 1$flags, 0x$first_decomp}";
+ }
+ else
+ {
+ print $OT "\t{0x$code, $class, $decomp_size$flags, $decomp_index}";
+
+ # Now save the decompositions into a dedicated area that will
+ # be written afterwards. First build the entry dedicated to
+ # a sub-table with the code and decomposition.
+ $decomp_string .= ",\n" if ($decomp_string ne "");
+
+ $decomp_string .= "\t /* $decomp_index */ 0x$first_decomp";
+ foreach (@decomp_elts)
+ {
+ $decomp_string .= ", 0x$_";
+ }
+
+ $decomp_index = $decomp_index + $decomp_size;
+ }
+
+ # Print a comma after all items except the last one.
+ print $OT "," unless ($code eq $last_code);
+
+ print $OT "\t/* $comment */" if ($comment ne "");
+ print $OT "\n";
+
+ $main_index++;
+}
+print $OT "\n};\n\n";
+
+# Print the array of decomposed codes.
+print $OT <<HEADER;
+/* codepoints array */
+static const uint32 UnicodeDecomp_codepoints[$decomp_index] =
+{
+$decomp_string
+};
+HEADER
+
+# Emit the definition of the decomp hash function.
+my $dec_funcname = 'Decomp_hash_func';
+my $dec_func = PerfectHash::generate_hash_function(\@dec_cp_packed,
+ $dec_funcname, fixed_key_length => 4);
+print $OF "/* Perfect hash function for decomposition */\n";
+print $OF "static $dec_func\n";
+
+# Emit the structure that wraps the hash lookup information into
+# one variable.
+print $OF <<HEADER;
+/* Hash lookup information for decomposition */
+static const pg_unicode_decompinfo UnicodeDecompInfo =
+{
+ UnicodeDecompMain,
+ $dec_funcname,
+ $num_characters
+};
+
+HEADER
+
+# Find the lowest codepoint that decomposes to each recomposeable
+# code pair and create a mapping to it.
+my $recomp_string = "";
+my @rec_cp_packed;
+my %seenit;
+my $firstentry = 1;
+foreach my $rec (sort recomp_sort @rec_info)
+{
+ # The hash key is formed by concatenating the bytes of the two
+ # codepoints. See also recompose_code() in common/unicode_norm.c.
+ my $hashkey = (hex($rec->{first}) << 32) | hex($rec->{second});
+
+ # We are only interested in the lowest code point that decomposes
+ # to the given code pair.
+ next if $seenit{$hashkey};
+
+ # Save the hash key bytes in network order
+ push @rec_cp_packed, pack('Q>', $hashkey);
+
+ # Append inverse lookup element
+ $recomp_string .= ",\n" if !$firstentry;
+ $recomp_string .= sprintf "\t/* U+%s+%s -> U+%s */ %s",
+ $rec->{first},
+ $rec->{second},
+ $rec->{code},
+ $rec->{main_index};
+
+ $seenit{$hashkey} = 1;
+ $firstentry = 0;
+}
+
+# Emit the inverse lookup array containing indexes into UnicodeDecompMain.
+my $num_recomps = scalar @rec_cp_packed;
+print $OF <<HEADER;
+/* Inverse lookup array -- contains indexes into UnicodeDecompMain[] */
+static const uint16 RecompInverseLookup[$num_recomps] =
+{
+$recomp_string
+};
+
+HEADER
+
+# Emit the definition of the recomposition hash function.
+my $rec_funcname = 'Recomp_hash_func';
+my $rec_func =
+ PerfectHash::generate_hash_function(\@rec_cp_packed, $rec_funcname,
+ fixed_key_length => 8);
+print $OF "/* Perfect hash function for recomposition */\n";
+print $OF "static $rec_func\n";
+
+# Emit the structure that wraps the hash lookup information into
+# one variable.
+print $OF <<HEADER;
+/* Hash lookup information for recomposition */
+static const pg_unicode_recompinfo UnicodeRecompInfo =
+{
+ RecompInverseLookup,
+ $rec_funcname,
+ $num_recomps
+};
+HEADER
+
+close $OT;
+close $OF;
+
+# Comparator for sort(): orders recomposition entries by their first code
+# point, then by their second code point, and finally by the code point that
+# decomposes into that pair.  Two entries that tie on all three keys are a
+# fatal error.
+sub recomp_sort
+{
+	my $order =
+		 hex($a->{first}) <=> hex($b->{first})
+	  || hex($a->{second}) <=> hex($b->{second})
+	  || hex($a->{code}) <=> hex($b->{code});
+
+	die "found duplicate entries of recomposeable code pairs"
+	  if $order == 0;
+
+	return $order;
+}
diff --git a/src/common/unicode/generate-unicode_normprops_table.pl b/src/common/unicode/generate-unicode_normprops_table.pl
new file mode 100644
index 0000000..1b74731
--- /dev/null
+++ b/src/common/unicode/generate-unicode_normprops_table.pl
@@ -0,0 +1,125 @@
+#!/usr/bin/perl
+#
+# Generate table of Unicode normalization "quick check" properties
+# (see UAX #15). Pass DerivedNormalizationProps.txt as argument. The
+# output is on stdout.
+#
+# Copyright (c) 2020-2023, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+use FindBin;
+use lib "$FindBin::RealBin/../../tools/";
+use PerfectHash;
+
+my %data;
+
+print
+ "/* generated by src/common/unicode/generate-unicode_normprops_table.pl, do not edit */\n\n";
+
+print <<EOS;
+#include "common/unicode_norm.h"
+
+/*
+ * Normalization quick check entry for codepoint. We use a bit field
+ * here to save space.
+ */
+typedef struct
+{
+ unsigned int codepoint:21;
+ signed int quickcheck:4; /* really UnicodeNormalizationQC */
+} pg_unicode_normprops;
+
+/* Typedef for hash function on quick check table */
+typedef int (*qc_hash_func) (const void *key);
+
+/* Information for quick check lookup with perfect hash function */
+typedef struct
+{
+ const pg_unicode_normprops *normprops;
+ qc_hash_func hash;
+ int num_normprops;
+} pg_unicode_norminfo;
+EOS
+
+foreach my $line (<ARGV>)
+{
+ chomp $line;
+ $line =~ s/\s*#.*$//;
+ next if $line eq '';
+ my ($codepoint, $prop, $value) = split /\s*;\s*/, $line;
+ next if $prop !~ /_QC/;
+
+ my ($first, $last);
+ if ($codepoint =~ /\.\./)
+ {
+ ($first, $last) = split /\.\./, $codepoint;
+ }
+ else
+ {
+ $first = $last = $codepoint;
+ }
+
+ foreach my $cp (hex($first) .. hex($last))
+ {
+ $data{$prop}{$cp} = $value;
+ }
+}
+
+# We create a separate array for each normalization form rather than,
+# say, a two-dimensional array, because that array would be very
+# sparse and would create unnecessary overhead especially for the NFC
+# lookup.
+#
+# For each property this emits, in order: the sorted table of entries, a
+# perfect hash function over the code points, and a struct tying both
+# together with the entry count.
+foreach my $prop (sort keys %data)
+{
+	# Don't build the tables for the "D" forms because they are too
+	# big.  See also unicode_is_normalized_quickcheck().
+	next if $prop eq "NFD_QC" || $prop eq "NFKD_QC";
+
+	print "\n";
+	print
+	  "static const pg_unicode_normprops UnicodeNormProps_${prop}[] = {\n";
+
+	# Work on the per-property hash through its reference; no copy needed.
+	my $subdata = $data{$prop};
+	my @cp_packed;
+	foreach my $cp (sort { $a <=> $b } keys %$subdata)
+	{
+		my $value = $subdata->{$cp};
+		my $qc;
+		if ($value eq 'N')
+		{
+			$qc = 'UNICODE_NORM_QC_NO';
+		}
+		elsif ($value eq 'M')
+		{
+			$qc = 'UNICODE_NORM_QC_MAYBE';
+		}
+		else
+		{
+			# Only N and M are expected; say what was found and where
+			# instead of dying without a message.
+			die sprintf("unexpected %s value \"%s\" for U+%04X\n",
+				$prop, $value, $cp);
+		}
+		printf "\t{0x%04X, %s},\n", $cp, $qc;
+
+		# Save the bytes as a string in network order.
+		push @cp_packed, pack('N', $cp);
+	}
+
+	print "};\n";
+
+	# Emit the definition of the perfect hash function.
+	my $funcname = $prop . '_hash_func';
+	my $f = PerfectHash::generate_hash_function(\@cp_packed, $funcname,
+		fixed_key_length => 4);
+	printf "\n/* Perfect hash function for %s */", $prop;
+	print "\nstatic $f\n";
+
+	# Emit the structure that wraps the hash lookup information into
+	# one variable.
+	printf "/* Hash lookup information for %s */", $prop;
+	printf "\nstatic const pg_unicode_norminfo ";
+	printf "UnicodeNormInfo_%s = {\n", $prop;
+	printf "\tUnicodeNormProps_%s,\n", $prop;
+	printf "\t%s,\n", $funcname;
+	printf "\t%d\n", scalar @cp_packed;
+	printf "};\n";
+}
diff --git a/src/common/unicode/meson.build b/src/common/unicode/meson.build
new file mode 100644
index 0000000..9033c4a
--- /dev/null
+++ b/src/common/unicode/meson.build
@@ -0,0 +1,111 @@
+# Copyright (c) 2022-2023, PostgreSQL Global Development Group
+
+UNICODE_VERSION = '15.0.0'
+
+unicode_data = {}
+unicode_baseurl = 'https://www.unicode.org/Public/@0@/ucd/@1@'
+
+if not wget.found() or not cp.found()
+ subdir_done()
+endif
+
+# These files are part of the Unicode Character Database. Download them on
+# demand.
+foreach f : ['UnicodeData.txt', 'EastAsianWidth.txt', 'DerivedNormalizationProps.txt', 'CompositionExclusions.txt', 'NormalizationTest.txt']
+ url = unicode_baseurl.format(UNICODE_VERSION, f)
+ target = custom_target(f,
+ output: f,
+ command: [wget, wget_flags, url],
+ build_by_default: false,
+ )
+ unicode_data += {f: target}
+endforeach
+
+
+update_unicode_targets = []
+
+update_unicode_targets += \
+ custom_target('unicode_norm_table.h',
+ input: [unicode_data['UnicodeData.txt'], unicode_data['CompositionExclusions.txt']],
+ output: ['unicode_norm_table.h', 'unicode_norm_hashfunc.h'],
+ depend_files: perfect_hash_pm,
+ command: [
+ perl, files('generate-unicode_norm_table.pl'),
+ '--outdir', '@OUTDIR@', '@INPUT@'],
+ build_by_default: false,
+ )
+
+# generate-unicode_nonspacing_table.pl only uses strict and warnings, so
+# this target does not depend on PerfectHash.pm.
+update_unicode_targets += \
+  custom_target('unicode_nonspacing_table.h',
+    input: [unicode_data['UnicodeData.txt']],
+    output: ['unicode_nonspacing_table.h'],
+    command: [perl, files('generate-unicode_nonspacing_table.pl'), '@INPUT@'],
+    build_by_default: false,
+    capture: true,
+  )
+
+update_unicode_targets += \
+ custom_target('unicode_east_asian_fw_table.h',
+ input: [unicode_data['EastAsianWidth.txt']],
+ output: ['unicode_east_asian_fw_table.h'],
+ command: [perl, files('generate-unicode_east_asian_fw_table.pl'), '@INPUT@'],
+ build_by_default: false,
+ capture: true,
+ )
+
+update_unicode_targets += \
+ custom_target('unicode_normprops_table.h',
+ input: [unicode_data['DerivedNormalizationProps.txt']],
+ output: ['unicode_normprops_table.h'],
+ depend_files: perfect_hash_pm,
+ command: [perl, files('generate-unicode_normprops_table.pl'), '@INPUT@'],
+ build_by_default: false,
+ capture: true,
+ )
+
+norm_test_table = custom_target('norm_test_table.h',
+ input: [unicode_data['NormalizationTest.txt']],
+ output: ['norm_test_table.h'],
+ command: [perl, files('generate-norm_test_table.pl'), '@INPUT@', '@OUTPUT@'],
+ build_by_default: false,
+ )
+
+inc = include_directories('.')
+
+norm_test = executable('norm_test',
+ ['norm_test.c', norm_test_table],
+ dependencies: [frontend_port_code],
+ include_directories: inc,
+ link_with: [common_static, pgport_static],
+ build_by_default: false,
+ kwargs: default_bin_args + {
+ 'install': false,
+ }
+)
+
+update_unicode_dep = []
+
+if not meson.is_cross_build()
+ update_unicode_dep += custom_target('norm_test.run',
+ output: 'norm_test.run',
+ input: update_unicode_targets,
+ command: [norm_test],
+ build_by_default: false,
+ build_always_stale: true,
+ )
+endif
+
+
+# Use a custom target, as run targets serialize the output, making this harder
+# to debug, and don't deal well with targets with multiple outputs.
+update_unicode = custom_target('update-unicode',
+ depends: update_unicode_dep,
+ output: ['dont-exist'],
+ input: update_unicode_targets,
+ command: [cp, '@INPUT@', '@SOURCE_ROOT@/src/include/common/'],
+ build_by_default: false,
+ build_always_stale: true,
+)
+
+alias_target('update-unicode', update_unicode)
diff --git a/src/common/unicode/norm_test.c b/src/common/unicode/norm_test.c
new file mode 100644
index 0000000..809a6de
--- /dev/null
+++ b/src/common/unicode/norm_test.c
@@ -0,0 +1,86 @@
+/*-------------------------------------------------------------------------
+ * norm_test.c
+ * Program to test Unicode normalization functions.
+ *
+ * Portions Copyright (c) 2017-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/unicode/norm_test.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres_fe.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "common/unicode_norm.h"
+
+#include "norm_test_table.h"
+
+/*
+ * Format a zero-terminated code point string as "U+XXXX U+XXXX ..." for
+ * diagnostic output.
+ *
+ * The text is built in a static buffer, so the returned pointer is only
+ * valid until the next call.  At most BUF_DIGITS code points are printed;
+ * the buffer reserves 11 characters for each of them.
+ */
+static char *
+print_wchar_str(const pg_wchar *s)
+{
+#define BUF_DIGITS 50
+	static char buf[BUF_DIGITS * 11 + 1];
+	char	   *out = buf;
+
+	for (int n = 0; s[n] != 0 && n < BUF_DIGITS; n++)
+		out += sprintf(out, "U+%04X ", s[n]);
+	*out = '\0';
+
+	return buf;
+}
+
+/*
+ * Compare two zero-terminated pg_wchar strings element by element.
+ *
+ * Returns 0 if they are equal, -1 if s1 sorts before s2, and 1 otherwise.
+ */
+static int
+pg_wcscmp(const pg_wchar *s1, const pg_wchar *s2)
+{
+	/* skip the common prefix, stopping at a shared terminator */
+	while (*s1 == *s2)
+	{
+		if (*s1 == 0)
+			return 0;
+		s1++;
+		s2++;
+	}
+
+	/* first differing element decides the order */
+	return (*s1 < *s2) ? -1 : 1;
+}
+
+/*
+ * Run every entry of UnicodeNormalizationTests through unicode_normalize()
+ * in each of the four normalization forms and compare the result with the
+ * expected output.  Exits with status 1 on the first failure, 0 if all
+ * tests pass.
+ */
+int
+main(int argc, char **argv)
+{
+	const pg_unicode_test *test;
+
+	/* the table ends with an entry whose input is empty */
+	for (test = UnicodeNormalizationTests; test->input[0] != 0; test++)
+	{
+		for (int form = 0; form < 4; form++)
+		{
+			pg_wchar   *result;
+
+			result = unicode_normalize(form, test->input);
+
+			/*
+			 * This program is built with -DFRONTEND, where the normalization
+			 * code allocates with malloc(); don't hand a NULL result to
+			 * pg_wcscmp().  NOTE(review): assumes unicode_normalize() reports
+			 * allocation failure by returning NULL -- confirm.
+			 */
+			if (result == NULL)
+			{
+				printf("FAILURE (NormalizationTest.txt line %d form %d): out of memory\n", test->linenum, form);
+				exit(1);
+			}
+
+			if (pg_wcscmp(test->output[form], result) != 0)
+			{
+				printf("FAILURE (NormalizationTest.txt line %d form %d):\n", test->linenum, form);
+				printf("input:    %s\n", print_wchar_str(test->input));
+				printf("expected: %s\n", print_wchar_str(test->output[form]));
+				printf("got:      %s\n", print_wchar_str(result));
+				printf("\n");
+				exit(1);
+			}
+
+			/* don't accumulate one result buffer per test and form */
+			free(result);
+		}
+	}
+
+	printf("All tests successful!\n");
+	exit(0);
+}
diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c
new file mode 100644
index 0000000..8e17cf0
--- /dev/null
+++ b/src/common/unicode_norm.c
@@ -0,0 +1,634 @@
+/*-------------------------------------------------------------------------
+ * unicode_norm.c
+ * Normalize a Unicode string
+ *
+ * This implements Unicode normalization, per the documentation at
+ * https://www.unicode.org/reports/tr15/.
+ *
+ * Portions Copyright (c) 2017-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/unicode_norm.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/unicode_norm.h"
+#ifndef FRONTEND
+#include "common/unicode_norm_hashfunc.h"
+#include "common/unicode_normprops_table.h"
+#include "port/pg_bswap.h"
+#else
+#include "common/unicode_norm_table.h"
+#endif
+
+/* Allocation wrappers: malloc/free in frontend code, palloc/pfree otherwise */
+#ifdef FRONTEND
+#define ALLOC(size) malloc(size)
+#define FREE(ptr) free(ptr)
+#else
+#define ALLOC(size) palloc(size)
+#define FREE(ptr) pfree(ptr)
+#endif
+
+/*
+ * Constants for calculations with Hangul characters.
+ *
+ * The derived counts are parenthesized so that they expand safely inside
+ * any expression (e.g. "x / NCOUNT" or "x % SCOUNT").
+ */
+#define SBASE 0xAC00 /* U+AC00, first precomposed syllable */
+#define LBASE 0x1100 /* U+1100, base of the L jamo */
+#define VBASE 0x1161 /* U+1161, base of the V jamo */
+#define TBASE 0x11A7 /* U+11A7, base of the T jamo (index 0 = no T) */
+#define LCOUNT 19
+#define VCOUNT 21
+#define TCOUNT 28
+#define NCOUNT (VCOUNT * TCOUNT)
+#define SCOUNT (LCOUNT * NCOUNT)
+
+#ifdef FRONTEND
+/*
+ * bsearch() comparator for the decomposition lookup table.
+ *
+ * p1 points to the uint32 code point being searched for, p2 to a
+ * pg_unicode_decomposition entry of the table.
+ */
+static int
+conv_compare(const void *p1, const void *p2)
+{
+    const uint32 key = *(const uint32 *) p1;
+    const uint32 entry_code = ((const pg_unicode_decomposition *) p2)->codepoint;
+
+    if (key > entry_code)
+        return 1;
+    if (key < entry_code)
+        return -1;
+    return 0;
+}
+
+#endif
+
+/*
+ * get_code_entry
+ *
+ * Get the entry corresponding to code in the decomposition lookup table.
+ * The backend version of this code uses a perfect hash function for the
+ * lookup, while the frontend version uses a binary search.
+ *
+ * Returns NULL if the table has no entry for the given code point.
+ */
+static const pg_unicode_decomposition *
+get_code_entry(pg_wchar code)
+{
+#ifndef FRONTEND
+ int h;
+ uint32 hashkey;
+ pg_unicode_decompinfo decompinfo = UnicodeDecompInfo;
+
+ /*
+ * Compute the hash function. The hash key is the codepoint with the bytes
+ * in network order.
+ */
+ hashkey = pg_hton32(code);
+ h = decompinfo.hash(&hashkey);
+
+ /* An out-of-range result implies no match */
+ if (h < 0 || h >= decompinfo.num_decomps)
+ return NULL;
+
+ /*
+ * Since it's a perfect hash, we need only match to the specific codepoint
+ * it identifies.
+ */
+ if (code != decompinfo.decomps[h].codepoint)
+ return NULL;
+
+ /* Success! */
+ return &decompinfo.decomps[h];
+#else
+ return bsearch(&(code), /* key; compared by conv_compare() as a uint32 */
+ UnicodeDecompMain,
+ lengthof(UnicodeDecompMain),
+ sizeof(pg_unicode_decomposition),
+ conv_compare);
+#endif
+}
+
+/*
+ * Return the canonical combining class of the given codepoint.
+ *
+ * A codepoint that has no entry in the decomposition table is either a
+ * Hangul character or a character with a class of 0 and no decompositions,
+ * so 0 is returned for it.
+ */
+static uint8
+get_canonical_class(pg_wchar code)
+{
+    const pg_unicode_decomposition *entry = get_code_entry(code);
+
+    return (entry != NULL) ? entry->comb_class : 0;
+}
+
+/*
+ * Given a decomposition entry looked up earlier, get the decomposed
+ * characters.
+ *
+ * The number of code points in the decomposition is stored in *dec_size.
+ *
+ * Note: the returned pointer can point to statically allocated buffer, and
+ * is only valid until next call to this function!
+ */
+static const pg_wchar *
+get_code_decomposition(const pg_unicode_decomposition *entry, int *dec_size)
+{
+ static pg_wchar x; /* holds an inline (single code point) decomposition */
+
+ if (DECOMPOSITION_IS_INLINE(entry))
+ {
+ Assert(DECOMPOSITION_SIZE(entry) == 1);
+ x = (pg_wchar) entry->dec_index; /* dec_index is the code point itself here */
+ *dec_size = 1;
+ return &x;
+ }
+ else
+ {
+ *dec_size = DECOMPOSITION_SIZE(entry);
+ return &UnicodeDecomp_codepoints[entry->dec_index]; /* dec_index is an offset into the shared array */
+ }
+}
+
+/*
+ * Calculate how many characters a given character will decompose to.
+ *
+ * This needs to recurse, if the character decomposes into characters that
+ * are, in turn, decomposable.
+ *
+ * If "compat" is false, entries flagged as compatibility decompositions
+ * are not expanded and count as a single character.
+ */
+static int
+get_decomposed_size(pg_wchar code, bool compat)
+{
+ const pg_unicode_decomposition *entry;
+ int size = 0;
+ int i;
+ const uint32 *decomp;
+ int dec_size;
+
+ /*
+ * Fast path for Hangul characters not stored in tables to save memory as
+ * decomposition is algorithmic. See
+ * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details
+ * on the matter.
+ */
+ if (code >= SBASE && code < SBASE + SCOUNT)
+ {
+ uint32 tindex,
+ sindex;
+
+ sindex = code - SBASE;
+ tindex = sindex % TCOUNT;
+
+ if (tindex != 0)
+ return 3; /* L, V and T */
+ return 2; /* L and V only */
+ }
+
+ entry = get_code_entry(code);
+
+ /*
+ * Just count current code if no other decompositions. A NULL entry is
+ * equivalent to a character with class 0 and no decompositions.
+ */
+ if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||
+ (!compat && DECOMPOSITION_IS_COMPAT(entry)))
+ return 1;
+
+ /*
+ * If this entry has other decomposition codes look at them as well. First
+ * get its decomposition in the list of tables available.
+ */
+ decomp = get_code_decomposition(entry, &dec_size);
+ for (i = 0; i < dec_size; i++)
+ {
+ uint32 lcode = decomp[i];
+
+ size += get_decomposed_size(lcode, compat);
+ }
+
+ return size;
+}
+
+/*
+ * Recompose a set of characters. For hangul characters, the calculation
+ * is algorithmic. For others, an inverse lookup at the decomposition
+ * table is necessary. Returns true if a recomposition can be done, and
+ * false otherwise.
+ *
+ * "start" and "code" are the two characters to combine, in that order.
+ * On success the composed character is stored in *result; *result is not
+ * written when false is returned.
+ */
+static bool
+recompose_code(uint32 start, uint32 code, uint32 *result)
+{
+ /*
+ * Handle Hangul characters algorithmically, per the Unicode spec.
+ *
+ * Check if two current characters are L and V.
+ */
+ if (start >= LBASE && start < LBASE + LCOUNT &&
+ code >= VBASE && code < VBASE + VCOUNT)
+ {
+ /* make syllable of form LV */
+ uint32 lindex = start - LBASE;
+ uint32 vindex = code - VBASE;
+
+ *result = SBASE + (lindex * VCOUNT + vindex) * TCOUNT;
+ return true;
+ }
+ /* Check if two current characters are LV and T */
+ else if (start >= SBASE && start < (SBASE + SCOUNT) &&
+ ((start - SBASE) % TCOUNT) == 0 &&
+ code >= TBASE && code < (TBASE + TCOUNT))
+ {
+ /* make syllable of form LVT */
+ uint32 tindex = code - TBASE;
+
+ *result = start + tindex;
+ return true;
+ }
+ else
+ {
+ const pg_unicode_decomposition *entry;
+
+ /*
+ * Do an inverse lookup of the decomposition tables to see if anything
+ * matches. The comparison just needs to be a perfect match on the
+ * sub-table of size two, because the start character has already been
+ * recomposed partially. This lookup uses a perfect hash function for
+ * the backend code.
+ */
+#ifndef FRONTEND
+
+ int h,
+ inv_lookup_index;
+ uint64 hashkey;
+ pg_unicode_recompinfo recompinfo = UnicodeRecompInfo;
+
+ /*
+ * Compute the hash function. The hash key is formed by concatenating
+ * bytes of the two codepoints in network order. See also
+ * src/common/unicode/generate-unicode_norm_table.pl.
+ */
+ hashkey = pg_hton64(((uint64) start << 32) | (uint64) code);
+ h = recompinfo.hash(&hashkey);
+
+ /* An out-of-range result implies no match */
+ if (h < 0 || h >= recompinfo.num_recomps)
+ return false;
+
+ inv_lookup_index = recompinfo.inverse_lookup[h];
+ entry = &UnicodeDecompMain[inv_lookup_index];
+
+ /* the hash only nominates a candidate; confirm both code points match */
+ if (start == UnicodeDecomp_codepoints[entry->dec_index] &&
+ code == UnicodeDecomp_codepoints[entry->dec_index + 1])
+ {
+ *result = entry->codepoint;
+ return true;
+ }
+
+#else
+
+ int i;
+
+ /* frontend: linear scan over the whole decomposition table */
+ for (i = 0; i < lengthof(UnicodeDecompMain); i++)
+ {
+ entry = &UnicodeDecompMain[i];
+
+ if (DECOMPOSITION_SIZE(entry) != 2)
+ continue;
+
+ if (DECOMPOSITION_NO_COMPOSE(entry))
+ continue;
+
+ if (start == UnicodeDecomp_codepoints[entry->dec_index] &&
+ code == UnicodeDecomp_codepoints[entry->dec_index + 1])
+ {
+ *result = entry->codepoint;
+ return true;
+ }
+ }
+#endif /* !FRONTEND */
+ }
+
+ return false;
+}
+
+/*
+ * Decompose the given code into the array given by caller. The
+ * decomposition begins at the position given by caller, saving one
+ * lookup on the decomposition table. The current position needs to be
+ * updated here to let the caller know from where to continue filling
+ * in the array result.
+ *
+ * *result is the output array and *current the index of the next free
+ * slot in it.  No bounds checking is done here; the caller must have
+ * sized the array beforehand (unicode_normalize() uses
+ * get_decomposed_size() for that).
+ */
+static void
+decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)
+{
+ const pg_unicode_decomposition *entry;
+ int i;
+ const uint32 *decomp;
+ int dec_size;
+
+ /*
+ * Fast path for Hangul characters not stored in tables to save memory as
+ * decomposition is algorithmic. See
+ * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details
+ * on the matter.
+ */
+ if (code >= SBASE && code < SBASE + SCOUNT)
+ {
+ uint32 l,
+ v,
+ tindex,
+ sindex;
+ pg_wchar *res = *result;
+
+ sindex = code - SBASE;
+ l = LBASE + sindex / (VCOUNT * TCOUNT);
+ v = VBASE + (sindex % (VCOUNT * TCOUNT)) / TCOUNT;
+ tindex = sindex % TCOUNT;
+
+ res[*current] = l;
+ (*current)++;
+ res[*current] = v;
+ (*current)++;
+
+ if (tindex != 0) /* tindex 0 means the syllable has no T part */
+ {
+ res[*current] = TBASE + tindex;
+ (*current)++;
+ }
+
+ return;
+ }
+
+ entry = get_code_entry(code);
+
+ /*
+ * Just fill in with the current decomposition if there are no
+ * decomposition codes to recurse to. A NULL entry is equivalent to a
+ * character with class 0 and no decompositions, so just leave also in
+ * this case.
+ */
+ if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||
+ (!compat && DECOMPOSITION_IS_COMPAT(entry)))
+ {
+ pg_wchar *res = *result;
+
+ res[*current] = code;
+ (*current)++;
+ return;
+ }
+
+ /*
+ * If this entry has other decomposition codes look at them as well.
+ */
+ decomp = get_code_decomposition(entry, &dec_size);
+ for (i = 0; i < dec_size; i++)
+ {
+ pg_wchar lcode = (pg_wchar) decomp[i];
+
+ /* Recurse: each component may itself be decomposable */
+ decompose_code(lcode, compat, result, current);
+ }
+}
+
+/*
+ * unicode_normalize - Normalize a Unicode string to the specified form.
+ *
+ * The input is a 0-terminated array of codepoints.
+ *
+ * In frontend, returns a 0-terminated array of codepoints, allocated with
+ * malloc. Or NULL if we run out of memory. In backend, the returned
+ * string is palloc'd instead, and OOM is reported with ereport().
+ *
+ * The work is done in three steps: full decomposition, canonical
+ * reordering of combining characters, and (for NFC and NFKC only)
+ * recomposition.  The input array is not modified.
+ */
+pg_wchar *
+unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
+{
+ bool compat = (form == UNICODE_NFKC || form == UNICODE_NFKD);
+ bool recompose = (form == UNICODE_NFC || form == UNICODE_NFKC);
+ pg_wchar *decomp_chars;
+ pg_wchar *recomp_chars;
+ int decomp_size,
+ current_size;
+ int count;
+ const pg_wchar *p;
+
+ /* variables for recomposition */
+ int last_class;
+ int starter_pos;
+ int target_pos;
+ uint32 starter_ch;
+
+ /* First, do character decomposition */
+
+ /*
+ * Calculate how many characters long the decomposed version will be.
+ */
+ decomp_size = 0;
+ for (p = input; *p; p++)
+ decomp_size += get_decomposed_size(*p, compat);
+
+ decomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));
+ if (decomp_chars == NULL)
+ return NULL;
+
+ /*
+ * Now fill in each entry recursively. This needs a second pass on the
+ * decomposition table.
+ */
+ current_size = 0;
+ for (p = input; *p; p++)
+ decompose_code(*p, compat, &decomp_chars, &current_size);
+ decomp_chars[decomp_size] = '\0';
+ Assert(decomp_size == current_size);
+
+ /* Leave if there is nothing to decompose */
+ if (decomp_size == 0)
+ return decomp_chars;
+
+ /*
+ * Now apply canonical ordering.
+ */
+ for (count = 1; count < decomp_size; count++)
+ {
+ pg_wchar prev = decomp_chars[count - 1];
+ pg_wchar next = decomp_chars[count];
+ pg_wchar tmp;
+ const uint8 prevClass = get_canonical_class(prev);
+ const uint8 nextClass = get_canonical_class(next);
+
+ /*
+ * Per Unicode (https://www.unicode.org/reports/tr15/tr15-18.html)
+ * annex 4, a sequence of two adjacent characters in a string is an
+ * exchangeable pair if the combining class (from the Unicode
+ * Character Database) for the first character is greater than the
+ * combining class for the second, and the second is not a starter. A
+ * character is a starter if its combining class is 0.
+ */
+ if (prevClass == 0 || nextClass == 0)
+ continue;
+
+ if (prevClass <= nextClass)
+ continue;
+
+ /* exchange can happen */
+ tmp = decomp_chars[count - 1];
+ decomp_chars[count - 1] = decomp_chars[count];
+ decomp_chars[count] = tmp;
+
+ /* backtrack to check again */
+ if (count > 1)
+ count -= 2; /* with the loop's count++ this re-examines the previous pair */
+ }
+
+ if (!recompose)
+ return decomp_chars;
+
+ /*
+ * The last phase of NFC and NFKC is the recomposition of the reordered
+ * Unicode string using combining classes. The recomposed string cannot be
+ * longer than the decomposed one, so make the allocation of the output
+ * string based on that assumption.
+ */
+ recomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));
+ if (!recomp_chars)
+ {
+ FREE(decomp_chars);
+ return NULL;
+ }
+
+ last_class = -1; /* this eliminates a special check */
+ starter_pos = 0;
+ target_pos = 1;
+ starter_ch = recomp_chars[0] = decomp_chars[0];
+
+ for (count = 1; count < decomp_size; count++)
+ {
+ pg_wchar ch = decomp_chars[count];
+ int ch_class = get_canonical_class(ch);
+ pg_wchar composite;
+
+ if (last_class < ch_class &&
+ recompose_code(starter_ch, ch, &composite))
+ {
+ /* ch merges into the current starter; nothing is appended */
+ recomp_chars[starter_pos] = composite;
+ starter_ch = composite;
+ }
+ else if (ch_class == 0)
+ {
+ /* ch becomes the new starter */
+ starter_pos = target_pos;
+ starter_ch = ch;
+ last_class = -1;
+ recomp_chars[target_pos++] = ch;
+ }
+ else
+ {
+ /* combining character that did not compose; copy it through */
+ last_class = ch_class;
+ recomp_chars[target_pos++] = ch;
+ }
+ }
+ recomp_chars[target_pos] = (pg_wchar) '\0';
+
+ FREE(decomp_chars);
+
+ return recomp_chars;
+}
+
+/*
+ * Normalization "quick check" algorithm; see
+ * <http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms>
+ */
+
+/* We only need this in the backend. */
+#ifndef FRONTEND
+
+/*
+ * Look up ch in the quick-check property table described by norminfo.
+ *
+ * The hash key is the codepoint with its bytes in network order.  Returns
+ * the matching entry, or NULL if the table has none for ch.
+ */
+static const pg_unicode_normprops *
+qc_hash_lookup(pg_wchar ch, const pg_unicode_norminfo *norminfo)
+{
+    uint32      key = pg_hton32(ch);
+    int         slot = norminfo->hash(&key);
+
+    /* An out-of-range slot implies no match */
+    if (slot < 0 || slot >= norminfo->num_normprops)
+        return NULL;
+
+    /* Perfect hash: only the identified slot can possibly hold ch */
+    if (norminfo->normprops[slot].codepoint != ch)
+        return NULL;
+
+    return &norminfo->normprops[slot];
+}
+
+/*
+ * Look up the normalization quick check character property of ch for the
+ * given form.  Only NFC and NFKC are supported.  Characters without a
+ * table entry are reported as UNICODE_NORM_QC_YES.
+ */
+static UnicodeNormalizationQC
+qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch)
+{
+    const pg_unicode_norminfo *table = NULL;
+    const pg_unicode_normprops *props = NULL;
+
+    if (form == UNICODE_NFC)
+    {
+        table = &UnicodeNormInfo_NFC_QC;
+    }
+    else if (form == UNICODE_NFKC)
+    {
+        table = &UnicodeNormInfo_NFKC_QC;
+    }
+    else
+    {
+        Assert(false);
+    }
+
+    if (table != NULL)
+        props = qc_hash_lookup(ch, table);
+
+    return (props != NULL) ? props->quickcheck : UNICODE_NORM_QC_YES;
+}
+
+/*
+ * Run the normalization quick check over a 0-terminated codepoint array.
+ *
+ * Returns UNICODE_NORM_QC_NO as soon as a character proves the input is not
+ * normalized, UNICODE_NORM_QC_MAYBE if some character was inconclusive, and
+ * UNICODE_NORM_QC_YES otherwise.
+ */
+UnicodeNormalizationQC
+unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input)
+{
+    UnicodeNormalizationQC verdict = UNICODE_NORM_QC_YES;
+    uint8       prev_class = 0;
+    const pg_wchar *cur;
+
+    /*
+     * For the "D" forms, we don't run the quickcheck. We don't include the
+     * lookup tables for those because they are huge, checking for these
+     * particular forms is less common, and running the slow path is faster
+     * for the "D" forms than the "C" forms because you don't need to
+     * recompose, which is slow.
+     */
+    if (form == UNICODE_NFD || form == UNICODE_NFKD)
+        return UNICODE_NORM_QC_MAYBE;
+
+    for (cur = input; *cur; cur++)
+    {
+        const uint8 cur_class = get_canonical_class(*cur);
+        UnicodeNormalizationQC check;
+
+        /* a non-starter following a higher combining class is out of order */
+        if (cur_class != 0 && prev_class > cur_class)
+            return UNICODE_NORM_QC_NO;
+
+        check = qc_is_allowed(form, *cur);
+        if (check == UNICODE_NORM_QC_NO)
+            return UNICODE_NORM_QC_NO;
+        if (check == UNICODE_NORM_QC_MAYBE)
+            verdict = UNICODE_NORM_QC_MAYBE;
+
+        prev_class = cur_class;
+    }
+
+    return verdict;
+}
+
+#endif /* !FRONTEND */
diff --git a/src/common/username.c b/src/common/username.c
new file mode 100644
index 0000000..e8ac4c4
--- /dev/null
+++ b/src/common/username.c
@@ -0,0 +1,87 @@
+/*-------------------------------------------------------------------------
+ *
+ * username.c
+ * get user name
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/common/username.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <pwd.h>
+#include <unistd.h>
+
+#include "common/username.h"
+
+/*
+ * Returns the current user name in a static buffer
+ * On error, returns NULL and sets *errstr to point to a message allocated
+ * by psprintf().  On success, *errstr is set to NULL.
+ */
+const char *
+get_user_name(char **errstr)
+{
+#ifndef WIN32
+ struct passwd *pw;
+ uid_t user_id = geteuid();
+
+ *errstr = NULL;
+
+ errno = 0; /* clear errno before call */
+ pw = getpwuid(user_id);
+ if (!pw)
+ {
+ *errstr = psprintf(_("could not look up effective user ID %ld: %s"),
+ (long) user_id,
+ errno ? strerror(errno) : _("user does not exist")); /* errno still 0: no error was reported, just no entry */
+ return NULL;
+ }
+
+ return pw->pw_name; /* points into the record returned by getpwuid() */
+#else
+ /* Microsoft recommends buffer size of UNLEN+1, where UNLEN = 256 */
+ /* "static" variable remains after function exit */
+ static char username[256 + 1];
+ DWORD len = sizeof(username);
+
+ *errstr = NULL;
+
+ if (!GetUserName(username, &len))
+ {
+ *errstr = psprintf(_("user name lookup failure: error code %lu"),
+ GetLastError());
+ return NULL;
+ }
+
+ return username;
+#endif
+}
+
+
+/*
+ * Like get_user_name(), but never returns NULL: on failure the error
+ * message is printed to stderr, prefixed with progname, and the process
+ * exits with status 1.
+ */
+const char *
+get_user_name_or_exit(const char *progname)
+{
+    char       *lookup_error;
+    const char *name = get_user_name(&lookup_error);
+
+    if (name == NULL)
+    {
+        fprintf(stderr, "%s: %s\n", progname, lookup_error);
+        exit(1);
+    }
+
+    return name;
+}
diff --git a/src/common/wait_error.c b/src/common/wait_error.c
new file mode 100644
index 0000000..a90b745
--- /dev/null
+++ b/src/common/wait_error.c
@@ -0,0 +1,148 @@
+/*-------------------------------------------------------------------------
+ *
+ * wait_error.c
+ * Convert a wait/waitpid(2) result code to a human-readable string
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/wait_error.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <signal.h>
+#include <sys/wait.h>
+
+/*
+ * Return a human-readable string explaining the reason a child process
+ * terminated. The argument is a return code returned by wait(2) or
+ * waitpid(2), which also applies to pclose(3) and system(3). The result is a
+ * translated, palloc'd or malloc'd string.
+ *
+ * The message is formatted into a local buffer and then copied with
+ * pstrdup(), so the caller owns the returned string.
+ */
+char *
+wait_result_to_str(int exitstatus)
+{
+ char str[512];
+
+ /*
+ * To simplify using this after pclose() and system(), handle status -1
+ * first. In that case, there is no wait result but some error indicated
+ * by errno.
+ */
+ if (exitstatus == -1)
+ {
+ snprintf(str, sizeof(str), "%m"); /* %m formats the current errno */
+ }
+ else if (WIFEXITED(exitstatus))
+ {
+ /*
+ * Give more specific error message for some common exit codes that
+ * have a special meaning in shells.
+ */
+ switch (WEXITSTATUS(exitstatus))
+ {
+ case 126:
+ snprintf(str, sizeof(str), _("command not executable"));
+ break;
+
+ case 127:
+ snprintf(str, sizeof(str), _("command not found"));
+ break;
+
+ default:
+ snprintf(str, sizeof(str),
+ _("child process exited with exit code %d"),
+ WEXITSTATUS(exitstatus));
+ }
+ }
+ else if (WIFSIGNALED(exitstatus))
+ {
+#if defined(WIN32)
+ snprintf(str, sizeof(str),
+ _("child process was terminated by exception 0x%X"),
+ WTERMSIG(exitstatus));
+#else
+ snprintf(str, sizeof(str),
+ _("child process was terminated by signal %d: %s"),
+ WTERMSIG(exitstatus), pg_strsignal(WTERMSIG(exitstatus)));
+#endif
+ }
+ else
+ snprintf(str, sizeof(str),
+ _("child process exited with unrecognized status %d"),
+ exitstatus);
+
+ return pstrdup(str);
+}
+
+/*
+ * Return true if a wait(2) result indicates that the child process
+ * died due to the specified signal.
+ *
+ * Two cases are recognized: the signal was received by our immediate
+ * child, or a shell sat between us and the child that died, in which case
+ * the shell reports, per POSIX, exit code 128 + signal number.
+ *
+ * If there is no possibility of an intermediate shell, this function
+ * need not (and probably should not) be used.
+ */
+bool
+wait_result_is_signal(int exit_status, int signum)
+{
+    const bool  killed_directly =
+        WIFSIGNALED(exit_status) && WTERMSIG(exit_status) == signum;
+    const bool  reported_by_shell =
+        WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == 128 + signum;
+
+    return killed_directly || reported_by_shell;
+}
+
+/*
+ * Return true if a wait(2) result indicates that the child process
+ * died due to any signal, whether the child died directly or a shell
+ * reported the death through its exit code.
+ *
+ * If include_command_not_found is true, also return true for shell
+ * exit codes indicating "command not found" and the like
+ * (specifically, exit codes 126 and 127).
+ */
+bool
+wait_result_is_any_signal(int exit_status, bool include_command_not_found)
+{
+    /* exit codes strictly above this value count as a signal report */
+    const int   threshold = include_command_not_found ? 125 : 128;
+
+    return WIFSIGNALED(exit_status) ||
+        (WIFEXITED(exit_status) && WEXITSTATUS(exit_status) > threshold);
+}
+
+/*
+ * Return the shell exit code (normally 0 to 255) that corresponds to the
+ * given wait status, as returned by wait(2) or waitpid(2).  The same
+ * applies to pclose(3) and system(3); to support those, "-1" is passed
+ * through unchanged.
+ */
+int
+wait_result_to_exit_code(int exit_status)
+{
+    int         code = -1;  /* pclose()/system() failure, or unknown status */
+
+    if (exit_status != -1)
+    {
+        if (WIFEXITED(exit_status))
+            code = WEXITSTATUS(exit_status);
+        else if (WIFSIGNALED(exit_status))
+            code = 128 + WTERMSIG(exit_status);
+    }
+
+    return code;
+}
diff --git a/src/common/wchar.c b/src/common/wchar.c
new file mode 100644
index 0000000..fbac11d
--- /dev/null
+++ b/src/common/wchar.c
@@ -0,0 +1,2194 @@
+/*-------------------------------------------------------------------------
+ *
+ * wchar.c
+ * Functions for working with multibyte characters in various encodings.
+ *
+ * Portions Copyright (c) 1998-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/common/wchar.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "mb/pg_wchar.h"
+#include "utils/ascii.h"
+
+
+/*
+ * Operations on multi-byte encodings are driven by a table of helper
+ * functions.
+ *
+ * To add support for an encoding, define mblen(), dsplen(), verifychar() and
+ * verifystr() for the encoding. For server-encodings, also define mb2wchar()
+ * and wchar2mb() conversion functions.
+ *
+ * These functions generally assume that their input is validly formed.
+ * The "verifier" functions, further down in the file, have to be more
+ * paranoid.
+ *
+ * We expect that mblen() does not need to examine more than the first byte
+ * of the character to discover the correct length. GB18030 is an exception
+ * to that rule, though, as it also looks at second byte. But even that
+ * behaves in a predictable way, if you only pass the first byte: it will
+ * treat 4-byte encoded characters as two 2-byte encoded characters, which is
+ * good enough for all current uses.
+ *
+ * Note: for the display output of psql to work properly, the return values
+ * of the dsplen functions must conform to the Unicode standard. In particular
+ * the NUL character is zero width and control characters are generally
+ * width -1. It is recommended that non-ASCII encodings refer their ASCII
+ * subset to the ASCII routines to ensure consistency.
+ */
+
+/*
+ * SQL/ASCII
+ *
+ * Copy up to len bytes of "from" into "to", one pg_wchar per byte, stopping
+ * early at a zero byte.  "to" is always 0-terminated; the number of
+ * characters stored (not counting the terminator) is returned.
+ */
+static int
+pg_ascii2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+    int         n;
+
+    for (n = 0; n < len && from[n]; n++)
+        to[n] = from[n];
+    to[n] = 0;
+
+    return n;
+}
+
+static int
+pg_ascii_mblen(const unsigned char *s)
+{
+ return 1; /* every character is a single byte; *s is not examined */
+}
+
+/*
+ * Display width of a single-byte character: 0 for NUL, -1 for control
+ * characters (below 0x20, and 0x7f), 1 for everything else.
+ */
+static int
+pg_ascii_dsplen(const unsigned char *s)
+{
+    const unsigned char ch = *s;
+
+    if (ch == '\0')
+        return 0;
+
+    return (ch < 0x20 || ch == 0x7f) ? -1 : 1;
+}
+
+/*
+ * EUC
+ *
+ * Convert up to len bytes of "from" to pg_wchar, stopping early at a zero
+ * lead byte.  The bytes of each multibyte character are packed into one
+ * pg_wchar, most significant byte first (the SS3 lead byte is kept, in
+ * bits 16 and up).  If fewer bytes remain than the lead byte calls for,
+ * the lead byte is stored as a character of its own.  "to" is always
+ * 0-terminated; the number of characters stored is returned.
+ */
+static int
+pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte
+ * KANA") */
+ {
+ from++;
+ *to = (SS2 << 8) | *from++;
+ len -= 2;
+ }
+ else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
+ {
+ from++;
+ *to = (SS3 << 16) | (*from++ << 8);
+ *to |= *from++;
+ len -= 3;
+ }
+ else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
+ {
+ *to = *from++ << 8;
+ *to |= *from++;
+ len -= 2;
+ }
+ else /* must be ASCII */
+ {
+ *to = *from++;
+ len--;
+ }
+ to++;
+ cnt++;
+ }
+ *to = 0;
+ return cnt;
+}
+
+/*
+ * Byte length of the EUC character starting at s, judged from its first
+ * byte only: 2 after SS2, 3 after SS3, 2 for any other byte with the high
+ * bit set, and 1 otherwise.
+ */
+static inline int
+pg_euc_mblen(const unsigned char *s)
+{
+    if (*s == SS2)
+        return 2;
+    if (*s == SS3)
+        return 3;
+
+    return IS_HIGHBIT_SET(*s) ? 2 : 1;
+}
+
+/*
+ * Display width of the EUC character starting at s: 2 columns for any
+ * multibyte character (SS2, SS3 or high-bit lead byte), otherwise the
+ * ASCII rules apply.
+ */
+static inline int
+pg_euc_dsplen(const unsigned char *s)
+{
+    if (*s == SS2 || *s == SS3 || IS_HIGHBIT_SET(*s))
+        return 2;
+
+    return pg_ascii_dsplen(s);
+}
+
+/*
+ * EUC_JP
+ */
+static int
+pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+ return pg_euc2wchar_with_len(from, to, len); /* EUC_JP uses the generic EUC conversion */
+}
+
+static int
+pg_eucjp_mblen(const unsigned char *s)
+{
+ return pg_euc_mblen(s); /* EUC_JP uses the generic EUC length rules */
+}
+
+/*
+ * Display width of the EUC_JP character starting at s: characters
+ * introduced by SS2 take 1 column, those introduced by SS3 or any other
+ * high-bit byte take 2, and the rest follow the ASCII rules.
+ */
+static int
+pg_eucjp_dsplen(const unsigned char *s)
+{
+    if (*s == SS2)
+        return 1;
+    if (*s == SS3 || IS_HIGHBIT_SET(*s))
+        return 2;
+
+    return pg_ascii_dsplen(s);
+}
+
+/*
+ * EUC_KR
+ */
+static int
+pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+ return pg_euc2wchar_with_len(from, to, len); /* EUC_KR uses the generic EUC conversion */
+}
+
+static int
+pg_euckr_mblen(const unsigned char *s)
+{
+ return pg_euc_mblen(s); /* EUC_KR uses the generic EUC length rules */
+}
+
+static int
+pg_euckr_dsplen(const unsigned char *s)
+{
+ return pg_euc_dsplen(s); /* EUC_KR uses the generic EUC display widths */
+}
+
+/*
+ * EUC_CN
+ *
+ * Convert up to len bytes of "from" to pg_wchar, stopping early at a zero
+ * lead byte.  SS2 and SS3 both introduce 3-byte characters here; any other
+ * high-bit lead byte starts a 2-byte character.  If fewer bytes remain
+ * than the lead byte calls for, the lead byte is stored as a character of
+ * its own.  "to" is always 0-terminated; the number of characters stored
+ * is returned.
+ */
+static int
+pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
+ {
+ from++;
+ *to = (SS2 << 16) | (*from++ << 8);
+ *to |= *from++;
+ len -= 3;
+ }
+ else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */
+ {
+ from++;
+ *to = (SS3 << 16) | (*from++ << 8);
+ *to |= *from++;
+ len -= 3;
+ }
+ else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
+ {
+ *to = *from++ << 8;
+ *to |= *from++;
+ len -= 2;
+ }
+ else
+ {
+ *to = *from++;
+ len--;
+ }
+ to++;
+ cnt++;
+ }
+ *to = 0;
+ return cnt;
+}
+
+/*
+ * Byte length of the EUC_CN character starting at s: 2 if the first byte
+ * has its high bit set, else 1.
+ */
+static int
+pg_euccn_mblen(const unsigned char *s)
+{
+    return IS_HIGHBIT_SET(*s) ? 2 : 1;
+}
+
+/*
+ * Display width of the EUC_CN character starting at s: 2 columns if the
+ * first byte has its high bit set, otherwise the ASCII rules apply.
+ */
+static int
+pg_euccn_dsplen(const unsigned char *s)
+{
+    return IS_HIGHBIT_SET(*s) ? 2 : pg_ascii_dsplen(s);
+}
+
+/*
+ * EUC_TW
+ *
+ * Convert up to len bytes of "from" to pg_wchar, stopping early at a zero
+ * lead byte.  SS2 introduces a 4-byte character, SS3 a 3-byte character,
+ * and any other high-bit lead byte a 2-byte character.  If fewer bytes
+ * remain than the lead byte calls for, the lead byte is stored as a
+ * character of its own.  "to" is always 0-terminated; the number of
+ * characters stored is returned.
+ */
+static int
+pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ if (*from == SS2 && len >= 4) /* code set 2 */
+ {
+ from++;
+ *to = (((uint32) SS2) << 24) | (*from++ << 16);
+ *to |= *from++ << 8;
+ *to |= *from++;
+ len -= 4;
+ }
+ else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
+ {
+ from++;
+ *to = (SS3 << 16) | (*from++ << 8);
+ *to |= *from++;
+ len -= 3;
+ }
+ else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
+ {
+ *to = *from++ << 8;
+ *to |= *from++;
+ len -= 2;
+ }
+ else
+ {
+ *to = *from++;
+ len--;
+ }
+ to++;
+ cnt++;
+ }
+ *to = 0;
+ return cnt;
+}
+
+/*
+ * Byte length of the EUC_TW character starting at s: 4 after SS2, 3 after
+ * SS3, 2 for any other byte with the high bit set, and 1 otherwise.
+ */
+static int
+pg_euctw_mblen(const unsigned char *s)
+{
+    if (*s == SS2)
+        return 4;
+    if (*s == SS3)
+        return 3;
+
+    return IS_HIGHBIT_SET(*s) ? 2 : 1;
+}
+
+/*
+ * Display width of the EUC_TW character starting at s: 2 columns for any
+ * multibyte character (SS2, SS3 or high-bit lead byte), otherwise the
+ * ASCII rules apply.
+ */
+static int
+pg_euctw_dsplen(const unsigned char *s)
+{
+    if (*s == SS2 || *s == SS3 || IS_HIGHBIT_SET(*s))
+        return 2;
+
+    return pg_ascii_dsplen(s);
+}
+
+/*
+ * Convert pg_wchar to EUC_* encoding.
+ * caller must allocate enough space for "to", including a trailing zero!
+ * len: length of from.
+ * "from" not necessarily null terminated.
+ *
+ * Each pg_wchar is emitted most significant byte first, using as many
+ * bytes (1 to 4) as its highest non-zero byte requires.  Conversion stops
+ * early at a zero pg_wchar.  Returns the number of bytes written, not
+ * counting the trailing zero.
+ */
+static int
+pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+ int cnt = 0;
+
+ while (len > 0 && *from)
+ {
+ unsigned char c;
+
+ if ((c = (*from >> 24))) /* 4-byte character */
+ {
+ *to++ = c;
+ *to++ = (*from >> 16) & 0xff;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 4;
+ }
+ else if ((c = (*from >> 16))) /* 3-byte character */
+ {
+ *to++ = c;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 3;
+ }
+ else if ((c = (*from >> 8))) /* 2-byte character */
+ {
+ *to++ = c;
+ *to++ = *from & 0xff;
+ cnt += 2;
+ }
+ else
+ {
+ *to++ = *from;
+ cnt++;
+ }
+ from++;
+ len--;
+ }
+ *to = 0;
+ return cnt;
+}
+
+
+/*
+ * JOHAB
+ */
+static int
+pg_johab_mblen(const unsigned char *s)
+{
+ return pg_euc_mblen(s); /* JOHAB lengths are computed with the generic EUC rules */
+}
+
+static int
+pg_johab_dsplen(const unsigned char *s)
+{
+ return pg_euc_dsplen(s); /* JOHAB display widths are computed with the generic EUC rules */
+}
+
+/*
+ * convert UTF8 string to pg_wchar (UCS-4)
+ * caller must allocate enough space for "to", including a trailing zero!
+ * len: length of from.
+ * "from" not necessarily null terminated.
+ *
+ * Conversion stops at a zero byte, after len bytes, or at a trailing
+ * multibyte character that is cut short by len.  Continuation bytes are
+ * not validated.  Returns the number of pg_wchars stored, not counting the
+ * trailing zero.
+ */
+static int
+pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+ int cnt = 0;
+ uint32 c1,
+ c2,
+ c3,
+ c4;
+
+ while (len > 0 && *from)
+ {
+ if ((*from & 0x80) == 0) /* 1-byte (ASCII) */
+ {
+ *to = *from++;
+ len--;
+ }
+ else if ((*from & 0xe0) == 0xc0) /* 2-byte sequence */
+ {
+ if (len < 2)
+ break; /* drop trailing incomplete char */
+ c1 = *from++ & 0x1f;
+ c2 = *from++ & 0x3f;
+ *to = (c1 << 6) | c2;
+ len -= 2;
+ }
+ else if ((*from & 0xf0) == 0xe0) /* 3-byte sequence */
+ {
+ if (len < 3)
+ break; /* drop trailing incomplete char */
+ c1 = *from++ & 0x0f;
+ c2 = *from++ & 0x3f;
+ c3 = *from++ & 0x3f;
+ *to = (c1 << 12) | (c2 << 6) | c3;
+ len -= 3;
+ }
+ else if ((*from & 0xf8) == 0xf0) /* 4-byte sequence */
+ {
+ if (len < 4)
+ break; /* drop trailing incomplete char */
+ c1 = *from++ & 0x07;
+ c2 = *from++ & 0x3f;
+ c3 = *from++ & 0x3f;
+ c4 = *from++ & 0x3f;
+ *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
+ len -= 4;
+ }
+ else
+ {
+ /* treat a bogus char as length 1; not ours to raise error */
+ *to = *from++;
+ len--;
+ }
+ to++;
+ cnt++;
+ }
+ *to = 0;
+ return cnt;
+}
+
+
+/*
+ * Map a Unicode code point to UTF-8.  utf8string must have 4 bytes of
+ * space allocated; between 1 and 4 of them are written and no terminator
+ * is added.  Returns utf8string.
+ */
+unsigned char *
+unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
+{
+    unsigned char *out = utf8string;
+
+    if (c <= 0x7F)
+    {
+        /* one byte: plain ASCII */
+        *out = c;
+    }
+    else if (c <= 0x7FF)
+    {
+        /* two bytes */
+        *out++ = 0xC0 | ((c >> 6) & 0x1F);
+        *out = 0x80 | (c & 0x3F);
+    }
+    else if (c <= 0xFFFF)
+    {
+        /* three bytes */
+        *out++ = 0xE0 | ((c >> 12) & 0x0F);
+        *out++ = 0x80 | ((c >> 6) & 0x3F);
+        *out = 0x80 | (c & 0x3F);
+    }
+    else
+    {
+        /* four bytes */
+        *out++ = 0xF0 | ((c >> 18) & 0x07);
+        *out++ = 0x80 | ((c >> 12) & 0x3F);
+        *out++ = 0x80 | ((c >> 6) & 0x3F);
+        *out = 0x80 | (c & 0x3F);
+    }
+
+    return utf8string;
+}
+
+/*
+ * Trivial conversion from pg_wchar to UTF-8.
+ * caller should allocate enough space for "to"
+ * len: length of from.
+ * "from" not necessarily null terminated.
+ *
+ * Stops early at a zero pg_wchar.  "to" is 0-terminated; the number of
+ * bytes written, not counting the terminator, is returned.
+ */
+static int
+pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+    int         total = 0;
+
+    for (; len > 0 && *from; from++, len--)
+    {
+        int         nbytes;
+
+        /* encode in place, then read the length back from the lead byte */
+        unicode_to_utf8(*from, to);
+        nbytes = pg_utf_mblen(to);
+        to += nbytes;
+        total += nbytes;
+    }
+    *to = 0;
+
+    return total;
+}
+
+/*
+ * pg_utf_mblen
+ *
+ * Report the byte length of the UTF-8 character whose first byte is *s,
+ * judging by that first byte alone.
+ *
+ * Sequences longer than 4 bytes are not supported in the current
+ * implementation, so the result is never larger than 4.  A leading byte
+ * that is illegal, or that announces a longer sequence than we support,
+ * yields 1.
+ *
+ * Changing that limit would require fixing pg_utf2wchar_with_len(),
+ * utf8_to_unicode(), pg_utf8_islegal(), and perhaps other places.
+ */
+int
+pg_utf_mblen(const unsigned char *s)
+{
+	const unsigned char lead = *s;
+
+	if ((lead & 0x80) == 0)
+		return 1;
+	if ((lead & 0xe0) == 0xc0)
+		return 2;
+	if ((lead & 0xf0) == 0xe0)
+		return 3;
+	if ((lead & 0xf8) == 0xf0)
+		return 4;
+#ifdef NOT_USED
+	if ((lead & 0xfc) == 0xf8)
+		return 5;
+	if ((lead & 0xfe) == 0xfc)
+		return 6;
+#endif
+
+	/* anything else is treated as a single byte */
+	return 1;
+}
+
+/*
+ * This is an implementation of wcwidth() and wcswidth() as defined in
+ * "The Single UNIX Specification, Version 2, The Open Group, 1997"
+ * <http://www.unix.org/online.html>
+ *
+ * Markus Kuhn -- 2001-09-08 -- public domain
+ *
+ * customised for PostgreSQL
+ *
+ * original available at : http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+ */
+
+/* One closed range [first, last] of code points in a lookup table. */
+struct mbinterval
+{
+	unsigned int first;			/* lowest value in the range */
+	unsigned int last;			/* highest value in the range */
+};
+
+/*
+ * mbbisearch
+ *
+ * Binary search of an interval table.  "table" holds entries 0..max
+ * (max is the index of the last entry, not the count).  Returns 1 when
+ * "ucs" falls inside one of the intervals, otherwise 0.
+ */
+static int
+mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
+{
+	int			lo = 0;
+	int			hi = max;
+
+	/* quick rejection of values outside the table's overall span */
+	if (ucs < table[0].first || ucs > table[max].last)
+		return 0;
+
+	while (lo <= hi)
+	{
+		int			probe = (lo + hi) / 2;
+		const struct mbinterval *iv = &table[probe];
+
+		if (ucs > iv->last)
+			lo = probe + 1;
+		else if (ucs < iv->first)
+			hi = probe - 1;
+		else
+			return 1;
+	}
+
+	return 0;
+}
+
+
+/*
+ * ucs_wcwidth
+ *
+ * Column width of an ISO 10646 character, decided as follows:
+ *
+ *	  - U+0000 has width 0.
+ *
+ *	  - Other C0/C1 control characters and DEL, as well as values above
+ *		0x10ffff, give -1.
+ *
+ *	  - Non-spacing and enclosing combining characters (general category
+ *		code Mn, Me or Cf in the Unicode database) have width 0.
+ *
+ *	  - Spacing characters in the East Asian Wide (W) or East Asian
+ *		FullWidth (F) category, as defined in Unicode Technical Report #11,
+ *		have width 2.
+ *
+ *	  - Everything else (including all printable ISO 8859-1 and WGL4
+ *		characters, Unicode control characters, etc.) has width 1.
+ *
+ * This implementation assumes that wchar_t characters are encoded in
+ * ISO 10646.
+ */
+static int
+ucs_wcwidth(pg_wchar ucs)
+{
+#include "common/unicode_nonspacing_table.h"
+#include "common/unicode_east_asian_fw_table.h"
+
+	/* the null character takes no columns */
+	if (ucs == 0)
+		return 0;
+
+	/* 8-bit control characters and out-of-range values */
+	if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
+		return -1;
+
+	/*
+	 * Look in the non-spacing table before the wide table.
+	 *
+	 * XXX: In the official Unicode sources, it is possible for a character
+	 * to be described as both non-spacing and wide at the same time.  As of
+	 * Unicode 13.0, letting the non-spacing property decide the display
+	 * width gives the correct behavior, hence this ordering.
+	 */
+	if (mbbisearch(ucs, nonspacing,
+				   sizeof(nonspacing) / sizeof(struct mbinterval) - 1))
+		return 0;
+
+	/* wide (two-column) characters */
+	if (mbbisearch(ucs, east_asian_fw,
+				   sizeof(east_asian_fw) / sizeof(struct mbinterval) - 1))
+		return 2;
+
+	/* ordinary single-column character */
+	return 1;
+}
+
+/*
+ * utf8_to_unicode
+ *
+ * Decode the single UTF-8 character starting at "c" into a code point;
+ * this is the one-character counterpart of pg_utf2wchar_with_len.
+ *
+ * There is no error checking: the caller must make sure that all bytes
+ * implied by the leading byte are readable.  A leading byte that matches
+ * none of the 1..4 byte patterns yields 0xffffffff, a deliberately invalid
+ * code.
+ */
+pg_wchar
+utf8_to_unicode(const unsigned char *c)
+{
+	const unsigned char lead = c[0];
+
+	/* 0xxxxxxx */
+	if ((lead & 0x80) == 0)
+		return (pg_wchar) lead;
+
+	/* 110xxxxx 10xxxxxx */
+	if ((lead & 0xe0) == 0xc0)
+		return (pg_wchar) (((lead & 0x1f) << 6) |
+						   (c[1] & 0x3f));
+
+	/* 1110xxxx + two continuation bytes */
+	if ((lead & 0xf0) == 0xe0)
+		return (pg_wchar) (((lead & 0x0f) << 12) |
+						   ((c[1] & 0x3f) << 6) |
+						   (c[2] & 0x3f));
+
+	/* 11110xxx + three continuation bytes */
+	if ((lead & 0xf8) == 0xf0)
+		return (pg_wchar) (((lead & 0x07) << 18) |
+						   ((c[1] & 0x3f) << 12) |
+						   ((c[2] & 0x3f) << 6) |
+						   (c[3] & 0x3f));
+
+	/* that is an invalid code on purpose */
+	return 0xffffffff;
+}
+
+/*
+ * Display width of the UTF-8 character at "s": decode it to a code point
+ * and return whatever ucs_wcwidth() reports for that code point.
+ */
+static int
+pg_utf_dsplen(const unsigned char *s)
+{
+	pg_wchar	codepoint = utf8_to_unicode(s);
+
+	return ucs_wcwidth(codepoint);
+}
+
+/*
+ * pg_mule2wchar_with_len
+ *
+ * Convert mule internal code to pg_wchar.  At most "len" bytes of "from"
+ * are consumed; conversion also stops at a zero byte ("from" need not be
+ * null terminated).  The caller must allocate enough space for "to", which
+ * receives a terminating zero.
+ *
+ * A multibyte form is decoded only when enough input bytes remain for it;
+ * otherwise, or for any other leading byte, one byte is taken as one
+ * character.  For the LCPRV1/LCPRV2 forms the first byte is skipped and
+ * not stored in the result.
+ *
+ * Returns the number of pg_wchars produced, not counting the terminator.
+ */
+static int
+pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+	int			nchars = 0;
+
+	for (; len > 0 && *from; to++, nchars++)
+	{
+		if (IS_LC1(*from) && len >= 2)
+		{
+			*to = (from[0] << 16) | from[1];
+			from += 2;
+			len -= 2;
+		}
+		else if (IS_LCPRV1(*from) && len >= 3)
+		{
+			*to = (from[1] << 16) | from[2];
+			from += 3;
+			len -= 3;
+		}
+		else if (IS_LC2(*from) && len >= 3)
+		{
+			*to = (from[0] << 16) | (from[1] << 8) | from[2];
+			from += 3;
+			len -= 3;
+		}
+		else if (IS_LCPRV2(*from) && len >= 4)
+		{
+			*to = (from[1] << 16) | (from[2] << 8) | from[3];
+			from += 4;
+			len -= 4;
+		}
+		else
+		{
+			/* assume ASCII */
+			*to = (unsigned char) *from;
+			from += 1;
+			len -= 1;
+		}
+	}
+	*to = 0;
+	return nchars;
+}
+
+/*
+ * pg_wchar2mule_with_len
+ *
+ * Convert pg_wchar to mule internal code.  At most "len" pg_wchars are
+ * read from "from"; conversion also stops at a zero pg_wchar ("from" need
+ * not be null terminated).  The caller must allocate enough space for
+ * "to", which receives a terminating zero byte.
+ *
+ * Bits 16..23 of each pg_wchar select the output form; the two low bytes
+ * supply the remaining output bytes.
+ *
+ * Returns the number of bytes written, not counting the terminator.
+ */
+static int
+pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+	int			nbytes = 0;
+
+	for (; len > 0 && *from; from++, len--)
+	{
+		const pg_wchar wc = *from;
+		const unsigned char lb = (wc >> 16) & 0xff;
+		const unsigned char mid = (wc >> 8) & 0xff;
+		const unsigned char low = wc & 0xff;
+
+		if (IS_LC1(lb))
+		{
+			to[0] = lb;
+			to[1] = low;
+			to += 2;
+			nbytes += 2;
+		}
+		else if (IS_LC2(lb))
+		{
+			to[0] = lb;
+			to[1] = mid;
+			to[2] = low;
+			to += 3;
+			nbytes += 3;
+		}
+		else if (IS_LCPRV1_A_RANGE(lb))
+		{
+			to[0] = LCPRV1_A;
+			to[1] = lb;
+			to[2] = low;
+			to += 3;
+			nbytes += 3;
+		}
+		else if (IS_LCPRV1_B_RANGE(lb))
+		{
+			to[0] = LCPRV1_B;
+			to[1] = lb;
+			to[2] = low;
+			to += 3;
+			nbytes += 3;
+		}
+		else if (IS_LCPRV2_A_RANGE(lb))
+		{
+			to[0] = LCPRV2_A;
+			to[1] = lb;
+			to[2] = mid;
+			to[3] = low;
+			to += 4;
+			nbytes += 4;
+		}
+		else if (IS_LCPRV2_B_RANGE(lb))
+		{
+			to[0] = LCPRV2_B;
+			to[1] = lb;
+			to[2] = mid;
+			to[3] = low;
+			to += 4;
+			nbytes += 4;
+		}
+		else
+		{
+			/* none of the multibyte forms: emit just the low byte */
+			*to++ = low;
+			nbytes += 1;
+		}
+	}
+	*to = 0;
+	return nbytes;
+}
+
+/*
+ * pg_mule_mblen
+ *
+ * Byte length of the mule internal code character whose leading byte is
+ * *s.  Any leading byte not recognized by the IS_LC* tests counts as a
+ * one-byte (ASCII) character.
+ *
+ * exported for direct use by conv.c
+ */
+int
+pg_mule_mblen(const unsigned char *s)
+{
+	if (IS_LC1(*s))
+		return 2;
+	if (IS_LCPRV1(*s) || IS_LC2(*s))
+		return 3;
+	if (IS_LCPRV2(*s))
+		return 4;
+
+	return 1;					/* assume ASCII */
+}
+
+/*
+ * pg_mule_dsplen
+ *
+ * Display width of the mule internal code character led by *s.
+ *
+ * Note: it's not really appropriate to assume that all multibyte charsets
+ * are double-wide on screen.  But this seems an okay approximation for
+ * the MULE charsets we currently support.
+ */
+static int
+pg_mule_dsplen(const unsigned char *s)
+{
+	/* the LC1 and LCPRV1 forms are shown single-width */
+	if (IS_LC1(*s) || IS_LCPRV1(*s))
+		return 1;
+
+	/* the LC2 and LCPRV2 forms are shown double-width */
+	if (IS_LC2(*s) || IS_LCPRV2(*s))
+		return 2;
+
+	return 1;					/* assume ASCII */
+}
+
+/*
+ * ISO8859-1
+ *
+ * Widen each input byte to one pg_wchar.  At most "len" bytes are read and
+ * conversion stops at a zero byte; "to" receives a terminating zero.
+ * Returns the number of characters converted.
+ */
+static int
+pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+	int			n;
+
+	for (n = 0; n < len && from[n]; n++)
+		to[n] = from[n];
+
+	to[n] = 0;
+	return n;
+}
+
+/*
+ * pg_wchar2single_with_len
+ *
+ * Trivial conversion from pg_wchar to a single byte encoding: each
+ * pg_wchar is stored into one byte, which simply discards its high bits.
+ * At most "len" pg_wchars are read and conversion stops at a zero value
+ * ("from" need not be null terminated).  The caller must allocate enough
+ * space for "to", which receives a terminating zero byte.
+ *
+ * Returns the number of bytes written, not counting the terminator.
+ */
+static int
+pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+	int			n;
+
+	for (n = 0; n < len && from[n]; n++)
+		to[n] = from[n];
+
+	to[n] = 0;
+	return n;
+}
+
+/*
+ * Byte length of a character in a single-byte encoding: always 1.
+ * The input is not examined.
+ */
+static int
+pg_latin1_mblen(const unsigned char *s)
+{
+	(void) s;
+
+	return 1;
+}
+
+/*
+ * Display width of a character in a single-byte encoding; this simply
+ * defers to pg_ascii_dsplen().
+ */
+static int
+pg_latin1_dsplen(const unsigned char *s)
+{
+	int			width = pg_ascii_dsplen(s);
+
+	return width;
+}
+
+/*
+ * SJIS
+ *
+ * Byte length of the character led by *s: bytes 0xa1..0xdf are one-byte
+ * characters, any other byte with the high bit set starts a two-byte
+ * character, and everything else is one byte.
+ */
+static int
+pg_sjis_mblen(const unsigned char *s)
+{
+	if (*s >= 0xa1 && *s <= 0xdf)
+		return 1;				/* 1 byte kana? */
+	if (IS_HIGHBIT_SET(*s))
+		return 2;				/* kanji? */
+
+	return 1;					/* should be ASCII */
+}
+
+/*
+ * Display width of the SJIS character led by *s: 1 for bytes 0xa1..0xdf,
+ * 2 for any other byte with the high bit set, and otherwise whatever
+ * pg_ascii_dsplen() reports.
+ */
+static int
+pg_sjis_dsplen(const unsigned char *s)
+{
+	if (*s >= 0xa1 && *s <= 0xdf)
+		return 1;				/* 1 byte kana? */
+	if (IS_HIGHBIT_SET(*s))
+		return 2;				/* kanji? */
+
+	return pg_ascii_dsplen(s);	/* should be ASCII */
+}
+
+/*
+ * Big5
+ *
+ * Byte length of the character led by *s: 2 when the high bit is set,
+ * otherwise 1.
+ */
+static int
+pg_big5_mblen(const unsigned char *s)
+{
+	/* high bit set: kanji?  otherwise it should be ASCII */
+	return IS_HIGHBIT_SET(*s) ? 2 : 1;
+}
+
+/*
+ * Display width of the Big5 character led by *s: 2 when the high bit is
+ * set, otherwise whatever pg_ascii_dsplen() reports.
+ */
+static int
+pg_big5_dsplen(const unsigned char *s)
+{
+	if (IS_HIGHBIT_SET(*s))
+		return 2;				/* kanji? */
+
+	return pg_ascii_dsplen(s);	/* should be ASCII */
+}
+
+/*
+ * GBK
+ *
+ * Byte length of the character led by *s: 2 when the high bit is set,
+ * otherwise 1.
+ */
+static int
+pg_gbk_mblen(const unsigned char *s)
+{
+	/* high bit set: kanji?  otherwise it should be ASCII */
+	return IS_HIGHBIT_SET(*s) ? 2 : 1;
+}
+
+/*
+ * Display width of the GBK character led by *s: 2 when the high bit is
+ * set, otherwise whatever pg_ascii_dsplen() reports.
+ */
+static int
+pg_gbk_dsplen(const unsigned char *s)
+{
+	if (IS_HIGHBIT_SET(*s))
+		return 2;				/* kanji? */
+
+	return pg_ascii_dsplen(s);	/* should be ASCII */
+}
+
+/*
+ * UHC
+ *
+ * Byte length of the character led by *s: 2 when the high bit is set,
+ * otherwise 1.
+ */
+static int
+pg_uhc_mblen(const unsigned char *s)
+{
+	/* high bit set: 2byte?  otherwise it should be ASCII */
+	return IS_HIGHBIT_SET(*s) ? 2 : 1;
+}
+
+/*
+ * Display width of the UHC character led by *s: 2 when the high bit is
+ * set, otherwise whatever pg_ascii_dsplen() reports.
+ */
+static int
+pg_uhc_dsplen(const unsigned char *s)
+{
+	if (IS_HIGHBIT_SET(*s))
+		return 2;				/* 2byte? */
+
+	return pg_ascii_dsplen(s);	/* should be ASCII */
+}
+
+/*
+ * GB18030
+ *	Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp>
+ */
+
+/*
+ * pg_gb18030_mblen
+ *
+ * Byte length of the GB18030 character starting at "s": 1 if the first
+ * byte has no high bit, 4 if the second byte is in 0x30..0x39, else 2.
+ *
+ * Unlike all other mblen() functions, this one also inspects the second
+ * byte of the input.  If the caller supplies only the first byte of a
+ * multi-byte string followed by \0, the result is still predictable: a
+ * 4-byte character is reported as two 2-byte characters.  That works
+ * because in any valid 4-byte GB18030-encoded character the third and
+ * fourth bytes, taken on their own, look like a 2-byte encoded character.
+ * This is sufficient for all current uses, as a client-only encoding.
+ */
+static int
+pg_gb18030_mblen(const unsigned char *s)
+{
+	if (!IS_HIGHBIT_SET(*s))
+		return 1;				/* ASCII */
+
+	if (s[1] >= 0x30 && s[1] <= 0x39)
+		return 4;
+
+	return 2;
+}
+
+/*
+ * Display width of the GB18030 character led by *s: 2 when the high bit
+ * is set, otherwise whatever pg_ascii_dsplen() reports.
+ */
+static int
+pg_gb18030_dsplen(const unsigned char *s)
+{
+	if (IS_HIGHBIT_SET(*s))
+		return 2;
+
+	return pg_ascii_dsplen(s);	/* ASCII */
+}
+
+/*
+ *-------------------------------------------------------------------
+ * multibyte sequence validators
+ *
+ * The verifychar functions accept "s", a pointer to the first byte of a
+ * string, and "len", the remaining length of the string. If there is a
+ * validly encoded character beginning at *s, return its length in bytes;
+ * else return -1.
+ *
+ * The verifystr functions also accept "s", a pointer to a string and "len",
+ * the length of the string. They verify the whole string, and return the
+ * number of input bytes (<= len) that are valid. In other words, if the
+ * whole string is valid, verifystr returns "len", otherwise it returns the
+ * byte offset of the first invalid character. The verifystr functions must
+ * test for and reject zeroes in the input.
+ *
+ * The verifychar functions can assume that len > 0 and that *s != '\0', but
+ * they must test for and reject zeroes in any additional bytes of a
+ * multibyte character. Note that this definition allows the function for a
+ * single-byte encoding to be just "return 1".
+ *-------------------------------------------------------------------
+ */
+/*
+ * verifychar for ASCII: every character is accepted with length 1.
+ * Neither argument is examined.
+ */
+static int
+pg_ascii_verifychar(const unsigned char *s, int len)
+{
+	(void) s;
+	(void) len;
+
+	return 1;
+}
+
+/*
+ * verifystr for ASCII: the only invalid byte is a zero byte.  Returns
+ * "len" if the first "len" bytes of "s" contain no zero, otherwise the
+ * offset of the first zero byte.
+ */
+static int
+pg_ascii_verifystr(const unsigned char *s, int len)
+{
+	const unsigned char *zero = memchr(s, 0, len);
+
+	return (zero != NULL) ? zero - s : len;
+}
+
+/*
+ * True when byte value c lies in 0xa1..0xfe.  Note that the argument is
+ * evaluated twice, so it must not have side effects.
+ */
+#define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
+
+/*
+ * verifychar for EUC_JP.
+ *
+ * Returns the byte length of the validly encoded character starting at
+ * "s" (with "len" bytes remaining), or -1 if it is invalid or truncated.
+ */
+static int
+pg_eucjp_verifychar(const unsigned char *s, int len)
+{
+	const unsigned char lead = s[0];
+
+	if (lead == SS2)
+	{
+		/* JIS X 0201: one trailing byte restricted to 0xa1..0xdf */
+		if (len < 2)
+			return -1;
+		if (s[1] < 0xa1 || s[1] > 0xdf)
+			return -1;
+		return 2;
+	}
+
+	if (lead == SS3)
+	{
+		/* JIS X 0212: two trailing bytes in the EUC range */
+		if (len < 3)
+			return -1;
+		if (!IS_EUC_RANGE_VALID(s[1]))
+			return -1;
+		if (!IS_EUC_RANGE_VALID(s[2]))
+			return -1;
+		return 3;
+	}
+
+	/* must be ASCII */
+	if (!IS_HIGHBIT_SET(lead))
+		return 1;
+
+	/* JIS X 0208?  Both bytes must be in the EUC range. */
+	if (len < 2)
+		return -1;
+	if (!IS_EUC_RANGE_VALID(lead))
+		return -1;
+	if (!IS_EUC_RANGE_VALID(s[1]))
+		return -1;
+	return 2;
+}
+
+/*
+ * verifystr for EUC_JP.
+ *
+ * Returns the number of leading bytes of "s" (at most "len") that are
+ * valid; scanning stops at the first zero byte or invalid character.
+ */
+static int
+pg_eucjp_verifystr(const unsigned char *s, int len)
+{
+	const unsigned char *p = s;
+
+	while (len > 0)
+	{
+		int			chlen = 1;	/* ASCII-subset bytes take the fast path */
+
+		if (IS_HIGHBIT_SET(*p))
+		{
+			chlen = pg_eucjp_verifychar(p, len);
+			if (chlen == -1)
+				break;
+		}
+		else if (*p == '\0')
+			break;
+
+		p += chlen;
+		len -= chlen;
+	}
+
+	return p - s;
+}
+
+/*
+ * verifychar for EUC_KR.
+ *
+ * A byte without the high bit is a one-byte character.  Otherwise the
+ * character is two bytes and both must satisfy IS_EUC_RANGE_VALID.
+ * Returns the character's byte length, or -1 if invalid or truncated.
+ */
+static int
+pg_euckr_verifychar(const unsigned char *s, int len)
+{
+	const unsigned char lead = s[0];
+
+	/* must be ASCII */
+	if (!IS_HIGHBIT_SET(lead))
+		return 1;
+
+	if (len < 2)
+		return -1;
+	if (!IS_EUC_RANGE_VALID(lead))
+		return -1;
+	if (!IS_EUC_RANGE_VALID(s[1]))
+		return -1;
+
+	return 2;
+}
+
+/*
+ * verifystr for EUC_KR.
+ *
+ * Returns the number of leading bytes of "s" (at most "len") that are
+ * valid; scanning stops at the first zero byte or invalid character.
+ */
+static int
+pg_euckr_verifystr(const unsigned char *s, int len)
+{
+	const unsigned char *p = s;
+
+	while (len > 0)
+	{
+		int			chlen = 1;	/* ASCII-subset bytes take the fast path */
+
+		if (IS_HIGHBIT_SET(*p))
+		{
+			chlen = pg_euckr_verifychar(p, len);
+			if (chlen == -1)
+				break;
+		}
+		else if (*p == '\0')
+			break;
+
+		p += chlen;
+		len -= chlen;
+	}
+
+	return p - s;
+}
+
+/*
+ * EUC-CN byte sequences are exactly the same as EUC-KR, so the EUC-CN
+ * verifier names are plain aliases for the EUC-KR functions.
+ */
+#define pg_euccn_verifychar pg_euckr_verifychar
+#define pg_euccn_verifystr pg_euckr_verifystr
+
+/*
+ * verifychar for EUC_TW.
+ *
+ * Returns the byte length of the validly encoded character starting at
+ * "s" (with "len" bytes remaining), or -1 if it is invalid or truncated.
+ */
+static int
+pg_euctw_verifychar(const unsigned char *s, int len)
+{
+	const unsigned char lead = s[0];
+
+	if (lead == SS2)
+	{
+		/* CNS 11643 Plane 1-7: plane byte 0xa1..0xa7, then two EUC bytes */
+		if (len < 4)
+			return -1;
+		if (s[1] < 0xa1 || s[1] > 0xa7)
+			return -1;
+		if (!IS_EUC_RANGE_VALID(s[2]))
+			return -1;
+		if (!IS_EUC_RANGE_VALID(s[3]))
+			return -1;
+		return 4;
+	}
+
+	/* SS3 is unused in this encoding */
+	if (lead == SS3)
+		return -1;
+
+	/* must be ASCII */
+	if (!IS_HIGHBIT_SET(lead))
+		return 1;
+
+	/* CNS 11643 Plane 1 */
+	if (len < 2)
+		return -1;
+	/* no further range check on the lead byte? */
+	if (!IS_EUC_RANGE_VALID(s[1]))
+		return -1;
+	return 2;
+}
+
+/*
+ * verifystr for EUC_TW.
+ *
+ * Returns the number of leading bytes of "s" (at most "len") that are
+ * valid; scanning stops at the first zero byte or invalid character.
+ */
+static int
+pg_euctw_verifystr(const unsigned char *s, int len)
+{
+	const unsigned char *p = s;
+
+	while (len > 0)
+	{
+		int			chlen = 1;	/* ASCII-subset bytes take the fast path */
+
+		if (IS_HIGHBIT_SET(*p))
+		{
+			chlen = pg_euctw_verifychar(p, len);
+			if (chlen == -1)
+				break;
+		}
+		else if (*p == '\0')
+			break;
+
+		p += chlen;
+		len -= chlen;
+	}
+
+	return p - s;
+}
+
+/*
+ * verifychar for JOHAB.
+ *
+ * The character length comes from pg_johab_mblen().  If fewer than that
+ * many bytes remain the result is -1.  A lead byte without the high bit is
+ * accepted as is; otherwise every trailing byte must satisfy
+ * IS_EUC_RANGE_VALID.  Returns the character's byte length, or -1.
+ */
+static int
+pg_johab_verifychar(const unsigned char *s, int len)
+{
+	const int	mbl = pg_johab_mblen(s);
+	int			i;
+
+	if (len < mbl)
+		return -1;
+
+	if (!IS_HIGHBIT_SET(*s))
+		return mbl;
+
+	for (i = 1; i < mbl; i++)
+	{
+		if (!IS_EUC_RANGE_VALID(s[i]))
+			return -1;
+	}
+
+	return mbl;
+}
+
+/*
+ * verifystr for JOHAB.
+ *
+ * Returns the number of leading bytes of "s" (at most "len") that are
+ * valid; scanning stops at the first zero byte or invalid character.
+ */
+static int
+pg_johab_verifystr(const unsigned char *s, int len)
+{
+	const unsigned char *p = s;
+
+	while (len > 0)
+	{
+		int			chlen = 1;	/* ASCII-subset bytes take the fast path */
+
+		if (IS_HIGHBIT_SET(*p))
+		{
+			chlen = pg_johab_verifychar(p, len);
+			if (chlen == -1)
+				break;
+		}
+		else if (*p == '\0')
+			break;
+
+		p += chlen;
+		len -= chlen;
+	}
+
+	return p - s;
+}
+
+/*
+ * verifychar for MULE_INTERNAL.
+ *
+ * The character length comes from pg_mule_mblen().  If fewer than that
+ * many bytes remain the result is -1; every byte after the first must
+ * have its high bit set.  Returns the character's byte length, or -1.
+ */
+static int
+pg_mule_verifychar(const unsigned char *s, int len)
+{
+	const int	mbl = pg_mule_mblen(s);
+	int			i;
+
+	if (len < mbl)
+		return -1;
+
+	for (i = 1; i < mbl; i++)
+	{
+		if (!IS_HIGHBIT_SET(s[i]))
+			return -1;
+	}
+
+	return mbl;
+}
+
+/*
+ * verifystr for MULE_INTERNAL.
+ *
+ * Returns the number of leading bytes of "s" (at most "len") that are
+ * valid; scanning stops at the first zero byte or invalid character.
+ */
+static int
+pg_mule_verifystr(const unsigned char *s, int len)
+{
+	const unsigned char *p = s;
+
+	while (len > 0)
+	{
+		int			chlen = 1;	/* ASCII-subset bytes take the fast path */
+
+		if (IS_HIGHBIT_SET(*p))
+		{
+			chlen = pg_mule_verifychar(p, len);
+			if (chlen == -1)
+				break;
+		}
+		else if (*p == '\0')
+			break;
+
+		p += chlen;
+		len -= chlen;
+	}
+
+	return p - s;
+}
+
+/*
+ * verifychar for single-byte encodings: every character is accepted with
+ * length 1.  Neither argument is examined.
+ */
+static int
+pg_latin1_verifychar(const unsigned char *s, int len)
+{
+	(void) s;
+	(void) len;
+
+	return 1;
+}
+
+/*
+ * verifystr for single-byte encodings: the only invalid byte is a zero
+ * byte.  Returns "len" if the first "len" bytes of "s" contain no zero,
+ * otherwise the offset of the first zero byte.
+ */
+static int
+pg_latin1_verifystr(const unsigned char *s, int len)
+{
+	const unsigned char *zero = memchr(s, 0, len);
+
+	return (zero != NULL) ? zero - s : len;
+}
+
+/*
+ * verifychar for SJIS.
+ *
+ * The character length comes from pg_sjis_mblen().  If fewer than that
+ * many bytes remain the result is -1.  One-byte characters are accepted
+ * without further checks; for two-byte characters the first byte must
+ * satisfy ISSJISHEAD and the second ISSJISTAIL.  Returns the character's
+ * byte length, or -1.
+ */
+static int
+pg_sjis_verifychar(const unsigned char *s, int len)
+{
+	const int	mbl = pg_sjis_mblen(s);
+	unsigned char head;
+	unsigned char tail;
+
+	if (len < mbl)
+		return -1;
+
+	if (mbl == 1)				/* pg_sjis_mblen already verified it */
+		return mbl;
+
+	head = s[0];
+	tail = s[1];
+	if (ISSJISHEAD(head) && ISSJISTAIL(tail))
+		return mbl;
+
+	return -1;
+}
+
+/*
+ * verifystr for SJIS.
+ *
+ * Returns the number of leading bytes of "s" (at most "len") that are
+ * valid; scanning stops at the first zero byte or invalid character.
+ */
+static int
+pg_sjis_verifystr(const unsigned char *s, int len)
+{
+	const unsigned char *p = s;
+
+	while (len > 0)
+	{
+		int			chlen = 1;	/* ASCII-subset bytes take the fast path */
+
+		if (IS_HIGHBIT_SET(*p))
+		{
+			chlen = pg_sjis_verifychar(p, len);
+			if (chlen == -1)
+				break;
+		}
+		else if (*p == '\0')
+			break;
+
+		p += chlen;
+		len -= chlen;
+	}
+
+	return p - s;
+}
+
+/*
+ * verifychar for Big5.
+ *
+ * The character length comes from pg_big5_mblen().  If fewer than that
+ * many bytes remain the result is -1; any byte after the first must be
+ * nonzero.  Returns the character's byte length, or -1.
+ */
+static int
+pg_big5_verifychar(const unsigned char *s, int len)
+{
+	const int	mbl = pg_big5_mblen(s);
+	int			i;
+
+	if (len < mbl)
+		return -1;
+
+	for (i = 1; i < mbl; i++)
+	{
+		if (s[i] == '\0')
+			return -1;
+	}
+
+	return mbl;
+}
+
+/*
+ * verifystr for Big5.
+ *
+ * Returns the number of leading bytes of "s" (at most "len") that are
+ * valid; scanning stops at the first zero byte or invalid character.
+ */
+static int
+pg_big5_verifystr(const unsigned char *s, int len)
+{
+	const unsigned char *p = s;
+
+	while (len > 0)
+	{
+		int			chlen = 1;	/* ASCII-subset bytes take the fast path */
+
+		if (IS_HIGHBIT_SET(*p))
+		{
+			chlen = pg_big5_verifychar(p, len);
+			if (chlen == -1)
+				break;
+		}
+		else if (*p == '\0')
+			break;
+
+		p += chlen;
+		len -= chlen;
+	}
+
+	return p - s;
+}
+
+/*
+ * verifychar for GBK.
+ *
+ * The character length comes from pg_gbk_mblen().  If fewer than that
+ * many bytes remain the result is -1; any byte after the first must be
+ * nonzero.  Returns the character's byte length, or -1.
+ */
+static int
+pg_gbk_verifychar(const unsigned char *s, int len)
+{
+	const int	mbl = pg_gbk_mblen(s);
+	int			i;
+
+	if (len < mbl)
+		return -1;
+
+	for (i = 1; i < mbl; i++)
+	{
+		if (s[i] == '\0')
+			return -1;
+	}
+
+	return mbl;
+}
+
+/*
+ * verifystr for GBK.
+ *
+ * Returns the number of leading bytes of "s" (at most "len") that are
+ * valid; scanning stops at the first zero byte or invalid character.
+ */
+static int
+pg_gbk_verifystr(const unsigned char *s, int len)
+{
+	const unsigned char *p = s;
+
+	while (len > 0)
+	{
+		int			chlen = 1;	/* ASCII-subset bytes take the fast path */
+
+		if (IS_HIGHBIT_SET(*p))
+		{
+			chlen = pg_gbk_verifychar(p, len);
+			if (chlen == -1)
+				break;
+		}
+		else if (*p == '\0')
+			break;
+
+		p += chlen;
+		len -= chlen;
+	}
+
+	return p - s;
+}
+
+/*
+ * verifychar for UHC.
+ *
+ * The character length comes from pg_uhc_mblen().  If fewer than that
+ * many bytes remain the result is -1; any byte after the first must be
+ * nonzero.  Returns the character's byte length, or -1.
+ */
+static int
+pg_uhc_verifychar(const unsigned char *s, int len)
+{
+	const int	mbl = pg_uhc_mblen(s);
+	int			i;
+
+	if (len < mbl)
+		return -1;
+
+	for (i = 1; i < mbl; i++)
+	{
+		if (s[i] == '\0')
+			return -1;
+	}
+
+	return mbl;
+}
+
+/*
+ * verifystr for UHC.
+ *
+ * Returns the number of leading bytes of "s" (at most "len") that are
+ * valid; scanning stops at the first zero byte or invalid character.
+ */
+static int
+pg_uhc_verifystr(const unsigned char *s, int len)
+{
+	const unsigned char *p = s;
+
+	while (len > 0)
+	{
+		int			chlen = 1;	/* ASCII-subset bytes take the fast path */
+
+		if (IS_HIGHBIT_SET(*p))
+		{
+			chlen = pg_uhc_verifychar(p, len);
+			if (chlen == -1)
+				break;
+		}
+		else if (*p == '\0')
+			break;
+
+		p += chlen;
+		len -= chlen;
+	}
+
+	return p - s;
+}
+
+/*
+ * verifychar for GB18030.
+ *
+ * Returns 1 for a byte without the high bit, 4 or 2 for a well-formed
+ * four- or two-byte character, and -1 otherwise.  The second byte is only
+ * inspected when "len" says it is available.
+ */
+static int
+pg_gb18030_verifychar(const unsigned char *s, int len)
+{
+	if (!IS_HIGHBIT_SET(*s))
+		return 1;				/* ASCII */
+
+	if (len >= 4 && s[1] >= 0x30 && s[1] <= 0x39)
+	{
+		/* Should be 4-byte, validate remaining bytes */
+		if (s[0] >= 0x81 && s[0] <= 0xfe &&
+			s[2] >= 0x81 && s[2] <= 0xfe &&
+			s[3] >= 0x30 && s[3] <= 0x39)
+			return 4;
+		return -1;
+	}
+
+	if (len >= 2 && s[0] >= 0x81 && s[0] <= 0xfe)
+	{
+		/* Should be 2-byte, validate */
+		if ((s[1] >= 0x40 && s[1] <= 0x7e) ||
+			(s[1] >= 0x80 && s[1] <= 0xfe))
+			return 2;
+		return -1;
+	}
+
+	return -1;
+}
+
+/*
+ * verifystr for GB18030.
+ *
+ * Returns the number of leading bytes of "s" (at most "len") that are
+ * valid; scanning stops at the first zero byte or invalid character.
+ */
+static int
+pg_gb18030_verifystr(const unsigned char *s, int len)
+{
+	const unsigned char *p = s;
+
+	while (len > 0)
+	{
+		int			chlen = 1;	/* ASCII-subset bytes take the fast path */
+
+		if (IS_HIGHBIT_SET(*p))
+		{
+			chlen = pg_gb18030_verifychar(p, len);
+			if (chlen == -1)
+				break;
+		}
+		else if (*p == '\0')
+			break;
+
+		p += chlen;
+		len -= chlen;
+	}
+
+	return p - s;
+}
+
+/*
+ * verifychar for UTF-8.
+ *
+ * A zero byte is rejected and any other byte without the high bit is a
+ * one-byte character.  Otherwise the expected length is derived from the
+ * leading byte (1 if it matches no 2..4 byte pattern), the character must
+ * fit in the remaining "len" bytes, and it must pass pg_utf8_islegal().
+ * Returns the character's byte length, or -1.
+ */
+static int
+pg_utf8_verifychar(const unsigned char *s, int len)
+{
+	const unsigned char lead = *s;
+	int			nbytes;
+
+	if ((lead & 0x80) == 0)
+		return (lead == '\0') ? -1 : 1;
+
+	if ((lead & 0xe0) == 0xc0)
+		nbytes = 2;
+	else if ((lead & 0xf0) == 0xe0)
+		nbytes = 3;
+	else if ((lead & 0xf8) == 0xf0)
+		nbytes = 4;
+	else
+		nbytes = 1;
+
+	if (nbytes > len)
+		return -1;
+
+	return pg_utf8_islegal(s, nbytes) ? nbytes : -1;
+}
+
+/*
+ * The fast path of the UTF-8 verifier uses a deterministic finite automaton
+ * (DFA) for multibyte characters. In a traditional table-driven DFA, the
+ * input byte and current state are used to compute an index into an array of
+ * state transitions. Since the address of the next transition is dependent
+ * on this computation, there is latency in executing the load instruction,
+ * and the CPU is not kept busy.
+ *
+ * Instead, we use a "shift-based" DFA as described by Per Vognsen:
+ *
+ * https://gist.github.com/pervognsen/218ea17743e1442e59bb60d29b1aa725
+ *
+ * In a shift-based DFA, the input byte is an index into an array of integers
+ * whose bit pattern encodes the state transitions. To compute the next
+ * state, we simply right-shift the integer by the current state and apply a
+ * mask. In this scheme, the address of the transition only depends on the
+ * input byte, so there is better pipelining.
+ *
+ * The naming convention for states and transitions was adopted from a UTF-8
+ * to UTF-16/32 transcoder, whose table is reproduced below:
+ *
+ * https://github.com/BobSteagall/utf_utils/blob/6b7a465265de2f5fa6133d653df0c9bdd73bbcf8/src/utf_utils.cpp
+ *
+ * ILL ASC CR1 CR2 CR3 L2A L3A L3B L3C L4A L4B L4C CLASS / STATE
+ * ==========================================================================
+ * err, END, err, err, err, CS1, P3A, CS2, P3B, P4A, CS3, P4B, | BGN/END
+ * err, err, err, err, err, err, err, err, err, err, err, err, | ERR
+ * |
+ * err, err, END, END, END, err, err, err, err, err, err, err, | CS1
+ * err, err, CS1, CS1, CS1, err, err, err, err, err, err, err, | CS2
+ * err, err, CS2, CS2, CS2, err, err, err, err, err, err, err, | CS3
+ * |
+ * err, err, err, err, CS1, err, err, err, err, err, err, err, | P3A
+ * err, err, CS1, CS1, err, err, err, err, err, err, err, err, | P3B
+ * |
+ * err, err, err, CS2, CS2, err, err, err, err, err, err, err, | P4A
+ * err, err, CS2, err, err, err, err, err, err, err, err, err, | P4B
+ *
+ * In the most straightforward implementation, a shift-based DFA for UTF-8
+ * requires 64-bit integers to encode the transitions, but with an SMT solver
+ * it's possible to find state numbers such that the transitions fit within
+ * 32-bit integers, as Dougall Johnson demonstrated:
+ *
+ * https://gist.github.com/dougallj/166e326de6ad4cf2c94be97a204c025f
+ *
+ * This packed representation is the reason for the seemingly odd choice of
+ * state values below.
+ */
+
+/*
+ * DFA state numbers.  Each state value doubles as the bit offset at which
+ * a transition word stores the successor for that state, so the values are
+ * shift counts below 32.
+ */
+
+/* Error */
+#define ERR 0
+/* Begin */
+#define BGN 11
+/* Continuation states, expect 1/2/3 continuation bytes */
+#define CS1 16
+#define CS2 1
+#define CS3 5
+/* Partial states, where the first continuation byte has a restricted range */
+#define P3A 6					/* Lead was E0, check for 3-byte overlong */
+#define P3B 20					/* Lead was ED, check for surrogate */
+#define P4A 25					/* Lead was F0, check for 4-byte overlong */
+#define P4B 30					/* Lead was F4, check for too-large */
+/* Begin and End are the same state */
+#define END BGN
+
+/*
+ * The encoded state transitions for the lookup table.  "(NEXT << CUR)"
+ * stores successor NEXT at the bit offset of state CUR.  Any state whose
+ * offset is left unset reads back as ERR (0) after masking with 31.
+ */
+
+/* ASCII */
+#define ASC (END << BGN)
+/* 2-byte lead */
+#define L2A (CS1 << BGN)
+/* 3-byte lead */
+#define L3A (P3A << BGN)
+#define L3B (CS2 << BGN)
+#define L3C (P3B << BGN)
+/* 4-byte lead */
+#define L4A (P4A << BGN)
+#define L4B (CS3 << BGN)
+#define L4C (P4B << BGN)
+/*
+ * Continuation byte.  The whole expansion is parenthesized so that these
+ * macros remain a single operand wherever they are used.
+ */
+#define CR1 ((END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B))
+#define CR2 ((END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A))
+#define CR3 ((END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A))
+/* invalid byte */
+#define ILL ERR
+
+/*
+ * Transition table for the shift-based DFA, indexed by input byte value.
+ * Each entry is one of the encoded transition words defined above;
+ * utf8_advance() shifts the entry right by the current state to obtain the
+ * next state.  Byte 0x00 is deliberately ILL (zero bytes are rejected), as
+ * are 0xC0, 0xC1 and 0xF5..0xFF.
+ */
+static const uint32 Utf8Transition[256] =
+{
+ /* ASCII */
+
+ ILL, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
+
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
+
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
+
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,
+
+ /* continuation bytes */
+
+ /* 80..8F */
+ CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1,
+ CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1,
+
+ /* 90..9F */
+ CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2,
+ CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2,
+
+ /* A0..BF */
+ CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
+ CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
+ CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
+ CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3,
+
+ /* leading bytes */
+
+ /* C0..DF */
+ ILL, ILL, L2A, L2A, L2A, L2A, L2A, L2A,
+ L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
+ L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
+ L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A,
+
+ /* E0..EF */
+ L3A, L3B, L3B, L3B, L3B, L3B, L3B, L3B,
+ L3B, L3B, L3B, L3B, L3B, L3C, L3B, L3B,
+
+ /* F0..FF */
+ L4A, L4B, L4B, L4B, L4C, ILL, ILL, ILL,
+ ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL
+};
+
+/*
+ * utf8_advance
+ *
+ * Run the UTF-8 DFA over "len" bytes starting at "s", beginning in *state
+ * and leaving the resulting state, masked to its low five bits, in *state.
+ */
+static void
+utf8_advance(const unsigned char *s, uint32 *state, int len)
+{
+	const unsigned char *const stop = s + (len > 0 ? len : 0);
+	uint32		cur = *state;
+
+	/* Note: We deliberately don't check the state's value here. */
+	for (; s < stop; s++)
+	{
+		/*
+		 * It's important that the mask value is 31: In most instruction
+		 * sets, a shift by a 32-bit operand is understood to be a shift by
+		 * its mod 32, so the compiler should elide the mask operation.
+		 */
+		cur = Utf8Transition[*s] >> (cur & 31);
+	}
+
+	*state = cur & 31;
+}
+
+/*
+ * verifystr for UTF-8.
+ *
+ * Returns the number of leading bytes of "s" (at most "len") that form
+ * valid UTF-8 with no zero bytes.
+ *
+ * Inputs of at least STRIDE_LENGTH bytes are first scanned in
+ * STRIDE_LENGTH-sized chunks: a chunk that is_valid_ascii() accepts is
+ * skipped when the DFA is in the END state, any other chunk is fed through
+ * utf8_advance().  Whatever remains (or, after an error, the whole input)
+ * is then checked one character at a time with pg_utf8_verifychar().
+ */
+static int
+pg_utf8_verifystr(const unsigned char *s, int len)
+{
+ const unsigned char *start = s;
+ const int orig_len = len;
+ uint32 state = BGN;
+
+/*
+ * With a stride of two vector widths, gcc will unroll the loop. Even if
+ * the compiler can unroll a longer loop, it's not worth it because we
+ * must fall back to the byte-wise algorithm if we find any non-ASCII.
+ */
+#define STRIDE_LENGTH (2 * sizeof(Vector8))
+
+ if (len >= STRIDE_LENGTH)
+ {
+ while (len >= STRIDE_LENGTH)
+ {
+ /*
+ * If the chunk is all ASCII, we can skip the full UTF-8 check,
+ * but we must first check for a non-END state, which means the
+ * previous chunk ended in the middle of a multibyte sequence.
+ */
+ if (state != END || !is_valid_ascii(s, STRIDE_LENGTH))
+ utf8_advance(s, &state, STRIDE_LENGTH);
+
+ s += STRIDE_LENGTH;
+ len -= STRIDE_LENGTH;
+ }
+
+ /* The error state persists, so we only need to check for it here. */
+ if (state == ERR)
+ {
+ /*
+ * Start over from the beginning with the slow path so we can
+ * count the valid bytes.
+ */
+ len = orig_len;
+ s = start;
+ }
+ else if (state != END)
+ {
+ /*
+ * The fast path exited in the middle of a multibyte sequence.
+ * Walk backwards to find the leading byte so that the slow path
+ * can resume checking from there. We must always backtrack at
+ * least one byte, since the current byte could be e.g. an ASCII
+ * byte after a 2-byte lead, which is invalid.
+ */
+ do
+ {
+ Assert(s > start);
+ s--;
+ len++;
+ Assert(IS_HIGHBIT_SET(*s));
+ } while (pg_utf_mblen(s) <= 1);
+ }
+ }
+
+ /* check remaining bytes */
+ while (len > 0)
+ {
+ int l;
+
+ /* fast path for ASCII-subset characters */
+ if (!IS_HIGHBIT_SET(*s))
+ {
+ if (*s == '\0')
+ break;
+ l = 1;
+ }
+ else
+ {
+ l = pg_utf8_verifychar(s, len);
+ if (l == -1)
+ break;
+ }
+ s += l;
+ len -= l;
+ }
+
+ /* offset of the first invalid byte, or the full length if all valid */
+ return s - start;
+}
+
+/*
+ * pg_utf8_islegal
+ *
+ * Check for validity of a single UTF-8 encoded character.
+ *
+ * This directly implements the rules in RFC3629.  The odd-looking limits
+ * on the second byte ensure that a given Unicode code point has only one
+ * encoding: a longer-than-necessary byte sequence with high order zero
+ * bits may not be used for a character that would fit in fewer bytes.
+ * Allowing that would create security hazards (eg, an apparent non-ASCII
+ * character that decodes to plain ASCII).
+ *
+ * "length" is assumed to have been obtained by pg_utf_mblen(), and the
+ * caller must have checked that that many bytes are present in the buffer.
+ * Lengths other than 1..4 are rejected (5 and 6 are not accepted for now).
+ */
+bool
+pg_utf8_islegal(const unsigned char *source, int length)
+{
+	unsigned char lead;
+	unsigned char lo = 0x80;	/* allowed range for the second byte */
+	unsigned char hi = 0xBF;
+
+	if (length < 1 || length > 4)
+		return false;
+
+	/* fourth and third bytes are plain continuation bytes */
+	if (length == 4 && (source[3] < 0x80 || source[3] > 0xBF))
+		return false;
+	if (length >= 3 && (source[2] < 0x80 || source[2] > 0xBF))
+		return false;
+
+	lead = source[0];
+
+	if (length >= 2)
+	{
+		/* some leading bytes narrow the range of the second byte */
+		switch (lead)
+		{
+			case 0xE0:
+				lo = 0xA0;
+				break;
+			case 0xED:
+				hi = 0x9F;
+				break;
+			case 0xF0:
+				lo = 0x90;
+				break;
+			case 0xF4:
+				hi = 0x8F;
+				break;
+			default:
+				break;
+		}
+		if (source[1] < lo || source[1] > hi)
+			return false;
+	}
+
+	/* finally, the leading byte itself must be in a legal range */
+	if (lead >= 0x80 && lead < 0xC2)
+		return false;
+	if (lead > 0xF4)
+		return false;
+
+	return true;
+}
+
+
+/*
+ *-------------------------------------------------------------------
+ * encoding info table
+ * XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h)
+ *-------------------------------------------------------------------
+ */
+const pg_wchar_tbl pg_wchar_table[] = { /*
+ * One row per encoding; the trailing comment on each row names the
+ * encoding it belongs to.  Judging from the initializers, each row holds,
+ * in order: the to-wchar converter, the from-wchar converter, the mblen,
+ * dsplen, verifychar and verifystr routines, and finally the maximum
+ * number of bytes per character (read as .maxmblen by
+ * pg_encoding_max_length()).  NOTE(review): the field order is presumed
+ * from the function names; confirm against the pg_wchar_tbl declaration.
+ */
+ {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifychar, pg_ascii_verifystr, 1}, /* PG_SQL_ASCII */
+ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JP */
+ {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 2}, /* PG_EUC_CN */
+ {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifychar, pg_euckr_verifystr, 3}, /* PG_EUC_KR */
+ {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4}, /* PG_EUC_TW */
+ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JIS_2004 */
+ {pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifychar, pg_utf8_verifystr, 4}, /* PG_UTF8 */
+ {pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifychar, pg_mule_verifystr, 4}, /* PG_MULE_INTERNAL */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN1 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN2 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN3 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN4 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN5 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN6 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN7 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN8 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN9 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN10 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1256 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1258 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN866 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN874 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_KOI8R */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1251 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1252 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-5 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-6 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-7 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-8 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1250 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1253 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1254 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1255 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1257 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_KOI8U */
+ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifychar, pg_sjis_verifystr, 2}, /* PG_SJIS; from this row on the first two slots are 0, i.e. no wchar conversion routines are supplied */
+ {0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifychar, pg_big5_verifystr, 2}, /* PG_BIG5 */
+ {0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifychar, pg_gbk_verifystr, 2}, /* PG_GBK */
+ {0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifychar, pg_uhc_verifystr, 2}, /* PG_UHC */
+ {0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifychar, pg_gb18030_verifystr, 4}, /* PG_GB18030 */
+ {0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifychar, pg_johab_verifystr, 3}, /* PG_JOHAB */
+ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifychar, pg_sjis_verifystr, 2} /* PG_SHIFT_JIS_2004 */
+};
+
+/*
+ * Report how many bytes the multibyte character at mbstr occupies in the
+ * given encoding.  An encoding ID that fails PG_VALID_ENCODING() is
+ * handled with the PG_SQL_ASCII routine instead.
+ *
+ * Caution: if the text is not known to be valid in that encoding, the
+ * value returned can be larger than the number of bytes actually left in
+ * the string.  Callers that cannot cope with that should call
+ * pg_encoding_mblen_bounded() instead.
+ */
+int
+pg_encoding_mblen(int encoding, const char *mbstr)
+{
+	const unsigned char *bytes = (const unsigned char *) mbstr;
+
+	/* Unknown encoding IDs are served by the PG_SQL_ASCII entry. */
+	if (!PG_VALID_ENCODING(encoding))
+		return pg_wchar_table[PG_SQL_ASCII].mblen(bytes);
+
+	return pg_wchar_table[encoding].mblen(bytes);
+}
+
+/*
+ * Like pg_encoding_mblen(), but the result never reaches past the
+ * string's terminating NUL: it is capped at the distance to end of
+ * string.
+ */
+int
+pg_encoding_mblen_bounded(int encoding, const char *mbstr)
+{
+	/* Length the encoding's mblen routine claims for this character. */
+	int			claimed = pg_encoding_mblen(encoding, mbstr);
+
+	/* strnlen() stops early if a NUL appears within that many bytes. */
+	return strnlen(mbstr, claimed);
+}
+
+/*
+ * Report the display length of the multibyte character at mbstr, as
+ * computed by the given encoding's dsplen routine.  An encoding ID that
+ * fails PG_VALID_ENCODING() is handled with the PG_SQL_ASCII routine.
+ */
+int
+pg_encoding_dsplen(int encoding, const char *mbstr)
+{
+	/* Pick the table slot first, then make a single call through it. */
+	int			slot = PG_VALID_ENCODING(encoding) ? encoding : PG_SQL_ASCII;
+
+	return pg_wchar_table[slot].dsplen((const unsigned char *) mbstr);
+}
+
+/*
+ * Check the first multibyte character of the given string.
+ * The result is that character's byte length when it is valid, or -1
+ * when it is not.  (See comments above for full details of the
+ * mbverifychar API.)
+ *
+ * An encoding ID that fails PG_VALID_ENCODING() is checked with the
+ * PG_SQL_ASCII routine.
+ */
+int
+pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
+{
+	const pg_wchar_tbl *entry;
+
+	if (PG_VALID_ENCODING(encoding))
+		entry = &pg_wchar_table[encoding];
+	else
+		entry = &pg_wchar_table[PG_SQL_ASCII];
+
+	return entry->mbverifychar((const unsigned char *) mbstr, len);
+}
+
+/*
+ * Check a string for validity in the given encoding.
+ * The result is the count of leading input bytes (<= len) that form a
+ * valid string.  (See comments above for full details of the mbverifystr
+ * API.)
+ *
+ * An encoding ID that fails PG_VALID_ENCODING() is checked with the
+ * PG_SQL_ASCII routine.
+ */
+int
+pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)
+{
+	const unsigned char *bytes = (const unsigned char *) mbstr;
+	const pg_wchar_tbl *entry = &pg_wchar_table[PG_SQL_ASCII];
+
+	/* Switch from the ASCII default only for a recognized encoding ID. */
+	if (PG_VALID_ENCODING(encoding))
+		entry = &pg_wchar_table[encoding];
+
+	return entry->mbverifystr(bytes, len);
+}
+
+/*
+ * Fetch the maximum length, in bytes, of a single character in the given
+ * encoding (the maxmblen field of its pg_wchar_table row).
+ *
+ * Passing an invalid encoding ID is a caller error and trips the Assert
+ * in assert-enabled builds.  In other builds the PG_SQL_ASCII row is used
+ * instead, matching the fallback of the other pg_encoding_* lookups.
+ */
+int
+pg_encoding_max_length(int encoding)
+{
+	Assert(PG_VALID_ENCODING(encoding));
+
+	/*
+	 * Re-check the encoding despite the Assert: when assertions are compiled
+	 * out, an invalid ID would otherwise index outside pg_wchar_table.
+	 */
+	return (PG_VALID_ENCODING(encoding) ?
+			pg_wchar_table[encoding].maxmblen :
+			pg_wchar_table[PG_SQL_ASCII].maxmblen);
+}