diff options
Diffstat (limited to 'src/common')
68 files changed, 22708 insertions, 0 deletions
diff --git a/src/common/.gitignore b/src/common/.gitignore new file mode 100644 index 0000000..ffa3284 --- /dev/null +++ b/src/common/.gitignore @@ -0,0 +1 @@ +/kwlist_d.h diff --git a/src/common/Makefile b/src/common/Makefile new file mode 100644 index 0000000..113029b --- /dev/null +++ b/src/common/Makefile @@ -0,0 +1,197 @@ +#------------------------------------------------------------------------- +# +# Makefile +# Makefile for src/common +# +# These files are used by the Postgres backend, and also by frontend +# programs. These files provide common functionality that isn't directly +# concerned with portability and thus doesn't belong in src/port. +# +# This makefile generates three outputs: +# +# libpgcommon.a - contains object files with FRONTEND defined, +# for use by client applications +# +# libpgcommon_shlib.a - contains object files with FRONTEND defined, +# built suitably for use in shared libraries; for use +# by frontend libraries +# +# libpgcommon_srv.a - contains object files without FRONTEND defined, +# for use only by the backend +# +# IDENTIFICATION +# src/common/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/common +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global + +# don't include subdirectory-path-dependent -I and -L switches +STD_CPPFLAGS := $(filter-out -I$(top_srcdir)/src/include -I$(top_builddir)/src/include,$(CPPFLAGS)) +STD_LDFLAGS := $(filter-out -L$(top_builddir)/src/common -L$(top_builddir)/src/port,$(LDFLAGS)) +override CPPFLAGS += -DVAL_CC="\"$(CC)\"" +override CPPFLAGS += -DVAL_CPPFLAGS="\"$(STD_CPPFLAGS)\"" +override CPPFLAGS += -DVAL_CFLAGS="\"$(CFLAGS)\"" +override CPPFLAGS += -DVAL_CFLAGS_SL="\"$(CFLAGS_SL)\"" +override CPPFLAGS += -DVAL_LDFLAGS="\"$(STD_LDFLAGS)\"" +override CPPFLAGS += -DVAL_LDFLAGS_EX="\"$(LDFLAGS_EX)\"" +override CPPFLAGS += -DVAL_LDFLAGS_SL="\"$(LDFLAGS_SL)\"" +override CPPFLAGS += -DVAL_LIBS="\"$(LIBS)\"" + +override CPPFLAGS := -DFRONTEND -I. -I$(top_srcdir)/src/common $(CPPFLAGS) +LIBS += $(PTHREAD_LIBS) + +# If you add objects here, see also src/tools/msvc/Mkvcbuild.pm + +OBJS_COMMON = \ + archive.o \ + base64.o \ + checksum_helper.o \ + compression.o \ + config_info.o \ + controldata_utils.o \ + d2s.o \ + encnames.o \ + exec.o \ + f2s.o \ + file_perm.o \ + file_utils.o \ + hashfn.o \ + ip.o \ + jsonapi.o \ + keywords.o \ + kwlookup.o \ + link-canary.o \ + md5_common.o \ + percentrepl.o \ + pg_get_line.o \ + pg_lzcompress.o \ + pg_prng.o \ + pgfnames.o \ + psprintf.o \ + relpath.o \ + rmtree.o \ + saslprep.o \ + scram-common.o \ + string.o \ + stringinfo.o \ + unicode_norm.o \ + username.o \ + wait_error.o \ + wchar.o + +ifeq ($(with_ssl),openssl) +OBJS_COMMON += \ + cryptohash_openssl.o \ + hmac_openssl.o \ + protocol_openssl.o +else +OBJS_COMMON += \ + cryptohash.o \ + hmac.o \ + md5.o \ + sha1.o \ + sha2.o +endif + +# A few files are currently only built for frontend, not server +# (Mkvcbuild.pm has a copy of this list, too). logging.c is excluded +# from OBJS_FRONTEND_SHLIB (shared library) as a matter of policy, +# because it is not appropriate for general purpose libraries such +# as libpq to report errors directly. +OBJS_FRONTEND_SHLIB = \ + $(OBJS_COMMON) \ + fe_memutils.o \ + restricted_token.o \ + sprompt.o +OBJS_FRONTEND = \ + $(OBJS_FRONTEND_SHLIB) \ + logging.o + +# foo.o, foo_shlib.o, and foo_srv.o are all built from foo.c +OBJS_SHLIB = $(OBJS_FRONTEND_SHLIB:%.o=%_shlib.o) +OBJS_SRV = $(OBJS_COMMON:%.o=%_srv.o) + +# where to find gen_keywordlist.pl and subsidiary files +TOOLSDIR = $(top_srcdir)/src/tools +GEN_KEYWORDLIST = $(PERL) -I $(TOOLSDIR) $(TOOLSDIR)/gen_keywordlist.pl +GEN_KEYWORDLIST_DEPS = $(TOOLSDIR)/gen_keywordlist.pl $(TOOLSDIR)/PerfectHash.pm + +all: libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a + +distprep: kwlist_d.h + +# libpgcommon is needed by some contrib +install: all installdirs + $(INSTALL_STLIB) libpgcommon.a '$(DESTDIR)$(libdir)/libpgcommon.a' + $(INSTALL_STLIB) libpgcommon_shlib.a '$(DESTDIR)$(libdir)/libpgcommon_shlib.a' + +installdirs: + $(MKDIR_P) '$(DESTDIR)$(libdir)' + +uninstall: + rm -f '$(DESTDIR)$(libdir)/libpgcommon.a' + rm -f '$(DESTDIR)$(libdir)/libpgcommon_shlib.a' + +libpgcommon.a: $(OBJS_FRONTEND) + rm -f $@ + $(AR) $(AROPT) $@ $^ + +# +# Shared library versions of object files +# + +libpgcommon_shlib.a: $(OBJS_SHLIB) + rm -f $@ + $(AR) $(AROPT) $@ $^ + +# Because this uses its own compilation rule, it doesn't use the +# dependency tracking logic from Makefile.global. To make sure that +# dependency tracking works anyway for the *_shlib.o files, depend on +# their *.o siblings as well, which do have proper dependencies. It's +# a hack that might fail someday if there is a *_shlib.o without a +# corresponding *.o, but there seems little reason for that. +%_shlib.o: %.c %.o + $(CC) $(CFLAGS) $(CFLAGS_SL) $(CPPFLAGS) -c $< -o $@ + +# +# Server versions of object files +# + +libpgcommon_srv.a: $(OBJS_SRV) + rm -f $@ + $(AR) $(AROPT) $@ $^ + +# Because this uses its own compilation rule, it doesn't use the +# dependency tracking logic from Makefile.global. To make sure that +# dependency tracking works anyway for the *_srv.o files, depend on +# their *.o siblings as well, which do have proper dependencies. It's +# a hack that might fail someday if there is a *_srv.o without a +# corresponding *.o, but it works for now. +%_srv.o: %.c %.o + $(CC) $(CFLAGS) $(subst -DFRONTEND,, $(CPPFLAGS)) -c $< -o $@ + +# generate SQL keyword lookup table to be included into keywords*.o. +kwlist_d.h: $(top_srcdir)/src/include/parser/kwlist.h $(GEN_KEYWORDLIST_DEPS) + $(GEN_KEYWORDLIST) --extern $< + +# Dependencies of keywords*.o need to be managed explicitly to make sure +# that you don't get broken parsing code, even in a non-enable-depend build. +keywords.o keywords_shlib.o keywords_srv.o: kwlist_d.h + +# The code imported from Ryu gets a pass on declaration-after-statement, +# in order to keep it more closely aligned with its upstream. +RYU_FILES = d2s.o f2s.o +RYU_OBJS = $(RYU_FILES) $(RYU_FILES:%.o=%_shlib.o) $(RYU_FILES:%.o=%_srv.o) + +$(RYU_OBJS): CFLAGS += $(PERMIT_DECLARATION_AFTER_STATEMENT) + +# kwlist_d.h is in the distribution tarball, so it is not cleaned here. +clean distclean: + rm -f libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a + rm -f $(OBJS_FRONTEND) $(OBJS_SHLIB) $(OBJS_SRV) + +maintainer-clean: distclean + rm -f kwlist_d.h diff --git a/src/common/archive.c b/src/common/archive.c new file mode 100644 index 0000000..641a58e --- /dev/null +++ b/src/common/archive.c @@ -0,0 +1,60 @@ +/*------------------------------------------------------------------------- + * + * archive.c + * Common WAL archive routines + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/archive.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/archive.h" +#include "common/percentrepl.h" + +/* + * BuildRestoreCommand + * + * Builds a restore command to retrieve a file from WAL archives, replacing + * the supported aliases with values supplied by the caller as defined by + * the GUC parameter restore_command: xlogpath for %p, xlogfname for %f and + * lastRestartPointFname for %r. + * + * The result is a palloc'd string for the restore command built. The + * caller is responsible for freeing it. If any of the required arguments + * is NULL and that the corresponding alias is found in the command given + * by the caller, then an error is thrown. + */ +char * +BuildRestoreCommand(const char *restoreCommand, + const char *xlogpath, + const char *xlogfname, + const char *lastRestartPointFname) +{ + char *nativePath = NULL; + char *result; + + if (xlogpath) + { + nativePath = pstrdup(xlogpath); + make_native_path(nativePath); + } + + result = replace_percent_placeholders(restoreCommand, "restore_command", "frp", + xlogfname, lastRestartPointFname, nativePath); + + if (nativePath) + pfree(nativePath); + + return result; +} diff --git a/src/common/base64.c b/src/common/base64.c new file mode 100644 index 0000000..ec4eb49 --- /dev/null +++ b/src/common/base64.c @@ -0,0 +1,242 @@ +/*------------------------------------------------------------------------- + * + * base64.c + * Encoding and decoding routines for base64 without whitespace. + * + * Copyright (c) 2001-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/common/base64.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/base64.h" + +/* + * BASE64 + */ + +static const char _base64[] = +"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +static const int8 b64lookup[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, + -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, +}; + +/* + * pg_b64_encode + * + * Encode into base64 the given string. Returns the length of the encoded + * string, and -1 in the event of an error with the result buffer zeroed + * for safety. + */ +int +pg_b64_encode(const char *src, int len, char *dst, int dstlen) +{ + char *p; + const char *s, + *end = src + len; + int pos = 2; + uint32 buf = 0; + + s = src; + p = dst; + + while (s < end) + { + buf |= (unsigned char) *s << (pos << 3); + pos--; + s++; + + /* write it out */ + if (pos < 0) + { + /* + * Leave if there is an overflow in the area allocated for the + * encoded string. + */ + if ((p - dst + 4) > dstlen) + goto error; + + *p++ = _base64[(buf >> 18) & 0x3f]; + *p++ = _base64[(buf >> 12) & 0x3f]; + *p++ = _base64[(buf >> 6) & 0x3f]; + *p++ = _base64[buf & 0x3f]; + + pos = 2; + buf = 0; + } + } + if (pos != 2) + { + /* + * Leave if there is an overflow in the area allocated for the encoded + * string. + */ + if ((p - dst + 4) > dstlen) + goto error; + + *p++ = _base64[(buf >> 18) & 0x3f]; + *p++ = _base64[(buf >> 12) & 0x3f]; + *p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '='; + *p++ = '='; + } + + Assert((p - dst) <= dstlen); + return p - dst; + +error: + memset(dst, 0, dstlen); + return -1; +} + +/* + * pg_b64_decode + * + * Decode the given base64 string. Returns the length of the decoded + * string on success, and -1 in the event of an error with the result + * buffer zeroed for safety. + */ +int +pg_b64_decode(const char *src, int len, char *dst, int dstlen) +{ + const char *srcend = src + len, + *s = src; + char *p = dst; + char c; + int b = 0; + uint32 buf = 0; + int pos = 0, + end = 0; + + while (s < srcend) + { + c = *s++; + + /* Leave if a whitespace is found */ + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') + goto error; + + if (c == '=') + { + /* end sequence */ + if (!end) + { + if (pos == 2) + end = 1; + else if (pos == 3) + end = 2; + else + { + /* + * Unexpected "=" character found while decoding base64 + * sequence. + */ + goto error; + } + } + b = 0; + } + else + { + b = -1; + if (c > 0 && c < 127) + b = b64lookup[(unsigned char) c]; + if (b < 0) + { + /* invalid symbol found */ + goto error; + } + } + /* add it to buffer */ + buf = (buf << 6) + b; + pos++; + if (pos == 4) + { + /* + * Leave if there is an overflow in the area allocated for the + * decoded string. + */ + if ((p - dst + 1) > dstlen) + goto error; + *p++ = (buf >> 16) & 255; + + if (end == 0 || end > 1) + { + /* overflow check */ + if ((p - dst + 1) > dstlen) + goto error; + *p++ = (buf >> 8) & 255; + } + if (end == 0 || end > 2) + { + /* overflow check */ + if ((p - dst + 1) > dstlen) + goto error; + *p++ = buf & 255; + } + buf = 0; + pos = 0; + } + } + + if (pos != 0) + { + /* + * base64 end sequence is invalid. Input data is missing padding, is + * truncated or is otherwise corrupted. + */ + goto error; + } + + Assert((p - dst) <= dstlen); + return p - dst; + +error: + memset(dst, 0, dstlen); + return -1; +} + +/* + * pg_b64_enc_len + * + * Returns to caller the length of the string if it were encoded with + * base64 based on the length provided by caller. This is useful to + * estimate how large a buffer allocation needs to be done before doing + * the actual encoding. + */ +int +pg_b64_enc_len(int srclen) +{ + /* 3 bytes will be converted to 4 */ + return (srclen + 2) / 3 * 4; +} + +/* + * pg_b64_dec_len + * + * Returns to caller the length of the string if it were to be decoded + * with base64, based on the length given by caller. This is useful to + * estimate how large a buffer allocation needs to be done before doing + * the actual decoding. + */ +int +pg_b64_dec_len(int srclen) +{ + return (srclen * 3) >> 2; +} diff --git a/src/common/checksum_helper.c b/src/common/checksum_helper.c new file mode 100644 index 0000000..21ff895 --- /dev/null +++ b/src/common/checksum_helper.c @@ -0,0 +1,232 @@ +/*------------------------------------------------------------------------- + * + * checksum_helper.c + * Compute a checksum of any of various types using common routines + * + * Portions Copyright (c) 2016-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/checksum_helper.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/checksum_helper.h" + +/* + * If 'name' is a recognized checksum type, set *type to the corresponding + * constant and return true. Otherwise, set *type to CHECKSUM_TYPE_NONE and + * return false. + */ +bool +pg_checksum_parse_type(char *name, pg_checksum_type *type) +{ + pg_checksum_type result_type = CHECKSUM_TYPE_NONE; + bool result = true; + + if (pg_strcasecmp(name, "none") == 0) + result_type = CHECKSUM_TYPE_NONE; + else if (pg_strcasecmp(name, "crc32c") == 0) + result_type = CHECKSUM_TYPE_CRC32C; + else if (pg_strcasecmp(name, "sha224") == 0) + result_type = CHECKSUM_TYPE_SHA224; + else if (pg_strcasecmp(name, "sha256") == 0) + result_type = CHECKSUM_TYPE_SHA256; + else if (pg_strcasecmp(name, "sha384") == 0) + result_type = CHECKSUM_TYPE_SHA384; + else if (pg_strcasecmp(name, "sha512") == 0) + result_type = CHECKSUM_TYPE_SHA512; + else + result = false; + + *type = result_type; + return result; +} + +/* + * Get the canonical human-readable name corresponding to a checksum type. + */ +char * +pg_checksum_type_name(pg_checksum_type type) +{ + switch (type) + { + case CHECKSUM_TYPE_NONE: + return "NONE"; + case CHECKSUM_TYPE_CRC32C: + return "CRC32C"; + case CHECKSUM_TYPE_SHA224: + return "SHA224"; + case CHECKSUM_TYPE_SHA256: + return "SHA256"; + case CHECKSUM_TYPE_SHA384: + return "SHA384"; + case CHECKSUM_TYPE_SHA512: + return "SHA512"; + } + + Assert(false); + return "???"; +} + +/* + * Initialize a checksum context for checksums of the given type. + * Returns 0 for a success, -1 for a failure. + */ +int +pg_checksum_init(pg_checksum_context *context, pg_checksum_type type) +{ + context->type = type; + + switch (type) + { + case CHECKSUM_TYPE_NONE: + /* do nothing */ + break; + case CHECKSUM_TYPE_CRC32C: + INIT_CRC32C(context->raw_context.c_crc32c); + break; + case CHECKSUM_TYPE_SHA224: + context->raw_context.c_sha2 = pg_cryptohash_create(PG_SHA224); + if (context->raw_context.c_sha2 == NULL) + return -1; + if (pg_cryptohash_init(context->raw_context.c_sha2) < 0) + { + pg_cryptohash_free(context->raw_context.c_sha2); + return -1; + } + break; + case CHECKSUM_TYPE_SHA256: + context->raw_context.c_sha2 = pg_cryptohash_create(PG_SHA256); + if (context->raw_context.c_sha2 == NULL) + return -1; + if (pg_cryptohash_init(context->raw_context.c_sha2) < 0) + { + pg_cryptohash_free(context->raw_context.c_sha2); + return -1; + } + break; + case CHECKSUM_TYPE_SHA384: + context->raw_context.c_sha2 = pg_cryptohash_create(PG_SHA384); + if (context->raw_context.c_sha2 == NULL) + return -1; + if (pg_cryptohash_init(context->raw_context.c_sha2) < 0) + { + pg_cryptohash_free(context->raw_context.c_sha2); + return -1; + } + break; + case CHECKSUM_TYPE_SHA512: + context->raw_context.c_sha2 = pg_cryptohash_create(PG_SHA512); + if (context->raw_context.c_sha2 == NULL) + return -1; + if (pg_cryptohash_init(context->raw_context.c_sha2) < 0) + { + pg_cryptohash_free(context->raw_context.c_sha2); + return -1; + } + break; + } + + return 0; +} + +/* + * Update a checksum context with new data. + * Returns 0 for a success, -1 for a failure. + */ +int +pg_checksum_update(pg_checksum_context *context, const uint8 *input, + size_t len) +{ + switch (context->type) + { + case CHECKSUM_TYPE_NONE: + /* do nothing */ + break; + case CHECKSUM_TYPE_CRC32C: + COMP_CRC32C(context->raw_context.c_crc32c, input, len); + break; + case CHECKSUM_TYPE_SHA224: + case CHECKSUM_TYPE_SHA256: + case CHECKSUM_TYPE_SHA384: + case CHECKSUM_TYPE_SHA512: + if (pg_cryptohash_update(context->raw_context.c_sha2, input, len) < 0) + return -1; + break; + } + + return 0; +} + +/* + * Finalize a checksum computation and write the result to an output buffer. + * + * The caller must ensure that the buffer is at least PG_CHECKSUM_MAX_LENGTH + * bytes in length. The return value is the number of bytes actually written, + * or -1 for a failure. + */ +int +pg_checksum_final(pg_checksum_context *context, uint8 *output) +{ + int retval = 0; + + StaticAssertDecl(sizeof(pg_crc32c) <= PG_CHECKSUM_MAX_LENGTH, + "CRC-32C digest too big for PG_CHECKSUM_MAX_LENGTH"); + StaticAssertDecl(PG_SHA224_DIGEST_LENGTH <= PG_CHECKSUM_MAX_LENGTH, + "SHA224 digest too big for PG_CHECKSUM_MAX_LENGTH"); + StaticAssertDecl(PG_SHA256_DIGEST_LENGTH <= PG_CHECKSUM_MAX_LENGTH, + "SHA256 digest too big for PG_CHECKSUM_MAX_LENGTH"); + StaticAssertDecl(PG_SHA384_DIGEST_LENGTH <= PG_CHECKSUM_MAX_LENGTH, + "SHA384 digest too big for PG_CHECKSUM_MAX_LENGTH"); + StaticAssertDecl(PG_SHA512_DIGEST_LENGTH <= PG_CHECKSUM_MAX_LENGTH, + "SHA512 digest too big for PG_CHECKSUM_MAX_LENGTH"); + + switch (context->type) + { + case CHECKSUM_TYPE_NONE: + break; + case CHECKSUM_TYPE_CRC32C: + FIN_CRC32C(context->raw_context.c_crc32c); + retval = sizeof(pg_crc32c); + memcpy(output, &context->raw_context.c_crc32c, retval); + break; + case CHECKSUM_TYPE_SHA224: + retval = PG_SHA224_DIGEST_LENGTH; + if (pg_cryptohash_final(context->raw_context.c_sha2, + output, retval) < 0) + return -1; + pg_cryptohash_free(context->raw_context.c_sha2); + break; + case CHECKSUM_TYPE_SHA256: + retval = PG_SHA256_DIGEST_LENGTH; + if (pg_cryptohash_final(context->raw_context.c_sha2, + output, retval) < 0) + return -1; + pg_cryptohash_free(context->raw_context.c_sha2); + break; + case CHECKSUM_TYPE_SHA384: + retval = PG_SHA384_DIGEST_LENGTH; + if (pg_cryptohash_final(context->raw_context.c_sha2, + output, retval) < 0) + return -1; + pg_cryptohash_free(context->raw_context.c_sha2); + break; + case CHECKSUM_TYPE_SHA512: + retval = PG_SHA512_DIGEST_LENGTH; + if (pg_cryptohash_final(context->raw_context.c_sha2, + output, retval) < 0) + return -1; + pg_cryptohash_free(context->raw_context.c_sha2); + break; + } + + Assert(retval <= PG_CHECKSUM_MAX_LENGTH); + return retval; +} diff --git a/src/common/compression.c b/src/common/compression.c new file mode 100644 index 0000000..ee93762 --- /dev/null +++ b/src/common/compression.c @@ -0,0 +1,476 @@ +/*------------------------------------------------------------------------- + * + * compression.c + * + * Shared code for compression methods and specifications. + * + * A compression specification specifies the parameters that should be used + * when performing compression with a specific algorithm. The simplest + * possible compression specification is an integer, which sets the + * compression level. + * + * Otherwise, a compression specification is a comma-separated list of items, + * each having the form keyword or keyword=value. + * + * Currently, the supported keywords are "level", "long", and "workers". + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/compression.c + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#ifdef USE_ZSTD +#include <zstd.h> +#endif +#ifdef HAVE_LIBZ +#include <zlib.h> +#endif + +#include "common/compression.h" + +static int expect_integer_value(char *keyword, char *value, + pg_compress_specification *result); +static bool expect_boolean_value(char *keyword, char *value, + pg_compress_specification *result); + +/* + * Look up a compression algorithm by name. Returns true and sets *algorithm + * if the name is recognized. Otherwise returns false. + */ +bool +parse_compress_algorithm(char *name, pg_compress_algorithm *algorithm) +{ + if (strcmp(name, "none") == 0) + *algorithm = PG_COMPRESSION_NONE; + else if (strcmp(name, "gzip") == 0) + *algorithm = PG_COMPRESSION_GZIP; + else if (strcmp(name, "lz4") == 0) + *algorithm = PG_COMPRESSION_LZ4; + else if (strcmp(name, "zstd") == 0) + *algorithm = PG_COMPRESSION_ZSTD; + else + return false; + return true; +} + +/* + * Get the human-readable name corresponding to a particular compression + * algorithm. + */ +const char * +get_compress_algorithm_name(pg_compress_algorithm algorithm) +{ + switch (algorithm) + { + case PG_COMPRESSION_NONE: + return "none"; + case PG_COMPRESSION_GZIP: + return "gzip"; + case PG_COMPRESSION_LZ4: + return "lz4"; + case PG_COMPRESSION_ZSTD: + return "zstd"; + /* no default, to provoke compiler warnings if values are added */ + } + Assert(false); + return "???"; /* placate compiler */ +} + +/* + * Parse a compression specification for a specified algorithm. + * + * See the file header comments for a brief description of what a compression + * specification is expected to look like. + * + * On return, all fields of the result object will be initialized. + * In particular, result->parse_error will be NULL if no errors occurred + * during parsing, and will otherwise contain an appropriate error message. + * The caller may free this error message string using pfree, if desired. + * Note, however, even if there's no parse error, the string might not make + * sense: e.g. for gzip, level=12 is not sensible, but it does parse OK. + * + * The compression level is assigned by default if not directly specified + * by the specification. + * + * Use validate_compress_specification() to find out whether a compression + * specification is semantically sensible. + */ +void +parse_compress_specification(pg_compress_algorithm algorithm, char *specification, + pg_compress_specification *result) +{ + int bare_level; + char *bare_level_endp; + + /* Initial setup of result object. */ + result->algorithm = algorithm; + result->options = 0; + result->parse_error = NULL; + + /* + * Assign a default level depending on the compression method. This may + * be enforced later. + */ + switch (result->algorithm) + { + case PG_COMPRESSION_NONE: + result->level = 0; + break; + case PG_COMPRESSION_LZ4: +#ifdef USE_LZ4 + result->level = 0; /* fast compression mode */ +#else + result->parse_error = + psprintf(_("this build does not support compression with %s"), + "LZ4"); +#endif + break; + case PG_COMPRESSION_ZSTD: +#ifdef USE_ZSTD + result->level = ZSTD_CLEVEL_DEFAULT; +#else + result->parse_error = + psprintf(_("this build does not support compression with %s"), + "ZSTD"); +#endif + break; + case PG_COMPRESSION_GZIP: +#ifdef HAVE_LIBZ + result->level = Z_DEFAULT_COMPRESSION; +#else + result->parse_error = + psprintf(_("this build does not support compression with %s"), + "gzip"); +#endif + break; + } + + /* If there is no specification, we're done already. */ + if (specification == NULL) + return; + + /* As a special case, the specification can be a bare integer. */ + bare_level = strtol(specification, &bare_level_endp, 10); + if (specification != bare_level_endp && *bare_level_endp == '\0') + { + result->level = bare_level; + return; + } + + /* Look for comma-separated keyword or keyword=value entries. */ + while (1) + { + char *kwstart; + char *kwend; + char *vstart; + char *vend; + int kwlen; + int vlen; + bool has_value; + char *keyword; + char *value; + + /* Figure start, end, and length of next keyword and any value. */ + kwstart = kwend = specification; + while (*kwend != '\0' && *kwend != ',' && *kwend != '=') + ++kwend; + kwlen = kwend - kwstart; + if (*kwend != '=') + { + vstart = vend = NULL; + vlen = 0; + has_value = false; + } + else + { + vstart = vend = kwend + 1; + while (*vend != '\0' && *vend != ',') + ++vend; + vlen = vend - vstart; + has_value = true; + } + + /* Reject empty keyword. */ + if (kwlen == 0) + { + result->parse_error = + pstrdup(_("found empty string where a compression option was expected")); + break; + } + + /* Extract keyword and value as separate C strings. */ + keyword = palloc(kwlen + 1); + memcpy(keyword, kwstart, kwlen); + keyword[kwlen] = '\0'; + if (!has_value) + value = NULL; + else + { + value = palloc(vlen + 1); + memcpy(value, vstart, vlen); + value[vlen] = '\0'; + } + + /* Handle whatever keyword we found. */ + if (strcmp(keyword, "level") == 0) + { + result->level = expect_integer_value(keyword, value, result); + + /* + * No need to set a flag in "options", there is a default level + * set at least thanks to the logic above. + */ + } + else if (strcmp(keyword, "workers") == 0) + { + result->workers = expect_integer_value(keyword, value, result); + result->options |= PG_COMPRESSION_OPTION_WORKERS; + } + else if (strcmp(keyword, "long") == 0) + { + result->long_distance = expect_boolean_value(keyword, value, result); + result->options |= PG_COMPRESSION_OPTION_LONG_DISTANCE; + } + else + result->parse_error = + psprintf(_("unrecognized compression option: \"%s\""), keyword); + + /* Release memory, just to be tidy. */ + pfree(keyword); + if (value != NULL) + pfree(value); + + /* + * If we got an error or have reached the end of the string, stop. + * + * If there is no value, then the end of the keyword might have been + * the end of the string. If there is a value, then the end of the + * keyword cannot have been the end of the string, but the end of the + * value might have been. + */ + if (result->parse_error != NULL || + (vend == NULL ? *kwend == '\0' : *vend == '\0')) + break; + + /* Advance to next entry and loop around. */ + specification = vend == NULL ? kwend + 1 : vend + 1; + } +} + +/* + * Parse 'value' as an integer and return the result. + * + * If parsing fails, set result->parse_error to an appropriate message + * and return -1. + */ +static int +expect_integer_value(char *keyword, char *value, pg_compress_specification *result) +{ + int ivalue; + char *ivalue_endp; + + if (value == NULL) + { + result->parse_error = + psprintf(_("compression option \"%s\" requires a value"), + keyword); + return -1; + } + + ivalue = strtol(value, &ivalue_endp, 10); + if (ivalue_endp == value || *ivalue_endp != '\0') + { + result->parse_error = + psprintf(_("value for compression option \"%s\" must be an integer"), + keyword); + return -1; + } + return ivalue; +} + +/* + * Parse 'value' as a boolean and return the result. + * + * If parsing fails, set result->parse_error to an appropriate message + * and return -1. The caller must check result->parse_error to determine if + * the call was successful. + * + * Valid values are: yes, no, on, off, 1, 0. + * + * Inspired by ParseVariableBool(). + */ +static bool +expect_boolean_value(char *keyword, char *value, pg_compress_specification *result) +{ + if (value == NULL) + return true; + + if (pg_strcasecmp(value, "yes") == 0) + return true; + if (pg_strcasecmp(value, "on") == 0) + return true; + if (pg_strcasecmp(value, "1") == 0) + return true; + + if (pg_strcasecmp(value, "no") == 0) + return false; + if (pg_strcasecmp(value, "off") == 0) + return false; + if (pg_strcasecmp(value, "0") == 0) + return false; + + result->parse_error = + psprintf(_("value for compression option \"%s\" must be a Boolean value"), + keyword); + return false; +} + +/* + * Returns NULL if the compression specification string was syntactically + * valid and semantically sensible. Otherwise, returns an error message. + * + * Does not test whether this build of PostgreSQL supports the requested + * compression method. + */ +char * +validate_compress_specification(pg_compress_specification *spec) +{ + int min_level = 1; + int max_level = 1; + int default_level = 0; + + /* If it didn't even parse OK, it's definitely no good. */ + if (spec->parse_error != NULL) + return spec->parse_error; + + /* + * Check that the algorithm expects a compression level and it is within + * the legal range for the algorithm. + */ + switch (spec->algorithm) + { + case PG_COMPRESSION_GZIP: + max_level = 9; +#ifdef HAVE_LIBZ + default_level = Z_DEFAULT_COMPRESSION; +#endif + break; + case PG_COMPRESSION_LZ4: + max_level = 12; + default_level = 0; /* fast mode */ + break; + case PG_COMPRESSION_ZSTD: +#ifdef USE_ZSTD + max_level = ZSTD_maxCLevel(); + min_level = ZSTD_minCLevel(); + default_level = ZSTD_CLEVEL_DEFAULT; +#endif + break; + case PG_COMPRESSION_NONE: + if (spec->level != 0) + return psprintf(_("compression algorithm \"%s\" does not accept a compression level"), + get_compress_algorithm_name(spec->algorithm)); + break; + } + + if ((spec->level < min_level || spec->level > max_level) && + spec->level != default_level) + return psprintf(_("compression algorithm \"%s\" expects a compression level between %d and %d (default at %d)"), + get_compress_algorithm_name(spec->algorithm), + min_level, max_level, default_level); + + /* + * Of the compression algorithms that we currently support, only zstd + * allows parallel workers. + */ + if ((spec->options & PG_COMPRESSION_OPTION_WORKERS) != 0 && + (spec->algorithm != PG_COMPRESSION_ZSTD)) + { + return psprintf(_("compression algorithm \"%s\" does not accept a worker count"), + get_compress_algorithm_name(spec->algorithm)); + } + + /* + * Of the compression algorithms that we currently support, only zstd + * supports long-distance mode. + */ + if ((spec->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0 && + (spec->algorithm != PG_COMPRESSION_ZSTD)) + { + return psprintf(_("compression algorithm \"%s\" does not support long-distance mode"), + get_compress_algorithm_name(spec->algorithm)); + } + + return NULL; +} + +#ifdef FRONTEND + +/* + * Basic parsing of a value specified through a command-line option, commonly + * -Z/--compress. + * + * The parsing consists of a METHOD:DETAIL string fed later to + * parse_compress_specification(). This only extracts METHOD and DETAIL. + * If only an integer is found, the method is implied by the value specified. + */ +void +parse_compress_options(const char *option, char **algorithm, char **detail) +{ + char *sep; + char *endp; + long result; + + /* + * Check whether the compression specification consists of a bare integer. + * + * For backward-compatibility, assume "none" if the integer found is zero + * and "gzip" otherwise. + */ + result = strtol(option, &endp, 10); + if (*endp == '\0') + { + if (result == 0) + { + *algorithm = pstrdup("none"); + *detail = NULL; + } + else + { + *algorithm = pstrdup("gzip"); + *detail = pstrdup(option); + } + return; + } + + /* + * Check whether there is a compression detail following the algorithm + * name. + */ + sep = strchr(option, ':'); + if (sep == NULL) + { + *algorithm = pstrdup(option); + *detail = NULL; + } + else + { + char *alg; + + alg = palloc((sep - option) + 1); + memcpy(alg, option, sep - option); + alg[sep - option] = '\0'; + + *algorithm = alg; + *detail = pstrdup(sep + 1); + } +} +#endif /* FRONTEND */ diff --git a/src/common/config_info.c b/src/common/config_info.c new file mode 100644 index 0000000..09e78a6 --- /dev/null +++ b/src/common/config_info.c @@ -0,0 +1,201 @@ +/*------------------------------------------------------------------------- + * + * config_info.c + * Common code for pg_config output + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/config_info.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/config_info.h" + + +/* + * get_configdata(const char *my_exec_path, size_t *configdata_len) + * + * Get configure-time constants. The caller is responsible + * for pfreeing the result. + */ +ConfigData * +get_configdata(const char *my_exec_path, size_t *configdata_len) +{ + ConfigData *configdata; + char path[MAXPGPATH]; + char *lastsep; + int i = 0; + + /* Adjust this to match the number of items filled below */ + *configdata_len = 23; + configdata = palloc_array(ConfigData, *configdata_len); + + configdata[i].name = pstrdup("BINDIR"); + strlcpy(path, my_exec_path, sizeof(path)); + lastsep = strrchr(path, '/'); + if (lastsep) + *lastsep = '\0'; + cleanup_path(path); + configdata[i].setting = pstrdup(path); + i++; + + configdata[i].name = pstrdup("DOCDIR"); + get_doc_path(my_exec_path, path); + cleanup_path(path); + configdata[i].setting = pstrdup(path); + i++; + + configdata[i].name = pstrdup("HTMLDIR"); + get_html_path(my_exec_path, path); + cleanup_path(path); + configdata[i].setting = pstrdup(path); + i++; + + configdata[i].name = pstrdup("INCLUDEDIR"); + get_include_path(my_exec_path, path); + cleanup_path(path); + configdata[i].setting = pstrdup(path); + i++; + + configdata[i].name = pstrdup("PKGINCLUDEDIR"); + get_pkginclude_path(my_exec_path, path); + cleanup_path(path); + configdata[i].setting = pstrdup(path); + i++; + + configdata[i].name = pstrdup("INCLUDEDIR-SERVER"); + get_includeserver_path(my_exec_path, path); + cleanup_path(path); + configdata[i].setting = pstrdup(path); + i++; + + configdata[i].name = pstrdup("LIBDIR"); + get_lib_path(my_exec_path, path); + cleanup_path(path); + configdata[i].setting = pstrdup(path); + i++; + + configdata[i].name = pstrdup("PKGLIBDIR"); + get_pkglib_path(my_exec_path, path); + cleanup_path(path); + configdata[i].setting = pstrdup(path); + i++; + + configdata[i].name = pstrdup("LOCALEDIR"); + get_locale_path(my_exec_path, path); + cleanup_path(path); + configdata[i].setting = pstrdup(path); + i++; + + configdata[i].name = pstrdup("MANDIR"); + get_man_path(my_exec_path, path); + cleanup_path(path); + configdata[i].setting = pstrdup(path); + i++; + + configdata[i].name = pstrdup("SHAREDIR"); + get_share_path(my_exec_path, path); + cleanup_path(path); + configdata[i].setting = pstrdup(path); + i++; + + configdata[i].name = pstrdup("SYSCONFDIR"); + get_etc_path(my_exec_path, path); + cleanup_path(path); + configdata[i].setting = pstrdup(path); + i++; + + configdata[i].name = pstrdup("PGXS"); + get_pkglib_path(my_exec_path, path); + strlcat(path, "/pgxs/src/makefiles/pgxs.mk", sizeof(path)); + cleanup_path(path); + configdata[i].setting = pstrdup(path); + i++; + + configdata[i].name = pstrdup("CONFIGURE"); + configdata[i].setting = pstrdup(CONFIGURE_ARGS); + i++; + + configdata[i].name = pstrdup("CC"); +#ifdef VAL_CC + configdata[i].setting = pstrdup(VAL_CC); +#else + configdata[i].setting = pstrdup(_("not recorded")); +#endif + i++; + + configdata[i].name = pstrdup("CPPFLAGS"); +#ifdef VAL_CPPFLAGS + configdata[i].setting = pstrdup(VAL_CPPFLAGS); +#else + configdata[i].setting = pstrdup(_("not recorded")); +#endif + i++; + + configdata[i].name = pstrdup("CFLAGS"); +#ifdef VAL_CFLAGS + configdata[i].setting = pstrdup(VAL_CFLAGS); +#else + configdata[i].setting = pstrdup(_("not recorded")); +#endif + i++; + + configdata[i].name = pstrdup("CFLAGS_SL"); +#ifdef VAL_CFLAGS_SL + configdata[i].setting = pstrdup(VAL_CFLAGS_SL); +#else + configdata[i].setting = pstrdup(_("not recorded")); +#endif + i++; + + configdata[i].name = pstrdup("LDFLAGS"); +#ifdef VAL_LDFLAGS + configdata[i].setting = pstrdup(VAL_LDFLAGS); +#else + configdata[i].setting = pstrdup(_("not recorded")); +#endif + i++; + + configdata[i].name = pstrdup("LDFLAGS_EX"); +#ifdef VAL_LDFLAGS_EX + configdata[i].setting = pstrdup(VAL_LDFLAGS_EX); +#else + configdata[i].setting = pstrdup(_("not recorded")); +#endif + i++; + + configdata[i].name = pstrdup("LDFLAGS_SL"); +#ifdef VAL_LDFLAGS_SL + configdata[i].setting = pstrdup(VAL_LDFLAGS_SL); +#else + configdata[i].setting = pstrdup(_("not recorded")); +#endif + i++; + + configdata[i].name = pstrdup("LIBS"); +#ifdef VAL_LIBS + configdata[i].setting = pstrdup(VAL_LIBS); +#else + configdata[i].setting = pstrdup(_("not recorded")); +#endif + i++; + + configdata[i].name = pstrdup("VERSION"); + configdata[i].setting = pstrdup("PostgreSQL " PG_VERSION); + i++; + + Assert(i == *configdata_len); + + return configdata; +} diff --git a/src/common/controldata_utils.c b/src/common/controldata_utils.c new file mode 100644 index 0000000..4d1cd1c --- /dev/null +++ b/src/common/controldata_utils.c @@ -0,0 +1,269 @@ +/*------------------------------------------------------------------------- + * + * controldata_utils.c + * Common code for control data file output. + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/controldata_utils.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <unistd.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> + +#include "access/xlog_internal.h" +#include "catalog/pg_control.h" +#include "common/controldata_utils.h" +#include "common/file_perm.h" +#ifdef FRONTEND +#include "common/logging.h" +#endif +#include "port/pg_crc32c.h" + +#ifndef FRONTEND +#include "pgstat.h" +#include "storage/fd.h" +#endif + +/* + * get_controlfile() + * + * Get controlfile values. The result is returned as a palloc'd copy of the + * control file data. + * + * crc_ok_p can be used by the caller to see whether the CRC of the control + * file data is correct. + */ +ControlFileData * +get_controlfile(const char *DataDir, bool *crc_ok_p) +{ + ControlFileData *ControlFile; + int fd; + char ControlFilePath[MAXPGPATH]; + pg_crc32c crc; + int r; +#ifdef FRONTEND + pg_crc32c last_crc; + int retries = 0; +#endif + + Assert(crc_ok_p); + + ControlFile = palloc_object(ControlFileData); + snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir); + +#ifdef FRONTEND + INIT_CRC32C(last_crc); + +retry: +#endif + +#ifndef FRONTEND + if ((fd = OpenTransientFile(ControlFilePath, O_RDONLY | PG_BINARY)) == -1) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\" for reading: %m", + ControlFilePath))); +#else + if ((fd = open(ControlFilePath, O_RDONLY | PG_BINARY, 0)) == -1) + pg_fatal("could not open file \"%s\" for reading: %m", + ControlFilePath); +#endif + + r = read(fd, ControlFile, sizeof(ControlFileData)); + if (r != sizeof(ControlFileData)) + { + if (r < 0) +#ifndef FRONTEND + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", ControlFilePath))); +#else + pg_fatal("could not read file \"%s\": %m", ControlFilePath); +#endif + else +#ifndef FRONTEND + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("could not read file \"%s\": read %d of %zu", + ControlFilePath, r, sizeof(ControlFileData)))); +#else + pg_fatal("could not read file \"%s\": read %d of %zu", + ControlFilePath, r, sizeof(ControlFileData)); +#endif + } + +#ifndef FRONTEND + if (CloseTransientFile(fd) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", + ControlFilePath))); +#else + if (close(fd) != 0) + pg_fatal("could not close file \"%s\": %m", ControlFilePath); +#endif + + /* Check the CRC. */ + INIT_CRC32C(crc); + COMP_CRC32C(crc, + (char *) ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32C(crc); + + *crc_ok_p = EQ_CRC32C(crc, ControlFile->crc); + +#ifdef FRONTEND + + /* + * If the server was writing at the same time, it is possible that we read + * partially updated contents on some systems. If the CRC doesn't match, + * retry a limited number of times until we compute the same bad CRC twice + * in a row with a short sleep in between. Then the failure is unlikely + * to be due to a concurrent write. + */ + if (!*crc_ok_p && + (retries == 0 || !EQ_CRC32C(crc, last_crc)) && + retries < 10) + { + retries++; + last_crc = crc; + pg_usleep(10000); + goto retry; + } +#endif + + /* Make sure the control file is valid byte order. */ + if (ControlFile->pg_control_version % 65536 == 0 && + ControlFile->pg_control_version / 65536 != 0) +#ifndef FRONTEND + elog(ERROR, _("byte ordering mismatch")); +#else + pg_log_warning("possible byte ordering mismatch\n" + "The byte ordering used to store the pg_control file might not match the one\n" + "used by this program. In that case the results below would be incorrect, and\n" + "the PostgreSQL installation would be incompatible with this data directory."); +#endif + + return ControlFile; +} + +/* + * update_controlfile() + * + * Update controlfile values with the contents given by caller. The + * contents to write are included in "ControlFile". "do_sync" can be + * optionally used to flush the updated control file. Note that it is up + * to the caller to properly lock ControlFileLock when calling this + * routine in the backend. + */ +void +update_controlfile(const char *DataDir, + ControlFileData *ControlFile, bool do_sync) +{ + int fd; + char buffer[PG_CONTROL_FILE_SIZE]; + char ControlFilePath[MAXPGPATH]; + + /* Update timestamp */ + ControlFile->time = (pg_time_t) time(NULL); + + /* Recalculate CRC of control file */ + INIT_CRC32C(ControlFile->crc); + COMP_CRC32C(ControlFile->crc, + (char *) ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32C(ControlFile->crc); + + /* + * Write out PG_CONTROL_FILE_SIZE bytes into pg_control by zero-padding + * the excess over sizeof(ControlFileData), to avoid premature EOF related + * errors when reading it. + */ + memset(buffer, 0, PG_CONTROL_FILE_SIZE); + memcpy(buffer, ControlFile, sizeof(ControlFileData)); + + snprintf(ControlFilePath, sizeof(ControlFilePath), "%s/%s", DataDir, XLOG_CONTROL_FILE); + +#ifndef FRONTEND + + /* + * All errors issue a PANIC, so no need to use OpenTransientFile() and to + * worry about file descriptor leaks. + */ + if ((fd = BasicOpenFile(ControlFilePath, O_RDWR | PG_BINARY)) < 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", + ControlFilePath))); +#else + if ((fd = open(ControlFilePath, O_WRONLY | PG_BINARY, + pg_file_create_mode)) == -1) + pg_fatal("could not open file \"%s\": %m", ControlFilePath); +#endif + + errno = 0; +#ifndef FRONTEND + pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE); +#endif + if (write(fd, buffer, PG_CONTROL_FILE_SIZE) != PG_CONTROL_FILE_SIZE) + { + /* if write didn't set errno, assume problem is no disk space */ + if (errno == 0) + errno = ENOSPC; + +#ifndef FRONTEND + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not write file \"%s\": %m", + ControlFilePath))); +#else + pg_fatal("could not write file \"%s\": %m", ControlFilePath); +#endif + } +#ifndef FRONTEND + pgstat_report_wait_end(); +#endif + + if (do_sync) + { +#ifndef FRONTEND + pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE); + if (pg_fsync(fd) != 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", + ControlFilePath))); + pgstat_report_wait_end(); +#else + if (fsync(fd) != 0) + pg_fatal("could not fsync file \"%s\": %m", ControlFilePath); +#endif + } + + if (close(fd) != 0) + { +#ifndef FRONTEND + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", + ControlFilePath))); +#else + pg_fatal("could not close file \"%s\": %m", ControlFilePath); +#endif + } +} diff --git a/src/common/cryptohash.c b/src/common/cryptohash.c new file mode 100644 index 0000000..42dbed7 --- /dev/null +++ b/src/common/cryptohash.c @@ -0,0 +1,273 @@ +/*------------------------------------------------------------------------- + * + * cryptohash.c + * Fallback implementations for cryptographic hash functions. + * + * This is the set of in-core functions used when there are no other + * alternative options like OpenSSL. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/cryptohash.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <sys/param.h> + +#include "common/cryptohash.h" +#include "md5_int.h" +#include "sha1_int.h" +#include "sha2_int.h" + +/* + * In backend, use palloc/pfree to ease the error handling. In frontend, + * use malloc to be able to return a failure status back to the caller. + */ +#ifndef FRONTEND +#define ALLOC(size) palloc(size) +#define FREE(ptr) pfree(ptr) +#else +#define ALLOC(size) malloc(size) +#define FREE(ptr) free(ptr) +#endif + +/* Set of error states */ +typedef enum pg_cryptohash_errno +{ + PG_CRYPTOHASH_ERROR_NONE = 0, + PG_CRYPTOHASH_ERROR_DEST_LEN +} pg_cryptohash_errno; + +/* Internal pg_cryptohash_ctx structure */ +struct pg_cryptohash_ctx +{ + pg_cryptohash_type type; + pg_cryptohash_errno error; + + union + { + pg_md5_ctx md5; + pg_sha1_ctx sha1; + pg_sha224_ctx sha224; + pg_sha256_ctx sha256; + pg_sha384_ctx sha384; + pg_sha512_ctx sha512; + } data; +}; + +/* + * pg_cryptohash_create + * + * Allocate a hash context. Returns NULL on failure for an OOM. The + * backend issues an error, without returning. + */ +pg_cryptohash_ctx * +pg_cryptohash_create(pg_cryptohash_type type) +{ + pg_cryptohash_ctx *ctx; + + /* + * Note that this always allocates enough space for the largest hash. A + * smaller allocation would be enough for md5, sha224 and sha256, but the + * small extra amount of memory does not make it worth complicating this + * code. + */ + ctx = ALLOC(sizeof(pg_cryptohash_ctx)); + if (ctx == NULL) + return NULL; + + memset(ctx, 0, sizeof(pg_cryptohash_ctx)); + ctx->type = type; + ctx->error = PG_CRYPTOHASH_ERROR_NONE; + return ctx; +} + +/* + * pg_cryptohash_init + * + * Initialize a hash context. Returns 0 on success, and -1 on failure. + */ +int +pg_cryptohash_init(pg_cryptohash_ctx *ctx) +{ + if (ctx == NULL) + return -1; + + switch (ctx->type) + { + case PG_MD5: + pg_md5_init(&ctx->data.md5); + break; + case PG_SHA1: + pg_sha1_init(&ctx->data.sha1); + break; + case PG_SHA224: + pg_sha224_init(&ctx->data.sha224); + break; + case PG_SHA256: + pg_sha256_init(&ctx->data.sha256); + break; + case PG_SHA384: + pg_sha384_init(&ctx->data.sha384); + break; + case PG_SHA512: + pg_sha512_init(&ctx->data.sha512); + break; + } + + return 0; +} + +/* + * pg_cryptohash_update + * + * Update a hash context. Returns 0 on success, and -1 on failure. + */ +int +pg_cryptohash_update(pg_cryptohash_ctx *ctx, const uint8 *data, size_t len) +{ + if (ctx == NULL) + return -1; + + switch (ctx->type) + { + case PG_MD5: + pg_md5_update(&ctx->data.md5, data, len); + break; + case PG_SHA1: + pg_sha1_update(&ctx->data.sha1, data, len); + break; + case PG_SHA224: + pg_sha224_update(&ctx->data.sha224, data, len); + break; + case PG_SHA256: + pg_sha256_update(&ctx->data.sha256, data, len); + break; + case PG_SHA384: + pg_sha384_update(&ctx->data.sha384, data, len); + break; + case PG_SHA512: + pg_sha512_update(&ctx->data.sha512, data, len); + break; + } + + return 0; +} + +/* + * pg_cryptohash_final + * + * Finalize a hash context. Returns 0 on success, and -1 on failure. + */ +int +pg_cryptohash_final(pg_cryptohash_ctx *ctx, uint8 *dest, size_t len) +{ + if (ctx == NULL) + return -1; + + switch (ctx->type) + { + case PG_MD5: + if (len < MD5_DIGEST_LENGTH) + { + ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; + return -1; + } + pg_md5_final(&ctx->data.md5, dest); + break; + case PG_SHA1: + if (len < SHA1_DIGEST_LENGTH) + { + ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; + return -1; + } + pg_sha1_final(&ctx->data.sha1, dest); + break; + case PG_SHA224: + if (len < PG_SHA224_DIGEST_LENGTH) + { + ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; + return -1; + } + pg_sha224_final(&ctx->data.sha224, dest); + break; + case PG_SHA256: + if (len < PG_SHA256_DIGEST_LENGTH) + { + ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; + return -1; + } + pg_sha256_final(&ctx->data.sha256, dest); + break; + case PG_SHA384: + if (len < PG_SHA384_DIGEST_LENGTH) + { + ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; + return -1; + } + pg_sha384_final(&ctx->data.sha384, dest); + break; + case PG_SHA512: + if (len < PG_SHA512_DIGEST_LENGTH) + { + ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; + return -1; + } + pg_sha512_final(&ctx->data.sha512, dest); + break; + } + + return 0; +} + +/* + * pg_cryptohash_free + * + * Free a hash context. + */ +void +pg_cryptohash_free(pg_cryptohash_ctx *ctx) +{ + if (ctx == NULL) + return; + + explicit_bzero(ctx, sizeof(pg_cryptohash_ctx)); + FREE(ctx); +} + +/* + * pg_cryptohash_error + * + * Returns a static string providing details about an error that + * happened during a computation. + */ +const char * +pg_cryptohash_error(pg_cryptohash_ctx *ctx) +{ + /* + * This implementation would never fail because of an out-of-memory error, + * except when creating the context. + */ + if (ctx == NULL) + return _("out of memory"); + + switch (ctx->error) + { + case PG_CRYPTOHASH_ERROR_NONE: + return _("success"); + case PG_CRYPTOHASH_ERROR_DEST_LEN: + return _("destination buffer too small"); + } + + Assert(false); + return _("success"); +} diff --git a/src/common/cryptohash_openssl.c b/src/common/cryptohash_openssl.c new file mode 100644 index 0000000..a654cd4 --- /dev/null +++ b/src/common/cryptohash_openssl.c @@ -0,0 +1,353 @@ +/*------------------------------------------------------------------------- + * + * cryptohash_openssl.c + * Set of wrapper routines on top of OpenSSL to support cryptographic + * hash functions. + * + * This should only be used if code is compiled with OpenSSL support. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/cryptohash_openssl.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <openssl/err.h> +#include <openssl/evp.h> + +#include "common/cryptohash.h" +#include "common/md5.h" +#include "common/sha1.h" +#include "common/sha2.h" +#ifndef FRONTEND +#include "utils/memutils.h" +#include "utils/resowner.h" +#include "utils/resowner_private.h" +#endif + +/* + * In the backend, use an allocation in TopMemoryContext to count for + * resowner cleanup handling. In the frontend, use malloc to be able + * to return a failure status back to the caller. + */ +#ifndef FRONTEND +#define ALLOC(size) MemoryContextAlloc(TopMemoryContext, size) +#define FREE(ptr) pfree(ptr) +#else +#define ALLOC(size) malloc(size) +#define FREE(ptr) free(ptr) +#endif + +/* Set of error states */ +typedef enum pg_cryptohash_errno +{ + PG_CRYPTOHASH_ERROR_NONE = 0, + PG_CRYPTOHASH_ERROR_DEST_LEN, + PG_CRYPTOHASH_ERROR_OPENSSL +} pg_cryptohash_errno; + +/* + * Internal pg_cryptohash_ctx structure. + * + * This tracks the resource owner associated to each EVP context data + * for the backend. + */ +struct pg_cryptohash_ctx +{ + pg_cryptohash_type type; + pg_cryptohash_errno error; + const char *errreason; + + EVP_MD_CTX *evpctx; + +#ifndef FRONTEND + ResourceOwner resowner; +#endif +}; + +static const char * +SSLerrmessage(unsigned long ecode) +{ + if (ecode == 0) + return NULL; + + /* + * This may return NULL, but we would fall back to a default error path if + * that were the case. + */ + return ERR_reason_error_string(ecode); +} + +/* + * pg_cryptohash_create + * + * Allocate a hash context. Returns NULL on failure for an OOM. The + * backend issues an error, without returning. + */ +pg_cryptohash_ctx * +pg_cryptohash_create(pg_cryptohash_type type) +{ + pg_cryptohash_ctx *ctx; + + /* + * Make sure that the resource owner has space to remember this reference. + * This can error out with "out of memory", so do this before any other + * allocation to avoid leaking. + */ +#ifndef FRONTEND + ResourceOwnerEnlargeCryptoHash(CurrentResourceOwner); +#endif + + ctx = ALLOC(sizeof(pg_cryptohash_ctx)); + if (ctx == NULL) + return NULL; + memset(ctx, 0, sizeof(pg_cryptohash_ctx)); + ctx->type = type; + ctx->error = PG_CRYPTOHASH_ERROR_NONE; + ctx->errreason = NULL; + + /* + * Initialization takes care of assigning the correct type for OpenSSL. + * Also ensure that there aren't any unconsumed errors in the queue from + * previous runs. + */ + ERR_clear_error(); + ctx->evpctx = EVP_MD_CTX_create(); + + if (ctx->evpctx == NULL) + { + explicit_bzero(ctx, sizeof(pg_cryptohash_ctx)); + FREE(ctx); +#ifndef FRONTEND + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); +#else + return NULL; +#endif + } + +#ifndef FRONTEND + ctx->resowner = CurrentResourceOwner; + ResourceOwnerRememberCryptoHash(CurrentResourceOwner, + PointerGetDatum(ctx)); +#endif + + return ctx; +} + +/* + * pg_cryptohash_init + * + * Initialize a hash context. Returns 0 on success, and -1 on failure. + */ +int +pg_cryptohash_init(pg_cryptohash_ctx *ctx) +{ + int status = 0; + + if (ctx == NULL) + return -1; + + switch (ctx->type) + { + case PG_MD5: + status = EVP_DigestInit_ex(ctx->evpctx, EVP_md5(), NULL); + break; + case PG_SHA1: + status = EVP_DigestInit_ex(ctx->evpctx, EVP_sha1(), NULL); + break; + case PG_SHA224: + status = EVP_DigestInit_ex(ctx->evpctx, EVP_sha224(), NULL); + break; + case PG_SHA256: + status = EVP_DigestInit_ex(ctx->evpctx, EVP_sha256(), NULL); + break; + case PG_SHA384: + status = EVP_DigestInit_ex(ctx->evpctx, EVP_sha384(), NULL); + break; + case PG_SHA512: + status = EVP_DigestInit_ex(ctx->evpctx, EVP_sha512(), NULL); + break; + } + + /* OpenSSL internals return 1 on success, 0 on failure */ + if (status <= 0) + { + ctx->errreason = SSLerrmessage(ERR_get_error()); + ctx->error = PG_CRYPTOHASH_ERROR_OPENSSL; + + /* + * The OpenSSL error queue should normally be empty since we've + * consumed an error, but cipher initialization can in FIPS-enabled + * OpenSSL builds generate two errors so clear the queue here as well. + */ + ERR_clear_error(); + return -1; + } + return 0; +} + +/* + * pg_cryptohash_update + * + * Update a hash context. Returns 0 on success, and -1 on failure. + */ +int +pg_cryptohash_update(pg_cryptohash_ctx *ctx, const uint8 *data, size_t len) +{ + int status = 0; + + if (ctx == NULL) + return -1; + + status = EVP_DigestUpdate(ctx->evpctx, data, len); + + /* OpenSSL internals return 1 on success, 0 on failure */ + if (status <= 0) + { + ctx->errreason = SSLerrmessage(ERR_get_error()); + ctx->error = PG_CRYPTOHASH_ERROR_OPENSSL; + return -1; + } + return 0; +} + +/* + * pg_cryptohash_final + * + * Finalize a hash context. Returns 0 on success, and -1 on failure. + */ +int +pg_cryptohash_final(pg_cryptohash_ctx *ctx, uint8 *dest, size_t len) +{ + int status = 0; + + if (ctx == NULL) + return -1; + + switch (ctx->type) + { + case PG_MD5: + if (len < MD5_DIGEST_LENGTH) + { + ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; + return -1; + } + break; + case PG_SHA1: + if (len < SHA1_DIGEST_LENGTH) + { + ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; + return -1; + } + break; + case PG_SHA224: + if (len < PG_SHA224_DIGEST_LENGTH) + { + ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; + return -1; + } + break; + case PG_SHA256: + if (len < PG_SHA256_DIGEST_LENGTH) + { + ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; + return -1; + } + break; + case PG_SHA384: + if (len < PG_SHA384_DIGEST_LENGTH) + { + ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; + return -1; + } + break; + case PG_SHA512: + if (len < PG_SHA512_DIGEST_LENGTH) + { + ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; + return -1; + } + break; + } + + status = EVP_DigestFinal_ex(ctx->evpctx, dest, 0); + + /* OpenSSL internals return 1 on success, 0 on failure */ + if (status <= 0) + { + ctx->errreason = SSLerrmessage(ERR_get_error()); + ctx->error = PG_CRYPTOHASH_ERROR_OPENSSL; + return -1; + } + return 0; +} + +/* + * pg_cryptohash_free + * + * Free a hash context. + */ +void +pg_cryptohash_free(pg_cryptohash_ctx *ctx) +{ + if (ctx == NULL) + return; + + EVP_MD_CTX_destroy(ctx->evpctx); + +#ifndef FRONTEND + ResourceOwnerForgetCryptoHash(ctx->resowner, + PointerGetDatum(ctx)); +#endif + + explicit_bzero(ctx, sizeof(pg_cryptohash_ctx)); + FREE(ctx); +} + +/* + * pg_cryptohash_error + * + * Returns a static string providing details about an error that + * happened during a computation. + */ +const char * +pg_cryptohash_error(pg_cryptohash_ctx *ctx) +{ + /* + * This implementation would never fail because of an out-of-memory error, + * except when creating the context. + */ + if (ctx == NULL) + return _("out of memory"); + + /* + * If a reason is provided, rely on it, else fallback to any error code + * set. + */ + if (ctx->errreason) + return ctx->errreason; + + switch (ctx->error) + { + case PG_CRYPTOHASH_ERROR_NONE: + return _("success"); + case PG_CRYPTOHASH_ERROR_DEST_LEN: + return _("destination buffer too small"); + case PG_CRYPTOHASH_ERROR_OPENSSL: + return _("OpenSSL failure"); + } + + Assert(false); /* cannot be reached */ + return _("success"); +} diff --git a/src/common/d2s.c b/src/common/d2s.c new file mode 100644 index 0000000..614e981 --- /dev/null +++ b/src/common/d2s.c @@ -0,0 +1,1076 @@ +/*--------------------------------------------------------------------------- + * + * Ryu floating-point output for double precision. + * + * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/d2s.c + * + * This is a modification of code taken from github.com/ulfjack/ryu under the + * terms of the Boost license (not the Apache license). The original copyright + * notice follows: + * + * Copyright 2018 Ulf Adams + * + * The contents of this file may be used under the terms of the Apache + * License, Version 2.0. + * + * (See accompanying file LICENSE-Apache or copy at + * http://www.apache.org/licenses/LICENSE-2.0) + * + * Alternatively, the contents of this file may be used under the terms of the + * Boost Software License, Version 1.0. + * + * (See accompanying file LICENSE-Boost or copy at + * https://www.boost.org/LICENSE_1_0.txt) + * + * Unless required by applicable law or agreed to in writing, this software is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. + * + *--------------------------------------------------------------------------- + */ + +/* + * Runtime compiler options: + * + * -DRYU_ONLY_64_BIT_OPS Avoid using uint128 or 64-bit intrinsics. Slower, + * depending on your compiler. + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/shortest_dec.h" + +/* + * For consistency, we use 128-bit types if and only if the rest of PG also + * does, even though we could use them here without worrying about the + * alignment concerns that apply elsewhere. + */ +#if !defined(HAVE_INT128) && defined(_MSC_VER) \ + && !defined(RYU_ONLY_64_BIT_OPS) && defined(_M_X64) +#define HAS_64_BIT_INTRINSICS +#endif + +#include "ryu_common.h" +#include "digit_table.h" +#include "d2s_full_table.h" +#include "d2s_intrinsics.h" + +#define DOUBLE_MANTISSA_BITS 52 +#define DOUBLE_EXPONENT_BITS 11 +#define DOUBLE_BIAS 1023 + +#define DOUBLE_POW5_INV_BITCOUNT 122 +#define DOUBLE_POW5_BITCOUNT 121 + + +static inline uint32 +pow5Factor(uint64 value) +{ + uint32 count = 0; + + for (;;) + { + Assert(value != 0); + const uint64 q = div5(value); + const uint32 r = (uint32) (value - 5 * q); + + if (r != 0) + break; + + value = q; + ++count; + } + return count; +} + +/* Returns true if value is divisible by 5^p. */ +static inline bool +multipleOfPowerOf5(const uint64 value, const uint32 p) +{ + /* + * I tried a case distinction on p, but there was no performance + * difference. + */ + return pow5Factor(value) >= p; +} + +/* Returns true if value is divisible by 2^p. */ +static inline bool +multipleOfPowerOf2(const uint64 value, const uint32 p) +{ + /* return __builtin_ctzll(value) >= p; */ + return (value & ((UINT64CONST(1) << p) - 1)) == 0; +} + +/* + * We need a 64x128-bit multiplication and a subsequent 128-bit shift. + * + * Multiplication: + * + * The 64-bit factor is variable and passed in, the 128-bit factor comes + * from a lookup table. We know that the 64-bit factor only has 55 + * significant bits (i.e., the 9 topmost bits are zeros). The 128-bit + * factor only has 124 significant bits (i.e., the 4 topmost bits are + * zeros). + * + * Shift: + * + * In principle, the multiplication result requires 55 + 124 = 179 bits to + * represent. However, we then shift this value to the right by j, which is + * at least j >= 115, so the result is guaranteed to fit into 179 - 115 = + * 64 bits. This means that we only need the topmost 64 significant bits of + * the 64x128-bit multiplication. + * + * There are several ways to do this: + * + * 1. Best case: the compiler exposes a 128-bit type. + * We perform two 64x64-bit multiplications, add the higher 64 bits of the + * lower result to the higher result, and shift by j - 64 bits. + * + * We explicitly cast from 64-bit to 128-bit, so the compiler can tell + * that these are only 64-bit inputs, and can map these to the best + * possible sequence of assembly instructions. x86-64 machines happen to + * have matching assembly instructions for 64x64-bit multiplications and + * 128-bit shifts. + * + * 2. Second best case: the compiler exposes intrinsics for the x86-64 + * assembly instructions mentioned in 1. + * + * 3. We only have 64x64 bit instructions that return the lower 64 bits of + * the result, i.e., we have to use plain C. + * + * Our inputs are less than the full width, so we have three options: + * a. Ignore this fact and just implement the intrinsics manually. + * b. Split both into 31-bit pieces, which guarantees no internal + * overflow, but requires extra work upfront (unless we change the + * lookup table). + * c. Split only the first factor into 31-bit pieces, which also + * guarantees no internal overflow, but requires extra work since the + * intermediate results are not perfectly aligned. + */ +#if defined(HAVE_INT128) + +/* Best case: use 128-bit type. */ +static inline uint64 +mulShift(const uint64 m, const uint64 *const mul, const int32 j) +{ + const uint128 b0 = ((uint128) m) * mul[0]; + const uint128 b2 = ((uint128) m) * mul[1]; + + return (uint64) (((b0 >> 64) + b2) >> (j - 64)); +} + +static inline uint64 +mulShiftAll(const uint64 m, const uint64 *const mul, const int32 j, + uint64 *const vp, uint64 *const vm, const uint32 mmShift) +{ + *vp = mulShift(4 * m + 2, mul, j); + *vm = mulShift(4 * m - 1 - mmShift, mul, j); + return mulShift(4 * m, mul, j); +} + +#elif defined(HAS_64_BIT_INTRINSICS) + +static inline uint64 +mulShift(const uint64 m, const uint64 *const mul, const int32 j) +{ + /* m is maximum 55 bits */ + uint64 high1; + + /* 128 */ + const uint64 low1 = umul128(m, mul[1], &high1); + + /* 64 */ + uint64 high0; + uint64 sum; + + /* 64 */ + umul128(m, mul[0], &high0); + /* 0 */ + sum = high0 + low1; + + if (sum < high0) + { + ++high1; + /* overflow into high1 */ + } + return shiftright128(sum, high1, j - 64); +} + +static inline uint64 +mulShiftAll(const uint64 m, const uint64 *const mul, const int32 j, + uint64 *const vp, uint64 *const vm, const uint32 mmShift) +{ + *vp = mulShift(4 * m + 2, mul, j); + *vm = mulShift(4 * m - 1 - mmShift, mul, j); + return mulShift(4 * m, mul, j); +} + +#else /* // !defined(HAVE_INT128) && + * !defined(HAS_64_BIT_INTRINSICS) */ + +static inline uint64 +mulShiftAll(uint64 m, const uint64 *const mul, const int32 j, + uint64 *const vp, uint64 *const vm, const uint32 mmShift) +{ + m <<= 1; /* m is maximum 55 bits */ + + uint64 tmp; + const uint64 lo = umul128(m, mul[0], &tmp); + uint64 hi; + const uint64 mid = tmp + umul128(m, mul[1], &hi); + + hi += mid < tmp; /* overflow into hi */ + + const uint64 lo2 = lo + mul[0]; + const uint64 mid2 = mid + mul[1] + (lo2 < lo); + const uint64 hi2 = hi + (mid2 < mid); + + *vp = shiftright128(mid2, hi2, j - 64 - 1); + + if (mmShift == 1) + { + const uint64 lo3 = lo - mul[0]; + const uint64 mid3 = mid - mul[1] - (lo3 > lo); + const uint64 hi3 = hi - (mid3 > mid); + + *vm = shiftright128(mid3, hi3, j - 64 - 1); + } + else + { + const uint64 lo3 = lo + lo; + const uint64 mid3 = mid + mid + (lo3 < lo); + const uint64 hi3 = hi + hi + (mid3 < mid); + const uint64 lo4 = lo3 - mul[0]; + const uint64 mid4 = mid3 - mul[1] - (lo4 > lo3); + const uint64 hi4 = hi3 - (mid4 > mid3); + + *vm = shiftright128(mid4, hi4, j - 64); + } + + return shiftright128(mid, hi, j - 64 - 1); +} + +#endif /* // HAS_64_BIT_INTRINSICS */ + +static inline uint32 +decimalLength(const uint64 v) +{ + /* This is slightly faster than a loop. */ + /* The average output length is 16.38 digits, so we check high-to-low. */ + /* Function precondition: v is not an 18, 19, or 20-digit number. */ + /* (17 digits are sufficient for round-tripping.) */ + Assert(v < 100000000000000000L); + if (v >= 10000000000000000L) + { + return 17; + } + if (v >= 1000000000000000L) + { + return 16; + } + if (v >= 100000000000000L) + { + return 15; + } + if (v >= 10000000000000L) + { + return 14; + } + if (v >= 1000000000000L) + { + return 13; + } + if (v >= 100000000000L) + { + return 12; + } + if (v >= 10000000000L) + { + return 11; + } + if (v >= 1000000000L) + { + return 10; + } + if (v >= 100000000L) + { + return 9; + } + if (v >= 10000000L) + { + return 8; + } + if (v >= 1000000L) + { + return 7; + } + if (v >= 100000L) + { + return 6; + } + if (v >= 10000L) + { + return 5; + } + if (v >= 1000L) + { + return 4; + } + if (v >= 100L) + { + return 3; + } + if (v >= 10L) + { + return 2; + } + return 1; +} + +/* A floating decimal representing m * 10^e. */ +typedef struct floating_decimal_64 +{ + uint64 mantissa; + int32 exponent; +} floating_decimal_64; + +static inline floating_decimal_64 +d2d(const uint64 ieeeMantissa, const uint32 ieeeExponent) +{ + int32 e2; + uint64 m2; + + if (ieeeExponent == 0) + { + /* We subtract 2 so that the bounds computation has 2 additional bits. */ + e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2; + m2 = ieeeMantissa; + } + else + { + e2 = ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2; + m2 = (UINT64CONST(1) << DOUBLE_MANTISSA_BITS) | ieeeMantissa; + } + +#if STRICTLY_SHORTEST + const bool even = (m2 & 1) == 0; + const bool acceptBounds = even; +#else + const bool acceptBounds = false; +#endif + + /* Step 2: Determine the interval of legal decimal representations. */ + const uint64 mv = 4 * m2; + + /* Implicit bool -> int conversion. True is 1, false is 0. */ + const uint32 mmShift = ieeeMantissa != 0 || ieeeExponent <= 1; + + /* We would compute mp and mm like this: */ + /* uint64 mp = 4 * m2 + 2; */ + /* uint64 mm = mv - 1 - mmShift; */ + + /* Step 3: Convert to a decimal power base using 128-bit arithmetic. */ + uint64 vr, + vp, + vm; + int32 e10; + bool vmIsTrailingZeros = false; + bool vrIsTrailingZeros = false; + + if (e2 >= 0) + { + /* + * I tried special-casing q == 0, but there was no effect on + * performance. + * + * This expr is slightly faster than max(0, log10Pow2(e2) - 1). + */ + const uint32 q = log10Pow2(e2) - (e2 > 3); + const int32 k = DOUBLE_POW5_INV_BITCOUNT + pow5bits(q) - 1; + const int32 i = -e2 + q + k; + + e10 = q; + + vr = mulShiftAll(m2, DOUBLE_POW5_INV_SPLIT[q], i, &vp, &vm, mmShift); + + if (q <= 21) + { + /* + * This should use q <= 22, but I think 21 is also safe. Smaller + * values may still be safe, but it's more difficult to reason + * about them. + * + * Only one of mp, mv, and mm can be a multiple of 5, if any. + */ + const uint32 mvMod5 = (uint32) (mv - 5 * div5(mv)); + + if (mvMod5 == 0) + { + vrIsTrailingZeros = multipleOfPowerOf5(mv, q); + } + else if (acceptBounds) + { + /*---- + * Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q + * <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q + * <=> true && pow5Factor(mm) >= q, since e2 >= q. + *---- + */ + vmIsTrailingZeros = multipleOfPowerOf5(mv - 1 - mmShift, q); + } + else + { + /* Same as min(e2 + 1, pow5Factor(mp)) >= q. */ + vp -= multipleOfPowerOf5(mv + 2, q); + } + } + } + else + { + /* + * This expression is slightly faster than max(0, log10Pow5(-e2) - 1). + */ + const uint32 q = log10Pow5(-e2) - (-e2 > 1); + const int32 i = -e2 - q; + const int32 k = pow5bits(i) - DOUBLE_POW5_BITCOUNT; + const int32 j = q - k; + + e10 = q + e2; + + vr = mulShiftAll(m2, DOUBLE_POW5_SPLIT[i], j, &vp, &vm, mmShift); + + if (q <= 1) + { + /* + * {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q + * trailing 0 bits. + */ + /* mv = 4 * m2, so it always has at least two trailing 0 bits. */ + vrIsTrailingZeros = true; + if (acceptBounds) + { + /* + * mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff + * mmShift == 1. + */ + vmIsTrailingZeros = mmShift == 1; + } + else + { + /* + * mp = mv + 2, so it always has at least one trailing 0 bit. + */ + --vp; + } + } + else if (q < 63) + { + /* TODO(ulfjack):Use a tighter bound here. */ + /* + * We need to compute min(ntz(mv), pow5Factor(mv) - e2) >= q - 1 + */ + /* <=> ntz(mv) >= q - 1 && pow5Factor(mv) - e2 >= q - 1 */ + /* <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q) */ + /* <=> (mv & ((1 << (q - 1)) - 1)) == 0 */ + + /* + * We also need to make sure that the left shift does not + * overflow. + */ + vrIsTrailingZeros = multipleOfPowerOf2(mv, q - 1); + } + } + + /* + * Step 4: Find the shortest decimal representation in the interval of + * legal representations. + */ + uint32 removed = 0; + uint8 lastRemovedDigit = 0; + uint64 output; + + /* On average, we remove ~2 digits. */ + if (vmIsTrailingZeros || vrIsTrailingZeros) + { + /* General case, which happens rarely (~0.7%). */ + for (;;) + { + const uint64 vpDiv10 = div10(vp); + const uint64 vmDiv10 = div10(vm); + + if (vpDiv10 <= vmDiv10) + break; + + const uint32 vmMod10 = (uint32) (vm - 10 * vmDiv10); + const uint64 vrDiv10 = div10(vr); + const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10); + + vmIsTrailingZeros &= vmMod10 == 0; + vrIsTrailingZeros &= lastRemovedDigit == 0; + lastRemovedDigit = (uint8) vrMod10; + vr = vrDiv10; + vp = vpDiv10; + vm = vmDiv10; + ++removed; + } + + if (vmIsTrailingZeros) + { + for (;;) + { + const uint64 vmDiv10 = div10(vm); + const uint32 vmMod10 = (uint32) (vm - 10 * vmDiv10); + + if (vmMod10 != 0) + break; + + const uint64 vpDiv10 = div10(vp); + const uint64 vrDiv10 = div10(vr); + const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10); + + vrIsTrailingZeros &= lastRemovedDigit == 0; + lastRemovedDigit = (uint8) vrMod10; + vr = vrDiv10; + vp = vpDiv10; + vm = vmDiv10; + ++removed; + } + } + + if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) + { + /* Round even if the exact number is .....50..0. */ + lastRemovedDigit = 4; + } + + /* + * We need to take vr + 1 if vr is outside bounds or we need to round + * up. + */ + output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5); + } + else + { + /* + * Specialized for the common case (~99.3%). Percentages below are + * relative to this. + */ + bool roundUp = false; + const uint64 vpDiv100 = div100(vp); + const uint64 vmDiv100 = div100(vm); + + if (vpDiv100 > vmDiv100) + { + /* Optimization:remove two digits at a time(~86.2 %). */ + const uint64 vrDiv100 = div100(vr); + const uint32 vrMod100 = (uint32) (vr - 100 * vrDiv100); + + roundUp = vrMod100 >= 50; + vr = vrDiv100; + vp = vpDiv100; + vm = vmDiv100; + removed += 2; + } + + /*---- + * Loop iterations below (approximately), without optimization + * above: + * + * 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, + * 6+: 0.02% + * + * Loop iterations below (approximately), with optimization + * above: + * + * 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02% + *---- + */ + for (;;) + { + const uint64 vpDiv10 = div10(vp); + const uint64 vmDiv10 = div10(vm); + + if (vpDiv10 <= vmDiv10) + break; + + const uint64 vrDiv10 = div10(vr); + const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10); + + roundUp = vrMod10 >= 5; + vr = vrDiv10; + vp = vpDiv10; + vm = vmDiv10; + ++removed; + } + + /* + * We need to take vr + 1 if vr is outside bounds or we need to round + * up. + */ + output = vr + (vr == vm || roundUp); + } + + const int32 exp = e10 + removed; + + floating_decimal_64 fd; + + fd.exponent = exp; + fd.mantissa = output; + return fd; +} + +static inline int +to_chars_df(const floating_decimal_64 v, const uint32 olength, char *const result) +{ + /* Step 5: Print the decimal representation. */ + int index = 0; + + uint64 output = v.mantissa; + int32 exp = v.exponent; + + /*---- + * On entry, mantissa * 10^exp is the result to be output. + * Caller has already done the - sign if needed. + * + * We want to insert the point somewhere depending on the output length + * and exponent, which might mean adding zeros: + * + * exp | format + * 1+ | ddddddddd000000 + * 0 | ddddddddd + * -1 .. -len+1 | dddddddd.d to d.ddddddddd + * -len ... | 0.ddddddddd to 0.000dddddd + */ + uint32 i = 0; + int32 nexp = exp + olength; + + if (nexp <= 0) + { + /* -nexp is number of 0s to add after '.' */ + Assert(nexp >= -3); + /* 0.000ddddd */ + index = 2 - nexp; + /* won't need more than this many 0s */ + memcpy(result, "0.000000", 8); + } + else if (exp < 0) + { + /* + * dddd.dddd; leave space at the start and move the '.' in after + */ + index = 1; + } + else + { + /* + * We can save some code later by pre-filling with zeros. We know that + * there can be no more than 16 output digits in this form, otherwise + * we would not choose fixed-point output. + */ + Assert(exp < 16 && exp + olength <= 16); + memset(result, '0', 16); + } + + /* + * We prefer 32-bit operations, even on 64-bit platforms. We have at most + * 17 digits, and uint32 can store 9 digits. If output doesn't fit into + * uint32, we cut off 8 digits, so the rest will fit into uint32. + */ + if ((output >> 32) != 0) + { + /* Expensive 64-bit division. */ + const uint64 q = div1e8(output); + uint32 output2 = (uint32) (output - 100000000 * q); + const uint32 c = output2 % 10000; + + output = q; + output2 /= 10000; + + const uint32 d = output2 % 10000; + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; + const uint32 d0 = (d % 100) << 1; + const uint32 d1 = (d / 100) << 1; + + memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2); + memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2); + memcpy(result + index + olength - i - 6, DIGIT_TABLE + d0, 2); + memcpy(result + index + olength - i - 8, DIGIT_TABLE + d1, 2); + i += 8; + } + + uint32 output2 = (uint32) output; + + while (output2 >= 10000) + { + const uint32 c = output2 - 10000 * (output2 / 10000); + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; + + output2 /= 10000; + memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2); + memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2); + i += 4; + } + if (output2 >= 100) + { + const uint32 c = (output2 % 100) << 1; + + output2 /= 100; + memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2); + i += 2; + } + if (output2 >= 10) + { + const uint32 c = output2 << 1; + + memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2); + } + else + { + result[index] = (char) ('0' + output2); + } + + if (index == 1) + { + /* + * nexp is 1..15 here, representing the number of digits before the + * point. A value of 16 is not possible because we switch to + * scientific notation when the display exponent reaches 15. + */ + Assert(nexp < 16); + /* gcc only seems to want to optimize memmove for small 2^n */ + if (nexp & 8) + { + memmove(result + index - 1, result + index, 8); + index += 8; + } + if (nexp & 4) + { + memmove(result + index - 1, result + index, 4); + index += 4; + } + if (nexp & 2) + { + memmove(result + index - 1, result + index, 2); + index += 2; + } + if (nexp & 1) + { + result[index - 1] = result[index]; + } + result[nexp] = '.'; + index = olength + 1; + } + else if (exp >= 0) + { + /* we supplied the trailing zeros earlier, now just set the length. */ + index = olength + exp; + } + else + { + index = olength + (2 - nexp); + } + + return index; +} + +static inline int +to_chars(floating_decimal_64 v, const bool sign, char *const result) +{ + /* Step 5: Print the decimal representation. */ + int index = 0; + + uint64 output = v.mantissa; + uint32 olength = decimalLength(output); + int32 exp = v.exponent + olength - 1; + + if (sign) + { + result[index++] = '-'; + } + + /* + * The thresholds for fixed-point output are chosen to match printf + * defaults. Beware that both the code of to_chars_df and the value of + * DOUBLE_SHORTEST_DECIMAL_LEN are sensitive to these thresholds. + */ + if (exp >= -4 && exp < 15) + return to_chars_df(v, olength, result + index) + sign; + + /* + * If v.exponent is exactly 0, we might have reached here via the small + * integer fast path, in which case v.mantissa might contain trailing + * (decimal) zeros. For scientific notation we need to move these zeros + * into the exponent. (For fixed point this doesn't matter, which is why + * we do this here rather than above.) + * + * Since we already calculated the display exponent (exp) above based on + * the old decimal length, that value does not change here. Instead, we + * just reduce the display length for each digit removed. + * + * If we didn't get here via the fast path, the raw exponent will not + * usually be 0, and there will be no trailing zeros, so we pay no more + * than one div10/multiply extra cost. We claw back half of that by + * checking for divisibility by 2 before dividing by 10. + */ + if (v.exponent == 0) + { + while ((output & 1) == 0) + { + const uint64 q = div10(output); + const uint32 r = (uint32) (output - 10 * q); + + if (r != 0) + break; + output = q; + --olength; + } + } + + /*---- + * Print the decimal digits. + * + * The following code is equivalent to: + * + * for (uint32 i = 0; i < olength - 1; ++i) { + * const uint32 c = output % 10; output /= 10; + * result[index + olength - i] = (char) ('0' + c); + * } + * result[index] = '0' + output % 10; + *---- + */ + + uint32 i = 0; + + /* + * We prefer 32-bit operations, even on 64-bit platforms. We have at most + * 17 digits, and uint32 can store 9 digits. If output doesn't fit into + * uint32, we cut off 8 digits, so the rest will fit into uint32. + */ + if ((output >> 32) != 0) + { + /* Expensive 64-bit division. */ + const uint64 q = div1e8(output); + uint32 output2 = (uint32) (output - 100000000 * q); + + output = q; + + const uint32 c = output2 % 10000; + + output2 /= 10000; + + const uint32 d = output2 % 10000; + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; + const uint32 d0 = (d % 100) << 1; + const uint32 d1 = (d / 100) << 1; + + memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2); + memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2); + memcpy(result + index + olength - i - 5, DIGIT_TABLE + d0, 2); + memcpy(result + index + olength - i - 7, DIGIT_TABLE + d1, 2); + i += 8; + } + + uint32 output2 = (uint32) output; + + while (output2 >= 10000) + { + const uint32 c = output2 - 10000 * (output2 / 10000); + + output2 /= 10000; + + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; + + memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2); + memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2); + i += 4; + } + if (output2 >= 100) + { + const uint32 c = (output2 % 100) << 1; + + output2 /= 100; + memcpy(result + index + olength - i - 1, DIGIT_TABLE + c, 2); + i += 2; + } + if (output2 >= 10) + { + const uint32 c = output2 << 1; + + /* + * We can't use memcpy here: the decimal dot goes between these two + * digits. + */ + result[index + olength - i] = DIGIT_TABLE[c + 1]; + result[index] = DIGIT_TABLE[c]; + } + else + { + result[index] = (char) ('0' + output2); + } + + /* Print decimal point if needed. */ + if (olength > 1) + { + result[index + 1] = '.'; + index += olength + 1; + } + else + { + ++index; + } + + /* Print the exponent. */ + result[index++] = 'e'; + if (exp < 0) + { + result[index++] = '-'; + exp = -exp; + } + else + result[index++] = '+'; + + if (exp >= 100) + { + const int32 c = exp % 10; + + memcpy(result + index, DIGIT_TABLE + 2 * (exp / 10), 2); + result[index + 2] = (char) ('0' + c); + index += 3; + } + else + { + memcpy(result + index, DIGIT_TABLE + 2 * exp, 2); + index += 2; + } + + return index; +} + +static inline bool +d2d_small_int(const uint64 ieeeMantissa, + const uint32 ieeeExponent, + floating_decimal_64 *v) +{ + const int32 e2 = (int32) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS; + + /* + * Avoid using multiple "return false;" here since it tends to provoke the + * compiler into inlining multiple copies of d2d, which is undesirable. + */ + + if (e2 >= -DOUBLE_MANTISSA_BITS && e2 <= 0) + { + /*---- + * Since 2^52 <= m2 < 2^53 and 0 <= -e2 <= 52: + * 1 <= f = m2 / 2^-e2 < 2^53. + * + * Test if the lower -e2 bits of the significand are 0, i.e. whether + * the fraction is 0. We can use ieeeMantissa here, since the implied + * 1 bit can never be tested by this; the implied 1 can only be part + * of a fraction if e2 < -DOUBLE_MANTISSA_BITS which we already + * checked. (e.g. 0.5 gives ieeeMantissa == 0 and e2 == -53) + */ + const uint64 mask = (UINT64CONST(1) << -e2) - 1; + const uint64 fraction = ieeeMantissa & mask; + + if (fraction == 0) + { + /*---- + * f is an integer in the range [1, 2^53). + * Note: mantissa might contain trailing (decimal) 0's. + * Note: since 2^53 < 10^16, there is no need to adjust + * decimalLength(). + */ + const uint64 m2 = (UINT64CONST(1) << DOUBLE_MANTISSA_BITS) | ieeeMantissa; + + v->mantissa = m2 >> -e2; + v->exponent = 0; + return true; + } + } + + return false; +} + +/* + * Store the shortest decimal representation of the given double as an + * UNTERMINATED string in the caller's supplied buffer (which must be at least + * DOUBLE_SHORTEST_DECIMAL_LEN-1 bytes long). + * + * Returns the number of bytes stored. + */ +int +double_to_shortest_decimal_bufn(double f, char *result) +{ + /* + * Step 1: Decode the floating-point number, and unify normalized and + * subnormal cases. + */ + const uint64 bits = double_to_bits(f); + + /* Decode bits into sign, mantissa, and exponent. */ + const bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0; + const uint64 ieeeMantissa = bits & ((UINT64CONST(1) << DOUBLE_MANTISSA_BITS) - 1); + const uint32 ieeeExponent = (uint32) ((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1)); + + /* Case distinction; exit early for the easy cases. */ + if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0)) + { + return copy_special_str(result, ieeeSign, (ieeeExponent != 0), (ieeeMantissa != 0)); + } + + floating_decimal_64 v; + const bool isSmallInt = d2d_small_int(ieeeMantissa, ieeeExponent, &v); + + if (!isSmallInt) + { + v = d2d(ieeeMantissa, ieeeExponent); + } + + return to_chars(v, ieeeSign, result); +} + +/* + * Store the shortest decimal representation of the given double as a + * null-terminated string in the caller's supplied buffer (which must be at + * least DOUBLE_SHORTEST_DECIMAL_LEN bytes long). + * + * Returns the string length. + */ +int +double_to_shortest_decimal_buf(double f, char *result) +{ + const int index = double_to_shortest_decimal_bufn(f, result); + + /* Terminate the string. */ + Assert(index < DOUBLE_SHORTEST_DECIMAL_LEN); + result[index] = '\0'; + return index; +} + +/* + * Return the shortest decimal representation as a null-terminated palloc'd + * string (outside the backend, uses malloc() instead). + * + * Caller is responsible for freeing the result. + */ +char * +double_to_shortest_decimal(double f) +{ + char *const result = (char *) palloc(DOUBLE_SHORTEST_DECIMAL_LEN); + + double_to_shortest_decimal_buf(f, result); + return result; +} diff --git a/src/common/d2s_full_table.h b/src/common/d2s_full_table.h new file mode 100644 index 0000000..23f5e9a --- /dev/null +++ b/src/common/d2s_full_table.h @@ -0,0 +1,358 @@ +/*--------------------------------------------------------------------------- + * + * Ryu floating-point output for double precision. + * + * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/d2s_full_table.h + * + * This is a modification of code taken from github.com/ulfjack/ryu under the + * terms of the Boost license (not the Apache license). The original copyright + * notice follows: + * + * Copyright 2018 Ulf Adams + * + * The contents of this file may be used under the terms of the Apache + * License, Version 2.0. + * + * (See accompanying file LICENSE-Apache or copy at + * http://www.apache.org/licenses/LICENSE-2.0) + * + * Alternatively, the contents of this file may be used under the terms of the + * Boost Software License, Version 1.0. + * + * (See accompanying file LICENSE-Boost or copy at + * https://www.boost.org/LICENSE_1_0.txt) + * + * Unless required by applicable law or agreed to in writing, this software is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. + * + *--------------------------------------------------------------------------- + */ + +#ifndef RYU_D2S_FULL_TABLE_H +#define RYU_D2S_FULL_TABLE_H + +/* + * These tables are generated (by the upstream) using PrintDoubleLookupTable + * from the upstream sources at github.com/ulfjack/ryu, and then modified (by + * us) by adding UINT64CONST. + */ +static const uint64 DOUBLE_POW5_INV_SPLIT[292][2] = { + {UINT64CONST(1), UINT64CONST(288230376151711744)}, {UINT64CONST(3689348814741910324), UINT64CONST(230584300921369395)}, + {UINT64CONST(2951479051793528259), UINT64CONST(184467440737095516)}, {UINT64CONST(17118578500402463900), UINT64CONST(147573952589676412)}, + {UINT64CONST(12632330341676300947), UINT64CONST(236118324143482260)}, {UINT64CONST(10105864273341040758), UINT64CONST(188894659314785808)}, + {UINT64CONST(15463389048156653253), UINT64CONST(151115727451828646)}, {UINT64CONST(17362724847566824558), UINT64CONST(241785163922925834)}, + {UINT64CONST(17579528692795369969), UINT64CONST(193428131138340667)}, {UINT64CONST(6684925324752475329), UINT64CONST(154742504910672534)}, + {UINT64CONST(18074578149087781173), UINT64CONST(247588007857076054)}, {UINT64CONST(18149011334012135262), UINT64CONST(198070406285660843)}, + {UINT64CONST(3451162622983977240), UINT64CONST(158456325028528675)}, {UINT64CONST(5521860196774363583), UINT64CONST(253530120045645880)}, + {UINT64CONST(4417488157419490867), UINT64CONST(202824096036516704)}, {UINT64CONST(7223339340677503017), UINT64CONST(162259276829213363)}, + {UINT64CONST(7867994130342094503), UINT64CONST(259614842926741381)}, {UINT64CONST(2605046489531765280), UINT64CONST(207691874341393105)}, + {UINT64CONST(2084037191625412224), UINT64CONST(166153499473114484)}, {UINT64CONST(10713157136084480204), UINT64CONST(265845599156983174)}, + {UINT64CONST(12259874523609494487), UINT64CONST(212676479325586539)}, {UINT64CONST(13497248433629505913), UINT64CONST(170141183460469231)}, + {UINT64CONST(14216899864323388813), UINT64CONST(272225893536750770)}, {UINT64CONST(11373519891458711051), UINT64CONST(217780714829400616)}, + {UINT64CONST(5409467098425058518), UINT64CONST(174224571863520493)}, {UINT64CONST(4965798542738183305), UINT64CONST(278759314981632789)}, + {UINT64CONST(7661987648932456967), UINT64CONST(223007451985306231)}, {UINT64CONST(2440241304404055250), UINT64CONST(178405961588244985)}, + {UINT64CONST(3904386087046488400), UINT64CONST(285449538541191976)}, {UINT64CONST(17880904128604832013), UINT64CONST(228359630832953580)}, + {UINT64CONST(14304723302883865611), UINT64CONST(182687704666362864)}, {UINT64CONST(15133127457049002812), UINT64CONST(146150163733090291)}, + {UINT64CONST(16834306301794583852), UINT64CONST(233840261972944466)}, {UINT64CONST(9778096226693756759), UINT64CONST(187072209578355573)}, + {UINT64CONST(15201174610838826053), UINT64CONST(149657767662684458)}, {UINT64CONST(2185786488890659746), UINT64CONST(239452428260295134)}, + {UINT64CONST(5437978005854438120), UINT64CONST(191561942608236107)}, {UINT64CONST(15418428848909281466), UINT64CONST(153249554086588885)}, + {UINT64CONST(6222742084545298729), UINT64CONST(245199286538542217)}, {UINT64CONST(16046240111861969953), UINT64CONST(196159429230833773)}, + {UINT64CONST(1768945645263844993), UINT64CONST(156927543384667019)}, {UINT64CONST(10209010661905972635), UINT64CONST(251084069415467230)}, + {UINT64CONST(8167208529524778108), UINT64CONST(200867255532373784)}, {UINT64CONST(10223115638361732810), UINT64CONST(160693804425899027)}, + {UINT64CONST(1599589762411131202), UINT64CONST(257110087081438444)}, {UINT64CONST(4969020624670815285), UINT64CONST(205688069665150755)}, + {UINT64CONST(3975216499736652228), UINT64CONST(164550455732120604)}, {UINT64CONST(13739044029062464211), UINT64CONST(263280729171392966)}, + {UINT64CONST(7301886408508061046), UINT64CONST(210624583337114373)}, {UINT64CONST(13220206756290269483), UINT64CONST(168499666669691498)}, + {UINT64CONST(17462981995322520850), UINT64CONST(269599466671506397)}, {UINT64CONST(6591687966774196033), UINT64CONST(215679573337205118)}, + {UINT64CONST(12652048002903177473), UINT64CONST(172543658669764094)}, {UINT64CONST(9175230360419352987), UINT64CONST(276069853871622551)}, + {UINT64CONST(3650835473593572067), UINT64CONST(220855883097298041)}, {UINT64CONST(17678063637842498946), UINT64CONST(176684706477838432)}, + {UINT64CONST(13527506561580357021), UINT64CONST(282695530364541492)}, {UINT64CONST(3443307619780464970), UINT64CONST(226156424291633194)}, + {UINT64CONST(6443994910566282300), UINT64CONST(180925139433306555)}, {UINT64CONST(5155195928453025840), UINT64CONST(144740111546645244)}, + {UINT64CONST(15627011115008661990), UINT64CONST(231584178474632390)}, {UINT64CONST(12501608892006929592), UINT64CONST(185267342779705912)}, + {UINT64CONST(2622589484121723027), UINT64CONST(148213874223764730)}, {UINT64CONST(4196143174594756843), UINT64CONST(237142198758023568)}, + {UINT64CONST(10735612169159626121), UINT64CONST(189713759006418854)}, {UINT64CONST(12277838550069611220), UINT64CONST(151771007205135083)}, + {UINT64CONST(15955192865369467629), UINT64CONST(242833611528216133)}, {UINT64CONST(1696107848069843133), UINT64CONST(194266889222572907)}, + {UINT64CONST(12424932722681605476), UINT64CONST(155413511378058325)}, {UINT64CONST(1433148282581017146), UINT64CONST(248661618204893321)}, + {UINT64CONST(15903913885032455010), UINT64CONST(198929294563914656)}, {UINT64CONST(9033782293284053685), UINT64CONST(159143435651131725)}, + {UINT64CONST(14454051669254485895), UINT64CONST(254629497041810760)}, {UINT64CONST(11563241335403588716), UINT64CONST(203703597633448608)}, + {UINT64CONST(16629290697806691620), UINT64CONST(162962878106758886)}, {UINT64CONST(781423413297334329), UINT64CONST(260740604970814219)}, + {UINT64CONST(4314487545379777786), UINT64CONST(208592483976651375)}, {UINT64CONST(3451590036303822229), UINT64CONST(166873987181321100)}, + {UINT64CONST(5522544058086115566), UINT64CONST(266998379490113760)}, {UINT64CONST(4418035246468892453), UINT64CONST(213598703592091008)}, + {UINT64CONST(10913125826658934609), UINT64CONST(170878962873672806)}, {UINT64CONST(10082303693170474728), UINT64CONST(273406340597876490)}, + {UINT64CONST(8065842954536379782), UINT64CONST(218725072478301192)}, {UINT64CONST(17520720807854834795), UINT64CONST(174980057982640953)}, + {UINT64CONST(5897060404116273733), UINT64CONST(279968092772225526)}, {UINT64CONST(1028299508551108663), UINT64CONST(223974474217780421)}, + {UINT64CONST(15580034865808528224), UINT64CONST(179179579374224336)}, {UINT64CONST(17549358155809824511), UINT64CONST(286687326998758938)}, + {UINT64CONST(2971440080422128639), UINT64CONST(229349861599007151)}, {UINT64CONST(17134547323305344204), UINT64CONST(183479889279205720)}, + {UINT64CONST(13707637858644275364), UINT64CONST(146783911423364576)}, {UINT64CONST(14553522944347019935), UINT64CONST(234854258277383322)}, + {UINT64CONST(4264120725993795302), UINT64CONST(187883406621906658)}, {UINT64CONST(10789994210278856888), UINT64CONST(150306725297525326)}, + {UINT64CONST(9885293106962350374), UINT64CONST(240490760476040522)}, {UINT64CONST(529536856086059653), UINT64CONST(192392608380832418)}, + {UINT64CONST(7802327114352668369), UINT64CONST(153914086704665934)}, {UINT64CONST(1415676938738538420), UINT64CONST(246262538727465495)}, + {UINT64CONST(1132541550990830736), UINT64CONST(197010030981972396)}, {UINT64CONST(15663428499760305882), UINT64CONST(157608024785577916)}, + {UINT64CONST(17682787970132668764), UINT64CONST(252172839656924666)}, {UINT64CONST(10456881561364224688), UINT64CONST(201738271725539733)}, + {UINT64CONST(15744202878575200397), UINT64CONST(161390617380431786)}, {UINT64CONST(17812026976236499989), UINT64CONST(258224987808690858)}, + {UINT64CONST(3181575136763469022), UINT64CONST(206579990246952687)}, {UINT64CONST(13613306553636506187), UINT64CONST(165263992197562149)}, + {UINT64CONST(10713244041592678929), UINT64CONST(264422387516099439)}, {UINT64CONST(12259944048016053467), UINT64CONST(211537910012879551)}, + {UINT64CONST(6118606423670932450), UINT64CONST(169230328010303641)}, {UINT64CONST(2411072648389671274), UINT64CONST(270768524816485826)}, + {UINT64CONST(16686253377679378312), UINT64CONST(216614819853188660)}, {UINT64CONST(13349002702143502650), UINT64CONST(173291855882550928)}, + {UINT64CONST(17669055508687693916), UINT64CONST(277266969412081485)}, {UINT64CONST(14135244406950155133), UINT64CONST(221813575529665188)}, + {UINT64CONST(240149081334393137), UINT64CONST(177450860423732151)}, {UINT64CONST(11452284974360759988), UINT64CONST(283921376677971441)}, + {UINT64CONST(5472479164746697667), UINT64CONST(227137101342377153)}, {UINT64CONST(11756680961281178780), UINT64CONST(181709681073901722)}, + {UINT64CONST(2026647139541122378), UINT64CONST(145367744859121378)}, {UINT64CONST(18000030682233437097), UINT64CONST(232588391774594204)}, + {UINT64CONST(18089373360528660001), UINT64CONST(186070713419675363)}, {UINT64CONST(3403452244197197031), UINT64CONST(148856570735740291)}, + {UINT64CONST(16513570034941246220), UINT64CONST(238170513177184465)}, {UINT64CONST(13210856027952996976), UINT64CONST(190536410541747572)}, + {UINT64CONST(3189987192878576934), UINT64CONST(152429128433398058)}, {UINT64CONST(1414630693863812771), UINT64CONST(243886605493436893)}, + {UINT64CONST(8510402184574870864), UINT64CONST(195109284394749514)}, {UINT64CONST(10497670562401807014), UINT64CONST(156087427515799611)}, + {UINT64CONST(9417575270359070576), UINT64CONST(249739884025279378)}, {UINT64CONST(14912757845771077107), UINT64CONST(199791907220223502)}, + {UINT64CONST(4551508647133041040), UINT64CONST(159833525776178802)}, {UINT64CONST(10971762650154775986), UINT64CONST(255733641241886083)}, + {UINT64CONST(16156107749607641435), UINT64CONST(204586912993508866)}, {UINT64CONST(9235537384944202825), UINT64CONST(163669530394807093)}, + {UINT64CONST(11087511001168814197), UINT64CONST(261871248631691349)}, {UINT64CONST(12559357615676961681), UINT64CONST(209496998905353079)}, + {UINT64CONST(13736834907283479668), UINT64CONST(167597599124282463)}, {UINT64CONST(18289587036911657145), UINT64CONST(268156158598851941)}, + {UINT64CONST(10942320814787415393), UINT64CONST(214524926879081553)}, {UINT64CONST(16132554281313752961), UINT64CONST(171619941503265242)}, + {UINT64CONST(11054691591134363444), UINT64CONST(274591906405224388)}, {UINT64CONST(16222450902391311402), UINT64CONST(219673525124179510)}, + {UINT64CONST(12977960721913049122), UINT64CONST(175738820099343608)}, {UINT64CONST(17075388340318968271), UINT64CONST(281182112158949773)}, + {UINT64CONST(2592264228029443648), UINT64CONST(224945689727159819)}, {UINT64CONST(5763160197165465241), UINT64CONST(179956551781727855)}, + {UINT64CONST(9221056315464744386), UINT64CONST(287930482850764568)}, {UINT64CONST(14755542681855616155), UINT64CONST(230344386280611654)}, + {UINT64CONST(15493782960226403247), UINT64CONST(184275509024489323)}, {UINT64CONST(1326979923955391628), UINT64CONST(147420407219591459)}, + {UINT64CONST(9501865507812447252), UINT64CONST(235872651551346334)}, {UINT64CONST(11290841220991868125), UINT64CONST(188698121241077067)}, + {UINT64CONST(1653975347309673853), UINT64CONST(150958496992861654)}, {UINT64CONST(10025058185179298811), UINT64CONST(241533595188578646)}, + {UINT64CONST(4330697733401528726), UINT64CONST(193226876150862917)}, {UINT64CONST(14532604630946953951), UINT64CONST(154581500920690333)}, + {UINT64CONST(1116074521063664381), UINT64CONST(247330401473104534)}, {UINT64CONST(4582208431592841828), UINT64CONST(197864321178483627)}, + {UINT64CONST(14733813189500004432), UINT64CONST(158291456942786901)}, {UINT64CONST(16195403473716186445), UINT64CONST(253266331108459042)}, + {UINT64CONST(5577625149489128510), UINT64CONST(202613064886767234)}, {UINT64CONST(8151448934333213131), UINT64CONST(162090451909413787)}, + {UINT64CONST(16731667109675051333), UINT64CONST(259344723055062059)}, {UINT64CONST(17074682502481951390), UINT64CONST(207475778444049647)}, + {UINT64CONST(6281048372501740465), UINT64CONST(165980622755239718)}, {UINT64CONST(6360328581260874421), UINT64CONST(265568996408383549)}, + {UINT64CONST(8777611679750609860), UINT64CONST(212455197126706839)}, {UINT64CONST(10711438158542398211), UINT64CONST(169964157701365471)}, + {UINT64CONST(9759603424184016492), UINT64CONST(271942652322184754)}, {UINT64CONST(11497031554089123517), UINT64CONST(217554121857747803)}, + {UINT64CONST(16576322872755119460), UINT64CONST(174043297486198242)}, {UINT64CONST(11764721337440549842), UINT64CONST(278469275977917188)}, + {UINT64CONST(16790474699436260520), UINT64CONST(222775420782333750)}, {UINT64CONST(13432379759549008416), UINT64CONST(178220336625867000)}, + {UINT64CONST(3045063541568861850), UINT64CONST(285152538601387201)}, {UINT64CONST(17193446092222730773), UINT64CONST(228122030881109760)}, + {UINT64CONST(13754756873778184618), UINT64CONST(182497624704887808)}, {UINT64CONST(18382503128506368341), UINT64CONST(145998099763910246)}, + {UINT64CONST(3586563302416817083), UINT64CONST(233596959622256395)}, {UINT64CONST(2869250641933453667), UINT64CONST(186877567697805116)}, + {UINT64CONST(17052795772514404226), UINT64CONST(149502054158244092)}, {UINT64CONST(12527077977055405469), UINT64CONST(239203286653190548)}, + {UINT64CONST(17400360011128145022), UINT64CONST(191362629322552438)}, {UINT64CONST(2852241564676785048), UINT64CONST(153090103458041951)}, + {UINT64CONST(15631632947708587046), UINT64CONST(244944165532867121)}, {UINT64CONST(8815957543424959314), UINT64CONST(195955332426293697)}, + {UINT64CONST(18120812478965698421), UINT64CONST(156764265941034957)}, {UINT64CONST(14235904707377476180), UINT64CONST(250822825505655932)}, + {UINT64CONST(4010026136418160298), UINT64CONST(200658260404524746)}, {UINT64CONST(17965416168102169531), UINT64CONST(160526608323619796)}, + {UINT64CONST(2919224165770098987), UINT64CONST(256842573317791675)}, {UINT64CONST(2335379332616079190), UINT64CONST(205474058654233340)}, + {UINT64CONST(1868303466092863352), UINT64CONST(164379246923386672)}, {UINT64CONST(6678634360490491686), UINT64CONST(263006795077418675)}, + {UINT64CONST(5342907488392393349), UINT64CONST(210405436061934940)}, {UINT64CONST(4274325990713914679), UINT64CONST(168324348849547952)}, + {UINT64CONST(10528270399884173809), UINT64CONST(269318958159276723)}, {UINT64CONST(15801313949391159694), UINT64CONST(215455166527421378)}, + {UINT64CONST(1573004715287196786), UINT64CONST(172364133221937103)}, {UINT64CONST(17274202803427156150), UINT64CONST(275782613155099364)}, + {UINT64CONST(17508711057483635243), UINT64CONST(220626090524079491)}, {UINT64CONST(10317620031244997871), UINT64CONST(176500872419263593)}, + {UINT64CONST(12818843235250086271), UINT64CONST(282401395870821749)}, {UINT64CONST(13944423402941979340), UINT64CONST(225921116696657399)}, + {UINT64CONST(14844887537095493795), UINT64CONST(180736893357325919)}, {UINT64CONST(15565258844418305359), UINT64CONST(144589514685860735)}, + {UINT64CONST(6457670077359736959), UINT64CONST(231343223497377177)}, {UINT64CONST(16234182506113520537), UINT64CONST(185074578797901741)}, + {UINT64CONST(9297997190148906106), UINT64CONST(148059663038321393)}, {UINT64CONST(11187446689496339446), UINT64CONST(236895460861314229)}, + {UINT64CONST(12639306166338981880), UINT64CONST(189516368689051383)}, {UINT64CONST(17490142562555006151), UINT64CONST(151613094951241106)}, + {UINT64CONST(2158786396894637579), UINT64CONST(242580951921985771)}, {UINT64CONST(16484424376483351356), UINT64CONST(194064761537588616)}, + {UINT64CONST(9498190686444770762), UINT64CONST(155251809230070893)}, {UINT64CONST(11507756283569722895), UINT64CONST(248402894768113429)}, + {UINT64CONST(12895553841597688639), UINT64CONST(198722315814490743)}, {UINT64CONST(17695140702761971558), UINT64CONST(158977852651592594)}, + {UINT64CONST(17244178680193423523), UINT64CONST(254364564242548151)}, {UINT64CONST(10105994129412828495), UINT64CONST(203491651394038521)}, + {UINT64CONST(4395446488788352473), UINT64CONST(162793321115230817)}, {UINT64CONST(10722063196803274280), UINT64CONST(260469313784369307)}, + {UINT64CONST(1198952927958798777), UINT64CONST(208375451027495446)}, {UINT64CONST(15716557601334680315), UINT64CONST(166700360821996356)}, + {UINT64CONST(17767794532651667857), UINT64CONST(266720577315194170)}, {UINT64CONST(14214235626121334286), UINT64CONST(213376461852155336)}, + {UINT64CONST(7682039686155157106), UINT64CONST(170701169481724269)}, {UINT64CONST(1223217053622520399), UINT64CONST(273121871170758831)}, + {UINT64CONST(15735968901865657612), UINT64CONST(218497496936607064)}, {UINT64CONST(16278123936234436413), UINT64CONST(174797997549285651)}, + {UINT64CONST(219556594781725998), UINT64CONST(279676796078857043)}, {UINT64CONST(7554342905309201445), UINT64CONST(223741436863085634)}, + {UINT64CONST(9732823138989271479), UINT64CONST(178993149490468507)}, {UINT64CONST(815121763415193074), UINT64CONST(286389039184749612)}, + {UINT64CONST(11720143854957885429), UINT64CONST(229111231347799689)}, {UINT64CONST(13065463898708218666), UINT64CONST(183288985078239751)}, + {UINT64CONST(6763022304224664610), UINT64CONST(146631188062591801)}, {UINT64CONST(3442138057275642729), UINT64CONST(234609900900146882)}, + {UINT64CONST(13821756890046245153), UINT64CONST(187687920720117505)}, {UINT64CONST(11057405512036996122), UINT64CONST(150150336576094004)}, + {UINT64CONST(6623802375033462826), UINT64CONST(240240538521750407)}, {UINT64CONST(16367088344252501231), UINT64CONST(192192430817400325)}, + {UINT64CONST(13093670675402000985), UINT64CONST(153753944653920260)}, {UINT64CONST(2503129006933649959), UINT64CONST(246006311446272417)}, + {UINT64CONST(13070549649772650937), UINT64CONST(196805049157017933)}, {UINT64CONST(17835137349301941396), UINT64CONST(157444039325614346)}, + {UINT64CONST(2710778055689733971), UINT64CONST(251910462920982955)}, {UINT64CONST(2168622444551787177), UINT64CONST(201528370336786364)}, + {UINT64CONST(5424246770383340065), UINT64CONST(161222696269429091)}, {UINT64CONST(1300097203129523457), UINT64CONST(257956314031086546)}, + {UINT64CONST(15797473021471260058), UINT64CONST(206365051224869236)}, {UINT64CONST(8948629602435097724), UINT64CONST(165092040979895389)}, + {UINT64CONST(3249760919670425388), UINT64CONST(264147265567832623)}, {UINT64CONST(9978506365220160957), UINT64CONST(211317812454266098)}, + {UINT64CONST(15361502721659949412), UINT64CONST(169054249963412878)}, {UINT64CONST(2442311466204457120), UINT64CONST(270486799941460606)}, + {UINT64CONST(16711244431931206989), UINT64CONST(216389439953168484)}, {UINT64CONST(17058344360286875914), UINT64CONST(173111551962534787)}, + {UINT64CONST(12535955717491360170), UINT64CONST(276978483140055660)}, {UINT64CONST(10028764573993088136), UINT64CONST(221582786512044528)}, + {UINT64CONST(15401709288678291155), UINT64CONST(177266229209635622)}, {UINT64CONST(9885339602917624555), UINT64CONST(283625966735416996)}, + {UINT64CONST(4218922867592189321), UINT64CONST(226900773388333597)}, {UINT64CONST(14443184738299482427), UINT64CONST(181520618710666877)}, + {UINT64CONST(4175850161155765295), UINT64CONST(145216494968533502)}, {UINT64CONST(10370709072591134795), UINT64CONST(232346391949653603)}, + {UINT64CONST(15675264887556728482), UINT64CONST(185877113559722882)}, {UINT64CONST(5161514280561562140), UINT64CONST(148701690847778306)}, + {UINT64CONST(879725219414678777), UINT64CONST(237922705356445290)}, {UINT64CONST(703780175531743021), UINT64CONST(190338164285156232)}, + {UINT64CONST(11631070584651125387), UINT64CONST(152270531428124985)}, {UINT64CONST(162968861732249003), UINT64CONST(243632850284999977)}, + {UINT64CONST(11198421533611530172), UINT64CONST(194906280227999981)}, {UINT64CONST(5269388412147313814), UINT64CONST(155925024182399985)}, + {UINT64CONST(8431021459435702103), UINT64CONST(249480038691839976)}, {UINT64CONST(3055468352806651359), UINT64CONST(199584030953471981)}, + {UINT64CONST(17201769941212962380), UINT64CONST(159667224762777584)}, {UINT64CONST(16454785461715008838), UINT64CONST(255467559620444135)}, + {UINT64CONST(13163828369372007071), UINT64CONST(204374047696355308)}, {UINT64CONST(17909760324981426303), UINT64CONST(163499238157084246)}, + {UINT64CONST(2830174816776909822), UINT64CONST(261598781051334795)}, {UINT64CONST(2264139853421527858), UINT64CONST(209279024841067836)}, + {UINT64CONST(16568707141704863579), UINT64CONST(167423219872854268)}, {UINT64CONST(4373838538276319787), UINT64CONST(267877151796566830)}, + {UINT64CONST(3499070830621055830), UINT64CONST(214301721437253464)}, {UINT64CONST(6488605479238754987), UINT64CONST(171441377149802771)}, + {UINT64CONST(3003071137298187333), UINT64CONST(274306203439684434)}, {UINT64CONST(6091805724580460189), UINT64CONST(219444962751747547)}, + {UINT64CONST(15941491023890099121), UINT64CONST(175555970201398037)}, {UINT64CONST(10748990379256517301), UINT64CONST(280889552322236860)}, + {UINT64CONST(8599192303405213841), UINT64CONST(224711641857789488)}, {UINT64CONST(14258051472207991719), UINT64CONST(179769313486231590)} +}; + +static const uint64 DOUBLE_POW5_SPLIT[326][2] = { + {UINT64CONST(0), UINT64CONST(72057594037927936)}, {UINT64CONST(0), UINT64CONST(90071992547409920)}, + {UINT64CONST(0), UINT64CONST(112589990684262400)}, {UINT64CONST(0), UINT64CONST(140737488355328000)}, + {UINT64CONST(0), UINT64CONST(87960930222080000)}, {UINT64CONST(0), UINT64CONST(109951162777600000)}, + {UINT64CONST(0), UINT64CONST(137438953472000000)}, {UINT64CONST(0), UINT64CONST(85899345920000000)}, + {UINT64CONST(0), UINT64CONST(107374182400000000)}, {UINT64CONST(0), UINT64CONST(134217728000000000)}, + {UINT64CONST(0), UINT64CONST(83886080000000000)}, {UINT64CONST(0), UINT64CONST(104857600000000000)}, + {UINT64CONST(0), UINT64CONST(131072000000000000)}, {UINT64CONST(0), UINT64CONST(81920000000000000)}, + {UINT64CONST(0), UINT64CONST(102400000000000000)}, {UINT64CONST(0), UINT64CONST(128000000000000000)}, + {UINT64CONST(0), UINT64CONST(80000000000000000)}, {UINT64CONST(0), UINT64CONST(100000000000000000)}, + {UINT64CONST(0), UINT64CONST(125000000000000000)}, {UINT64CONST(0), UINT64CONST(78125000000000000)}, + {UINT64CONST(0), UINT64CONST(97656250000000000)}, {UINT64CONST(0), UINT64CONST(122070312500000000)}, + {UINT64CONST(0), UINT64CONST(76293945312500000)}, {UINT64CONST(0), UINT64CONST(95367431640625000)}, + {UINT64CONST(0), UINT64CONST(119209289550781250)}, {UINT64CONST(4611686018427387904), UINT64CONST(74505805969238281)}, + {UINT64CONST(10376293541461622784), UINT64CONST(93132257461547851)}, {UINT64CONST(8358680908399640576), UINT64CONST(116415321826934814)}, + {UINT64CONST(612489549322387456), UINT64CONST(72759576141834259)}, {UINT64CONST(14600669991935148032), UINT64CONST(90949470177292823)}, + {UINT64CONST(13639151471491547136), UINT64CONST(113686837721616029)}, {UINT64CONST(3213881284082270208), UINT64CONST(142108547152020037)}, + {UINT64CONST(4314518811765112832), UINT64CONST(88817841970012523)}, {UINT64CONST(781462496279003136), UINT64CONST(111022302462515654)}, + {UINT64CONST(10200200157203529728), UINT64CONST(138777878078144567)}, {UINT64CONST(13292654125893287936), UINT64CONST(86736173798840354)}, + {UINT64CONST(7392445620511834112), UINT64CONST(108420217248550443)}, {UINT64CONST(4628871007212404736), UINT64CONST(135525271560688054)}, + {UINT64CONST(16728102434789916672), UINT64CONST(84703294725430033)}, {UINT64CONST(7075069988205232128), UINT64CONST(105879118406787542)}, + {UINT64CONST(18067209522111315968), UINT64CONST(132348898008484427)}, {UINT64CONST(8986162942105878528), UINT64CONST(82718061255302767)}, + {UINT64CONST(6621017659204960256), UINT64CONST(103397576569128459)}, {UINT64CONST(3664586055578812416), UINT64CONST(129246970711410574)}, + {UINT64CONST(16125424340018921472), UINT64CONST(80779356694631608)}, {UINT64CONST(1710036351314100224), UINT64CONST(100974195868289511)}, + {UINT64CONST(15972603494424788992), UINT64CONST(126217744835361888)}, {UINT64CONST(9982877184015493120), UINT64CONST(78886090522101180)}, + {UINT64CONST(12478596480019366400), UINT64CONST(98607613152626475)}, {UINT64CONST(10986559581596820096), UINT64CONST(123259516440783094)}, + {UINT64CONST(2254913720070624656), UINT64CONST(77037197775489434)}, {UINT64CONST(12042014186943056628), UINT64CONST(96296497219361792)}, + {UINT64CONST(15052517733678820785), UINT64CONST(120370621524202240)}, {UINT64CONST(9407823583549262990), UINT64CONST(75231638452626400)}, + {UINT64CONST(11759779479436578738), UINT64CONST(94039548065783000)}, {UINT64CONST(14699724349295723422), UINT64CONST(117549435082228750)}, + {UINT64CONST(4575641699882439235), UINT64CONST(73468396926392969)}, {UINT64CONST(10331238143280436948), UINT64CONST(91835496157991211)}, + {UINT64CONST(8302361660673158281), UINT64CONST(114794370197489014)}, {UINT64CONST(1154580038986672043), UINT64CONST(143492962746861268)}, + {UINT64CONST(9944984561221445835), UINT64CONST(89683101716788292)}, {UINT64CONST(12431230701526807293), UINT64CONST(112103877145985365)}, + {UINT64CONST(1703980321626345405), UINT64CONST(140129846432481707)}, {UINT64CONST(17205888765512323542), UINT64CONST(87581154020301066)}, + {UINT64CONST(12283988920035628619), UINT64CONST(109476442525376333)}, {UINT64CONST(1519928094762372062), UINT64CONST(136845553156720417)}, + {UINT64CONST(12479170105294952299), UINT64CONST(85528470722950260)}, {UINT64CONST(15598962631618690374), UINT64CONST(106910588403687825)}, + {UINT64CONST(5663645234241199255), UINT64CONST(133638235504609782)}, {UINT64CONST(17374836326682913246), UINT64CONST(83523897190381113)}, + {UINT64CONST(7883487353071477846), UINT64CONST(104404871487976392)}, {UINT64CONST(9854359191339347308), UINT64CONST(130506089359970490)}, + {UINT64CONST(10770660513014479971), UINT64CONST(81566305849981556)}, {UINT64CONST(13463325641268099964), UINT64CONST(101957882312476945)}, + {UINT64CONST(2994098996302961243), UINT64CONST(127447352890596182)}, {UINT64CONST(15706369927971514489), UINT64CONST(79654595556622613)}, + {UINT64CONST(5797904354682229399), UINT64CONST(99568244445778267)}, {UINT64CONST(2635694424925398845), UINT64CONST(124460305557222834)}, + {UINT64CONST(6258995034005762182), UINT64CONST(77787690973264271)}, {UINT64CONST(3212057774079814824), UINT64CONST(97234613716580339)}, + {UINT64CONST(17850130272881932242), UINT64CONST(121543267145725423)}, {UINT64CONST(18073860448192289507), UINT64CONST(75964541966078389)}, + {UINT64CONST(8757267504958198172), UINT64CONST(94955677457597987)}, {UINT64CONST(6334898362770359811), UINT64CONST(118694596821997484)}, + {UINT64CONST(13182683513586250689), UINT64CONST(74184123013748427)}, {UINT64CONST(11866668373555425458), UINT64CONST(92730153767185534)}, + {UINT64CONST(5609963430089506015), UINT64CONST(115912692208981918)}, {UINT64CONST(17341285199088104971), UINT64CONST(72445432630613698)}, + {UINT64CONST(12453234462005355406), UINT64CONST(90556790788267123)}, {UINT64CONST(10954857059079306353), UINT64CONST(113195988485333904)}, + {UINT64CONST(13693571323849132942), UINT64CONST(141494985606667380)}, {UINT64CONST(17781854114260483896), UINT64CONST(88434366004167112)}, + {UINT64CONST(3780573569116053255), UINT64CONST(110542957505208891)}, {UINT64CONST(114030942967678664), UINT64CONST(138178696881511114)}, + {UINT64CONST(4682955357782187069), UINT64CONST(86361685550944446)}, {UINT64CONST(15077066234082509644), UINT64CONST(107952106938680557)}, + {UINT64CONST(5011274737320973344), UINT64CONST(134940133673350697)}, {UINT64CONST(14661261756894078100), UINT64CONST(84337583545844185)}, + {UINT64CONST(4491519140835433913), UINT64CONST(105421979432305232)}, {UINT64CONST(5614398926044292391), UINT64CONST(131777474290381540)}, + {UINT64CONST(12732371365632458552), UINT64CONST(82360921431488462)}, {UINT64CONST(6692092170185797382), UINT64CONST(102951151789360578)}, + {UINT64CONST(17588487249587022536), UINT64CONST(128688939736700722)}, {UINT64CONST(15604490549419276989), UINT64CONST(80430587335437951)}, + {UINT64CONST(14893927168346708332), UINT64CONST(100538234169297439)}, {UINT64CONST(14005722942005997511), UINT64CONST(125672792711621799)}, + {UINT64CONST(15671105866394830300), UINT64CONST(78545495444763624)}, {UINT64CONST(1142138259283986260), UINT64CONST(98181869305954531)}, + {UINT64CONST(15262730879387146537), UINT64CONST(122727336632443163)}, {UINT64CONST(7233363790403272633), UINT64CONST(76704585395276977)}, + {UINT64CONST(13653390756431478696), UINT64CONST(95880731744096221)}, {UINT64CONST(3231680390257184658), UINT64CONST(119850914680120277)}, + {UINT64CONST(4325643253124434363), UINT64CONST(74906821675075173)}, {UINT64CONST(10018740084832930858), UINT64CONST(93633527093843966)}, + {UINT64CONST(3300053069186387764), UINT64CONST(117041908867304958)}, {UINT64CONST(15897591223523656064), UINT64CONST(73151193042065598)}, + {UINT64CONST(10648616992549794273), UINT64CONST(91438991302581998)}, {UINT64CONST(4087399203832467033), UINT64CONST(114298739128227498)}, + {UINT64CONST(14332621041645359599), UINT64CONST(142873423910284372)}, {UINT64CONST(18181260187883125557), UINT64CONST(89295889943927732)}, + {UINT64CONST(4279831161144355331), UINT64CONST(111619862429909666)}, {UINT64CONST(14573160988285219972), UINT64CONST(139524828037387082)}, + {UINT64CONST(13719911636105650386), UINT64CONST(87203017523366926)}, {UINT64CONST(7926517508277287175), UINT64CONST(109003771904208658)}, + {UINT64CONST(684774848491833161), UINT64CONST(136254714880260823)}, {UINT64CONST(7345513307948477581), UINT64CONST(85159196800163014)}, + {UINT64CONST(18405263671790372785), UINT64CONST(106448996000203767)}, {UINT64CONST(18394893571310578077), UINT64CONST(133061245000254709)}, + {UINT64CONST(13802651491282805250), UINT64CONST(83163278125159193)}, {UINT64CONST(3418256308821342851), UINT64CONST(103954097656448992)}, + {UINT64CONST(4272820386026678563), UINT64CONST(129942622070561240)}, {UINT64CONST(2670512741266674102), UINT64CONST(81214138794100775)}, + {UINT64CONST(17173198981865506339), UINT64CONST(101517673492625968)}, {UINT64CONST(3019754653622331308), UINT64CONST(126897091865782461)}, + {UINT64CONST(4193189667727651020), UINT64CONST(79310682416114038)}, {UINT64CONST(14464859121514339583), UINT64CONST(99138353020142547)}, + {UINT64CONST(13469387883465536574), UINT64CONST(123922941275178184)}, {UINT64CONST(8418367427165960359), UINT64CONST(77451838296986365)}, + {UINT64CONST(15134645302384838353), UINT64CONST(96814797871232956)}, {UINT64CONST(471562554271496325), UINT64CONST(121018497339041196)}, + {UINT64CONST(9518098633274461011), UINT64CONST(75636560836900747)}, {UINT64CONST(7285937273165688360), UINT64CONST(94545701046125934)}, + {UINT64CONST(18330793628311886258), UINT64CONST(118182126307657417)}, {UINT64CONST(4539216990053847055), UINT64CONST(73863828942285886)}, + {UINT64CONST(14897393274422084627), UINT64CONST(92329786177857357)}, {UINT64CONST(4786683537745442072), UINT64CONST(115412232722321697)}, + {UINT64CONST(14520892257159371055), UINT64CONST(72132645451451060)}, {UINT64CONST(18151115321449213818), UINT64CONST(90165806814313825)}, + {UINT64CONST(8853836096529353561), UINT64CONST(112707258517892282)}, {UINT64CONST(1843923083806916143), UINT64CONST(140884073147365353)}, + {UINT64CONST(12681666973447792349), UINT64CONST(88052545717103345)}, {UINT64CONST(2017025661527576725), UINT64CONST(110065682146379182)}, + {UINT64CONST(11744654113764246714), UINT64CONST(137582102682973977)}, {UINT64CONST(422879793461572340), UINT64CONST(85988814176858736)}, + {UINT64CONST(528599741826965425), UINT64CONST(107486017721073420)}, {UINT64CONST(660749677283706782), UINT64CONST(134357522151341775)}, + {UINT64CONST(7330497575943398595), UINT64CONST(83973451344588609)}, {UINT64CONST(13774807988356636147), UINT64CONST(104966814180735761)}, + {UINT64CONST(3383451930163631472), UINT64CONST(131208517725919702)}, {UINT64CONST(15949715511634433382), UINT64CONST(82005323578699813)}, + {UINT64CONST(6102086334260878016), UINT64CONST(102506654473374767)}, {UINT64CONST(3015921899398709616), UINT64CONST(128133318091718459)}, + {UINT64CONST(18025852251620051174), UINT64CONST(80083323807324036)}, {UINT64CONST(4085571240815512351), UINT64CONST(100104154759155046)}, + {UINT64CONST(14330336087874166247), UINT64CONST(125130193448943807)}, {UINT64CONST(15873989082562435760), UINT64CONST(78206370905589879)}, + {UINT64CONST(15230800334775656796), UINT64CONST(97757963631987349)}, {UINT64CONST(5203442363187407284), UINT64CONST(122197454539984187)}, + {UINT64CONST(946308467778435600), UINT64CONST(76373409087490117)}, {UINT64CONST(5794571603150432404), UINT64CONST(95466761359362646)}, + {UINT64CONST(16466586540792816313), UINT64CONST(119333451699203307)}, {UINT64CONST(7985773578781816244), UINT64CONST(74583407312002067)}, + {UINT64CONST(5370530955049882401), UINT64CONST(93229259140002584)}, {UINT64CONST(6713163693812353001), UINT64CONST(116536573925003230)}, + {UINT64CONST(18030785363914884337), UINT64CONST(72835358703127018)}, {UINT64CONST(13315109668038829614), UINT64CONST(91044198378908773)}, + {UINT64CONST(2808829029766373305), UINT64CONST(113805247973635967)}, {UINT64CONST(17346094342490130344), UINT64CONST(142256559967044958)}, + {UINT64CONST(6229622945628943561), UINT64CONST(88910349979403099)}, {UINT64CONST(3175342663608791547), UINT64CONST(111137937474253874)}, + {UINT64CONST(13192550366365765242), UINT64CONST(138922421842817342)}, {UINT64CONST(3633657960551215372), UINT64CONST(86826513651760839)}, + {UINT64CONST(18377130505971182927), UINT64CONST(108533142064701048)}, {UINT64CONST(4524669058754427043), UINT64CONST(135666427580876311)}, + {UINT64CONST(9745447189362598758), UINT64CONST(84791517238047694)}, {UINT64CONST(2958436949848472639), UINT64CONST(105989396547559618)}, + {UINT64CONST(12921418224165366607), UINT64CONST(132486745684449522)}, {UINT64CONST(12687572408530742033), UINT64CONST(82804216052780951)}, + {UINT64CONST(11247779492236039638), UINT64CONST(103505270065976189)}, {UINT64CONST(224666310012885835), UINT64CONST(129381587582470237)}, + {UINT64CONST(2446259452971747599), UINT64CONST(80863492239043898)}, {UINT64CONST(12281196353069460307), UINT64CONST(101079365298804872)}, + {UINT64CONST(15351495441336825384), UINT64CONST(126349206623506090)}, {UINT64CONST(14206370669262903769), UINT64CONST(78968254139691306)}, + {UINT64CONST(8534591299723853903), UINT64CONST(98710317674614133)}, {UINT64CONST(15279925143082205283), UINT64CONST(123387897093267666)}, + {UINT64CONST(14161639232853766206), UINT64CONST(77117435683292291)}, {UINT64CONST(13090363022639819853), UINT64CONST(96396794604115364)}, + {UINT64CONST(16362953778299774816), UINT64CONST(120495993255144205)}, {UINT64CONST(12532689120651053212), UINT64CONST(75309995784465128)}, + {UINT64CONST(15665861400813816515), UINT64CONST(94137494730581410)}, {UINT64CONST(10358954714162494836), UINT64CONST(117671868413226763)}, + {UINT64CONST(4168503687137865320), UINT64CONST(73544917758266727)}, {UINT64CONST(598943590494943747), UINT64CONST(91931147197833409)}, + {UINT64CONST(5360365506546067587), UINT64CONST(114913933997291761)}, {UINT64CONST(11312142901609972388), UINT64CONST(143642417496614701)}, + {UINT64CONST(9375932322719926695), UINT64CONST(89776510935384188)}, {UINT64CONST(11719915403399908368), UINT64CONST(112220638669230235)}, + {UINT64CONST(10038208235822497557), UINT64CONST(140275798336537794)}, {UINT64CONST(10885566165816448877), UINT64CONST(87672373960336121)}, + {UINT64CONST(18218643725697949000), UINT64CONST(109590467450420151)}, {UINT64CONST(18161618638695048346), UINT64CONST(136988084313025189)}, + {UINT64CONST(13656854658398099168), UINT64CONST(85617552695640743)}, {UINT64CONST(12459382304570236056), UINT64CONST(107021940869550929)}, + {UINT64CONST(1739169825430631358), UINT64CONST(133777426086938662)}, {UINT64CONST(14922039196176308311), UINT64CONST(83610891304336663)}, + {UINT64CONST(14040862976792997485), UINT64CONST(104513614130420829)}, {UINT64CONST(3716020665709083144), UINT64CONST(130642017663026037)}, + {UINT64CONST(4628355925281870917), UINT64CONST(81651261039391273)}, {UINT64CONST(10397130925029726550), UINT64CONST(102064076299239091)}, + {UINT64CONST(8384727637859770284), UINT64CONST(127580095374048864)}, {UINT64CONST(5240454773662356427), UINT64CONST(79737559608780540)}, + {UINT64CONST(6550568467077945534), UINT64CONST(99671949510975675)}, {UINT64CONST(3576524565420044014), UINT64CONST(124589936888719594)}, + {UINT64CONST(6847013871814915412), UINT64CONST(77868710555449746)}, {UINT64CONST(17782139376623420074), UINT64CONST(97335888194312182)}, + {UINT64CONST(13004302183924499284), UINT64CONST(121669860242890228)}, {UINT64CONST(17351060901807587860), UINT64CONST(76043662651806392)}, + {UINT64CONST(3242082053549933210), UINT64CONST(95054578314757991)}, {UINT64CONST(17887660622219580224), UINT64CONST(118818222893447488)}, + {UINT64CONST(11179787888887237640), UINT64CONST(74261389308404680)}, {UINT64CONST(13974734861109047050), UINT64CONST(92826736635505850)}, + {UINT64CONST(8245046539531533005), UINT64CONST(116033420794382313)}, {UINT64CONST(16682369133275677888), UINT64CONST(72520887996488945)}, + {UINT64CONST(7017903361312433648), UINT64CONST(90651109995611182)}, {UINT64CONST(17995751238495317868), UINT64CONST(113313887494513977)}, + {UINT64CONST(8659630992836983623), UINT64CONST(141642359368142472)}, {UINT64CONST(5412269370523114764), UINT64CONST(88526474605089045)}, + {UINT64CONST(11377022731581281359), UINT64CONST(110658093256361306)}, {UINT64CONST(4997906377621825891), UINT64CONST(138322616570451633)}, + {UINT64CONST(14652906532082110942), UINT64CONST(86451635356532270)}, {UINT64CONST(9092761128247862869), UINT64CONST(108064544195665338)}, + {UINT64CONST(2142579373455052779), UINT64CONST(135080680244581673)}, {UINT64CONST(12868327154477877747), UINT64CONST(84425425152863545)}, + {UINT64CONST(2250350887815183471), UINT64CONST(105531781441079432)}, {UINT64CONST(2812938609768979339), UINT64CONST(131914726801349290)}, + {UINT64CONST(6369772649532999991), UINT64CONST(82446704250843306)}, {UINT64CONST(17185587848771025797), UINT64CONST(103058380313554132)}, + {UINT64CONST(3035240737254230630), UINT64CONST(128822975391942666)}, {UINT64CONST(6508711479211282048), UINT64CONST(80514359619964166)}, + {UINT64CONST(17359261385868878368), UINT64CONST(100642949524955207)}, {UINT64CONST(17087390713908710056), UINT64CONST(125803686906194009)}, + {UINT64CONST(3762090168551861929), UINT64CONST(78627304316371256)}, {UINT64CONST(4702612710689827411), UINT64CONST(98284130395464070)}, + {UINT64CONST(15101637925217060072), UINT64CONST(122855162994330087)}, {UINT64CONST(16356052730901744401), UINT64CONST(76784476871456304)}, + {UINT64CONST(1998321839917628885), UINT64CONST(95980596089320381)}, {UINT64CONST(7109588318324424010), UINT64CONST(119975745111650476)}, + {UINT64CONST(13666864735807540814), UINT64CONST(74984840694781547)}, {UINT64CONST(12471894901332038114), UINT64CONST(93731050868476934)}, + {UINT64CONST(6366496589810271835), UINT64CONST(117163813585596168)}, {UINT64CONST(3979060368631419896), UINT64CONST(73227383490997605)}, + {UINT64CONST(9585511479216662775), UINT64CONST(91534229363747006)}, {UINT64CONST(2758517312166052660), UINT64CONST(114417786704683758)}, + {UINT64CONST(12671518677062341634), UINT64CONST(143022233380854697)}, {UINT64CONST(1002170145522881665), UINT64CONST(89388895863034186)}, + {UINT64CONST(10476084718758377889), UINT64CONST(111736119828792732)}, {UINT64CONST(13095105898447972362), UINT64CONST(139670149785990915)}, + {UINT64CONST(5878598177316288774), UINT64CONST(87293843616244322)}, {UINT64CONST(16571619758500136775), UINT64CONST(109117304520305402)}, + {UINT64CONST(11491152661270395161), UINT64CONST(136396630650381753)}, {UINT64CONST(264441385652915120), UINT64CONST(85247894156488596)}, + {UINT64CONST(330551732066143900), UINT64CONST(106559867695610745)}, {UINT64CONST(5024875683510067779), UINT64CONST(133199834619513431)}, + {UINT64CONST(10058076329834874218), UINT64CONST(83249896637195894)}, {UINT64CONST(3349223375438816964), UINT64CONST(104062370796494868)}, + {UINT64CONST(4186529219298521205), UINT64CONST(130077963495618585)}, {UINT64CONST(14145795808130045513), UINT64CONST(81298727184761615)}, + {UINT64CONST(13070558741735168987), UINT64CONST(101623408980952019)}, {UINT64CONST(11726512408741573330), UINT64CONST(127029261226190024)}, + {UINT64CONST(7329070255463483331), UINT64CONST(79393288266368765)}, {UINT64CONST(13773023837756742068), UINT64CONST(99241610332960956)}, + {UINT64CONST(17216279797195927585), UINT64CONST(124052012916201195)}, {UINT64CONST(8454331864033760789), UINT64CONST(77532508072625747)}, + {UINT64CONST(5956228811614813082), UINT64CONST(96915635090782184)}, {UINT64CONST(7445286014518516353), UINT64CONST(121144543863477730)}, + {UINT64CONST(9264989777501460624), UINT64CONST(75715339914673581)}, {UINT64CONST(16192923240304213684), UINT64CONST(94644174893341976)}, + {UINT64CONST(1794409976670715490), UINT64CONST(118305218616677471)}, {UINT64CONST(8039035263060279037), UINT64CONST(73940761635423419)}, + {UINT64CONST(5437108060397960892), UINT64CONST(92425952044279274)}, {UINT64CONST(16019757112352226923), UINT64CONST(115532440055349092)}, + {UINT64CONST(788976158365366019), UINT64CONST(72207775034593183)}, {UINT64CONST(14821278253238871236), UINT64CONST(90259718793241478)}, + {UINT64CONST(9303225779693813237), UINT64CONST(112824648491551848)}, {UINT64CONST(11629032224617266546), UINT64CONST(141030810614439810)}, + {UINT64CONST(11879831158813179495), UINT64CONST(88144256634024881)}, {UINT64CONST(1014730893234310657), UINT64CONST(110180320792531102)}, + {UINT64CONST(10491785653397664129), UINT64CONST(137725400990663877)}, {UINT64CONST(8863209042587234033), UINT64CONST(86078375619164923)}, + {UINT64CONST(6467325284806654637), UINT64CONST(107597969523956154)}, {UINT64CONST(17307528642863094104), UINT64CONST(134497461904945192)}, + {UINT64CONST(10817205401789433815), UINT64CONST(84060913690590745)}, {UINT64CONST(18133192770664180173), UINT64CONST(105076142113238431)}, + {UINT64CONST(18054804944902837312), UINT64CONST(131345177641548039)}, {UINT64CONST(18201782118205355176), UINT64CONST(82090736025967524)}, + {UINT64CONST(4305483574047142354), UINT64CONST(102613420032459406)}, {UINT64CONST(14605226504413703751), UINT64CONST(128266775040574257)}, + {UINT64CONST(2210737537617482988), UINT64CONST(80166734400358911)}, {UINT64CONST(16598479977304017447), UINT64CONST(100208418000448638)}, + {UINT64CONST(11524727934775246001), UINT64CONST(125260522500560798)}, {UINT64CONST(2591268940807140847), UINT64CONST(78287826562850499)}, + {UINT64CONST(17074144231291089770), UINT64CONST(97859783203563123)}, {UINT64CONST(16730994270686474309), UINT64CONST(122324729004453904)}, + {UINT64CONST(10456871419179046443), UINT64CONST(76452955627783690)}, {UINT64CONST(3847717237119032246), UINT64CONST(95566194534729613)}, + {UINT64CONST(9421332564826178211), UINT64CONST(119457743168412016)}, {UINT64CONST(5888332853016361382), UINT64CONST(74661089480257510)}, + {UINT64CONST(16583788103125227536), UINT64CONST(93326361850321887)}, {UINT64CONST(16118049110479146516), UINT64CONST(116657952312902359)}, + {UINT64CONST(16991309721690548428), UINT64CONST(72911220195563974)}, {UINT64CONST(12015765115258409727), UINT64CONST(91139025244454968)}, + {UINT64CONST(15019706394073012159), UINT64CONST(113923781555568710)}, {UINT64CONST(9551260955736489391), UINT64CONST(142404726944460888)}, + {UINT64CONST(5969538097335305869), UINT64CONST(89002954340288055)}, {UINT64CONST(2850236603241744433), UINT64CONST(111253692925360069)} +}; + +#endif /* RYU_D2S_FULL_TABLE_H */ diff --git a/src/common/d2s_intrinsics.h b/src/common/d2s_intrinsics.h new file mode 100644 index 0000000..ae0f28d --- /dev/null +++ b/src/common/d2s_intrinsics.h @@ -0,0 +1,202 @@ +/*--------------------------------------------------------------------------- + * + * Ryu floating-point output for double precision. + * + * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/d2s_intrinsics.h + * + * This is a modification of code taken from github.com/ulfjack/ryu under the + * terms of the Boost license (not the Apache license). The original copyright + * notice follows: + * + * Copyright 2018 Ulf Adams + * + * The contents of this file may be used under the terms of the Apache + * License, Version 2.0. + * + * (See accompanying file LICENSE-Apache or copy at + * http://www.apache.org/licenses/LICENSE-2.0) + * + * Alternatively, the contents of this file may be used under the terms of the + * Boost Software License, Version 1.0. + * + * (See accompanying file LICENSE-Boost or copy at + * https://www.boost.org/LICENSE_1_0.txt) + * + * Unless required by applicable law or agreed to in writing, this software is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. + * + *--------------------------------------------------------------------------- + */ +#ifndef RYU_D2S_INTRINSICS_H +#define RYU_D2S_INTRINSICS_H + +#if defined(HAS_64_BIT_INTRINSICS) + +#include <intrin.h> + +static inline uint64 +umul128(const uint64 a, const uint64 b, uint64 *const productHi) +{ + return _umul128(a, b, productHi); +} + +static inline uint64 +shiftright128(const uint64 lo, const uint64 hi, const uint32 dist) +{ + /* + * For the __shiftright128 intrinsic, the shift value is always modulo 64. + * In the current implementation of the double-precision version of Ryu, + * the shift value is always < 64. (In the case RYU_OPTIMIZE_SIZE == 0, + * the shift value is in the range [49, 58]. Otherwise in the range [2, + * 59].) Check this here in case a future change requires larger shift + * values. In this case this function needs to be adjusted. + */ + Assert(dist < 64); + return __shiftright128(lo, hi, (unsigned char) dist); +} + +#else /* defined(HAS_64_BIT_INTRINSICS) */ + +static inline uint64 +umul128(const uint64 a, const uint64 b, uint64 *const productHi) +{ + /* + * The casts here help MSVC to avoid calls to the __allmul library + * function. + */ + const uint32 aLo = (uint32) a; + const uint32 aHi = (uint32) (a >> 32); + const uint32 bLo = (uint32) b; + const uint32 bHi = (uint32) (b >> 32); + + const uint64 b00 = (uint64) aLo * bLo; + const uint64 b01 = (uint64) aLo * bHi; + const uint64 b10 = (uint64) aHi * bLo; + const uint64 b11 = (uint64) aHi * bHi; + + const uint32 b00Lo = (uint32) b00; + const uint32 b00Hi = (uint32) (b00 >> 32); + + const uint64 mid1 = b10 + b00Hi; + const uint32 mid1Lo = (uint32) (mid1); + const uint32 mid1Hi = (uint32) (mid1 >> 32); + + const uint64 mid2 = b01 + mid1Lo; + const uint32 mid2Lo = (uint32) (mid2); + const uint32 mid2Hi = (uint32) (mid2 >> 32); + + const uint64 pHi = b11 + mid1Hi + mid2Hi; + const uint64 pLo = ((uint64) mid2Lo << 32) + b00Lo; + + *productHi = pHi; + return pLo; +} + +static inline uint64 +shiftright128(const uint64 lo, const uint64 hi, const uint32 dist) +{ + /* We don't need to handle the case dist >= 64 here (see above). */ + Assert(dist < 64); +#if !defined(RYU_32_BIT_PLATFORM) + Assert(dist > 0); + return (hi << (64 - dist)) | (lo >> dist); +#else + /* Avoid a 64-bit shift by taking advantage of the range of shift values. */ + Assert(dist >= 32); + return (hi << (64 - dist)) | ((uint32) (lo >> 32) >> (dist - 32)); +#endif +} + +#endif /* // defined(HAS_64_BIT_INTRINSICS) */ + +#ifdef RYU_32_BIT_PLATFORM + +/* Returns the high 64 bits of the 128-bit product of a and b. */ +static inline uint64 +umulh(const uint64 a, const uint64 b) +{ + /* + * Reuse the umul128 implementation. Optimizers will likely eliminate the + * instructions used to compute the low part of the product. + */ + uint64 hi; + + umul128(a, b, &hi); + return hi; +} + +/*---- + * On 32-bit platforms, compilers typically generate calls to library + * functions for 64-bit divisions, even if the divisor is a constant. + * + * E.g.: + * https://bugs.llvm.org/show_bug.cgi?id=37932 + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=17958 + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37443 + * + * The functions here perform division-by-constant using multiplications + * in the same way as 64-bit compilers would do. + * + * NB: + * The multipliers and shift values are the ones generated by clang x64 + * for expressions like x/5, x/10, etc. + *---- + */ + +static inline uint64 +div5(const uint64 x) +{ + return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 2; +} + +static inline uint64 +div10(const uint64 x) +{ + return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 3; +} + +static inline uint64 +div100(const uint64 x) +{ + return umulh(x >> 2, UINT64CONST(0x28F5C28F5C28F5C3)) >> 2; +} + +static inline uint64 +div1e8(const uint64 x) +{ + return umulh(x, UINT64CONST(0xABCC77118461CEFD)) >> 26; +} + +#else /* RYU_32_BIT_PLATFORM */ + +static inline uint64 +div5(const uint64 x) +{ + return x / 5; +} + +static inline uint64 +div10(const uint64 x) +{ + return x / 10; +} + +static inline uint64 +div100(const uint64 x) +{ + return x / 100; +} + +static inline uint64 +div1e8(const uint64 x) +{ + return x / 100000000; +} + +#endif /* RYU_32_BIT_PLATFORM */ + +#endif /* RYU_D2S_INTRINSICS_H */ diff --git a/src/common/digit_table.h b/src/common/digit_table.h new file mode 100644 index 0000000..483aa17 --- /dev/null +++ b/src/common/digit_table.h @@ -0,0 +1,21 @@ +#ifndef RYU_DIGIT_TABLE_H +#define RYU_DIGIT_TABLE_H + +/* + * A table of all two-digit numbers. This is used to speed up decimal digit + * generation by copying pairs of digits into the final output. + */ +static const char DIGIT_TABLE[200] = { + '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', + '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', + '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', + '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', + '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', + '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', + '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', + '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', + '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', + '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9' +}; + +#endif /* RYU_DIGIT_TABLE_H */ diff --git a/src/common/encnames.c b/src/common/encnames.c new file mode 100644 index 0000000..0412a82 --- /dev/null +++ b/src/common/encnames.c @@ -0,0 +1,598 @@ +/*------------------------------------------------------------------------- + * + * encnames.c + * Encoding names and routines for working with them. + * + * Portions Copyright (c) 2001-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/encnames.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include <ctype.h> +#include <unistd.h> + +#include "mb/pg_wchar.h" + + +/* ---------- + * All encoding names, sorted: *** A L P H A B E T I C *** + * + * All names must be without irrelevant chars, search routines use + * isalnum() chars only. It means ISO-8859-1, iso_8859-1 and Iso8859_1 + * are always converted to 'iso88591'. All must be lower case. + * + * The table doesn't contain 'cs' aliases (like csISOLatin1). It's needed? + * + * Karel Zak, Aug 2001 + * ---------- + */ +typedef struct pg_encname +{ + const char *name; + pg_enc encoding; +} pg_encname; + +static const pg_encname pg_encname_tbl[] = +{ + { + "abc", PG_WIN1258 + }, /* alias for WIN1258 */ + { + "alt", PG_WIN866 + }, /* IBM866 */ + { + "big5", PG_BIG5 + }, /* Big5; Chinese for Taiwan multibyte set */ + { + "euccn", PG_EUC_CN + }, /* EUC-CN; Extended Unix Code for simplified + * Chinese */ + { + "eucjis2004", PG_EUC_JIS_2004 + }, /* EUC-JIS-2004; Extended UNIX Code fixed + * Width for Japanese, standard JIS X 0213 */ + { + "eucjp", PG_EUC_JP + }, /* EUC-JP; Extended UNIX Code fixed Width for + * Japanese, standard OSF */ + { + "euckr", PG_EUC_KR + }, /* EUC-KR; Extended Unix Code for Korean , KS + * X 1001 standard */ + { + "euctw", PG_EUC_TW + }, /* EUC-TW; Extended Unix Code for + * + * traditional Chinese */ + { + "gb18030", PG_GB18030 + }, /* GB18030;GB18030 */ + { + "gbk", PG_GBK + }, /* GBK; Chinese Windows CodePage 936 + * simplified Chinese */ + { + "iso88591", PG_LATIN1 + }, /* ISO-8859-1; RFC1345,KXS2 */ + { + "iso885910", PG_LATIN6 + }, /* ISO-8859-10; RFC1345,KXS2 */ + { + "iso885913", PG_LATIN7 + }, /* ISO-8859-13; RFC1345,KXS2 */ + { + "iso885914", PG_LATIN8 + }, /* ISO-8859-14; RFC1345,KXS2 */ + { + "iso885915", PG_LATIN9 + }, /* ISO-8859-15; RFC1345,KXS2 */ + { + "iso885916", PG_LATIN10 + }, /* ISO-8859-16; RFC1345,KXS2 */ + { + "iso88592", PG_LATIN2 + }, /* ISO-8859-2; RFC1345,KXS2 */ + { + "iso88593", PG_LATIN3 + }, /* ISO-8859-3; RFC1345,KXS2 */ + { + "iso88594", PG_LATIN4 + }, /* ISO-8859-4; RFC1345,KXS2 */ + { + "iso88595", PG_ISO_8859_5 + }, /* ISO-8859-5; RFC1345,KXS2 */ + { + "iso88596", PG_ISO_8859_6 + }, /* ISO-8859-6; RFC1345,KXS2 */ + { + "iso88597", PG_ISO_8859_7 + }, /* ISO-8859-7; RFC1345,KXS2 */ + { + "iso88598", PG_ISO_8859_8 + }, /* ISO-8859-8; RFC1345,KXS2 */ + { + "iso88599", PG_LATIN5 + }, /* ISO-8859-9; RFC1345,KXS2 */ + { + "johab", PG_JOHAB + }, /* JOHAB; Extended Unix Code for simplified + * Chinese */ + { + "koi8", PG_KOI8R + }, /* _dirty_ alias for KOI8-R (backward + * compatibility) */ + { + "koi8r", PG_KOI8R + }, /* KOI8-R; RFC1489 */ + { + "koi8u", PG_KOI8U + }, /* KOI8-U; RFC2319 */ + { + "latin1", PG_LATIN1 + }, /* alias for ISO-8859-1 */ + { + "latin10", PG_LATIN10 + }, /* alias for ISO-8859-16 */ + { + "latin2", PG_LATIN2 + }, /* alias for ISO-8859-2 */ + { + "latin3", PG_LATIN3 + }, /* alias for ISO-8859-3 */ + { + "latin4", PG_LATIN4 + }, /* alias for ISO-8859-4 */ + { + "latin5", PG_LATIN5 + }, /* alias for ISO-8859-9 */ + { + "latin6", PG_LATIN6 + }, /* alias for ISO-8859-10 */ + { + "latin7", PG_LATIN7 + }, /* alias for ISO-8859-13 */ + { + "latin8", PG_LATIN8 + }, /* alias for ISO-8859-14 */ + { + "latin9", PG_LATIN9 + }, /* alias for ISO-8859-15 */ + { + "mskanji", PG_SJIS + }, /* alias for Shift_JIS */ + { + "muleinternal", PG_MULE_INTERNAL + }, + { + "shiftjis", PG_SJIS + }, /* Shift_JIS; JIS X 0202-1991 */ + + { + "shiftjis2004", PG_SHIFT_JIS_2004 + }, /* SHIFT-JIS-2004; Shift JIS for Japanese, + * standard JIS X 0213 */ + { + "sjis", PG_SJIS + }, /* alias for Shift_JIS */ + { + "sqlascii", PG_SQL_ASCII + }, + { + "tcvn", PG_WIN1258 + }, /* alias for WIN1258 */ + { + "tcvn5712", PG_WIN1258 + }, /* alias for WIN1258 */ + { + "uhc", PG_UHC + }, /* UHC; Korean Windows CodePage 949 */ + { + "unicode", PG_UTF8 + }, /* alias for UTF8 */ + { + "utf8", PG_UTF8 + }, /* alias for UTF8 */ + { + "vscii", PG_WIN1258 + }, /* alias for WIN1258 */ + { + "win", PG_WIN1251 + }, /* _dirty_ alias for windows-1251 (backward + * compatibility) */ + { + "win1250", PG_WIN1250 + }, /* alias for Windows-1250 */ + { + "win1251", PG_WIN1251 + }, /* alias for Windows-1251 */ + { + "win1252", PG_WIN1252 + }, /* alias for Windows-1252 */ + { + "win1253", PG_WIN1253 + }, /* alias for Windows-1253 */ + { + "win1254", PG_WIN1254 + }, /* alias for Windows-1254 */ + { + "win1255", PG_WIN1255 + }, /* alias for Windows-1255 */ + { + "win1256", PG_WIN1256 + }, /* alias for Windows-1256 */ + { + "win1257", PG_WIN1257 + }, /* alias for Windows-1257 */ + { + "win1258", PG_WIN1258 + }, /* alias for Windows-1258 */ + { + "win866", PG_WIN866 + }, /* IBM866 */ + { + "win874", PG_WIN874 + }, /* alias for Windows-874 */ + { + "win932", PG_SJIS + }, /* alias for Shift_JIS */ + { + "win936", PG_GBK + }, /* alias for GBK */ + { + "win949", PG_UHC + }, /* alias for UHC */ + { + "win950", PG_BIG5 + }, /* alias for BIG5 */ + { + "windows1250", PG_WIN1250 + }, /* Windows-1251; Microsoft */ + { + "windows1251", PG_WIN1251 + }, /* Windows-1251; Microsoft */ + { + "windows1252", PG_WIN1252 + }, /* Windows-1252; Microsoft */ + { + "windows1253", PG_WIN1253 + }, /* Windows-1253; Microsoft */ + { + "windows1254", PG_WIN1254 + }, /* Windows-1254; Microsoft */ + { + "windows1255", PG_WIN1255 + }, /* Windows-1255; Microsoft */ + { + "windows1256", PG_WIN1256 + }, /* Windows-1256; Microsoft */ + { + "windows1257", PG_WIN1257 + }, /* Windows-1257; Microsoft */ + { + "windows1258", PG_WIN1258 + }, /* Windows-1258; Microsoft */ + { + "windows866", PG_WIN866 + }, /* IBM866 */ + { + "windows874", PG_WIN874 + }, /* Windows-874; Microsoft */ + { + "windows932", PG_SJIS + }, /* alias for Shift_JIS */ + { + "windows936", PG_GBK + }, /* alias for GBK */ + { + "windows949", PG_UHC + }, /* alias for UHC */ + { + "windows950", PG_BIG5 + } /* alias for BIG5 */ +}; + +/* ---------- + * These are "official" encoding names. + * XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h) + * ---------- + */ +#ifndef WIN32 +#define DEF_ENC2NAME(name, codepage) { #name, PG_##name } +#else +#define DEF_ENC2NAME(name, codepage) { #name, PG_##name, codepage } +#endif + +const pg_enc2name pg_enc2name_tbl[] = +{ + DEF_ENC2NAME(SQL_ASCII, 0), + DEF_ENC2NAME(EUC_JP, 20932), + DEF_ENC2NAME(EUC_CN, 20936), + DEF_ENC2NAME(EUC_KR, 51949), + DEF_ENC2NAME(EUC_TW, 0), + DEF_ENC2NAME(EUC_JIS_2004, 20932), + DEF_ENC2NAME(UTF8, 65001), + DEF_ENC2NAME(MULE_INTERNAL, 0), + DEF_ENC2NAME(LATIN1, 28591), + DEF_ENC2NAME(LATIN2, 28592), + DEF_ENC2NAME(LATIN3, 28593), + DEF_ENC2NAME(LATIN4, 28594), + DEF_ENC2NAME(LATIN5, 28599), + DEF_ENC2NAME(LATIN6, 0), + DEF_ENC2NAME(LATIN7, 0), + DEF_ENC2NAME(LATIN8, 0), + DEF_ENC2NAME(LATIN9, 28605), + DEF_ENC2NAME(LATIN10, 0), + DEF_ENC2NAME(WIN1256, 1256), + DEF_ENC2NAME(WIN1258, 1258), + DEF_ENC2NAME(WIN866, 866), + DEF_ENC2NAME(WIN874, 874), + DEF_ENC2NAME(KOI8R, 20866), + DEF_ENC2NAME(WIN1251, 1251), + DEF_ENC2NAME(WIN1252, 1252), + DEF_ENC2NAME(ISO_8859_5, 28595), + DEF_ENC2NAME(ISO_8859_6, 28596), + DEF_ENC2NAME(ISO_8859_7, 28597), + DEF_ENC2NAME(ISO_8859_8, 28598), + DEF_ENC2NAME(WIN1250, 1250), + DEF_ENC2NAME(WIN1253, 1253), + DEF_ENC2NAME(WIN1254, 1254), + DEF_ENC2NAME(WIN1255, 1255), + DEF_ENC2NAME(WIN1257, 1257), + DEF_ENC2NAME(KOI8U, 21866), + DEF_ENC2NAME(SJIS, 932), + DEF_ENC2NAME(BIG5, 950), + DEF_ENC2NAME(GBK, 936), + DEF_ENC2NAME(UHC, 949), + DEF_ENC2NAME(GB18030, 54936), + DEF_ENC2NAME(JOHAB, 0), + DEF_ENC2NAME(SHIFT_JIS_2004, 932) +}; + +/* ---------- + * These are encoding names for gettext. + * + * This covers all encodings except MULE_INTERNAL, which is alien to gettext. + * ---------- + */ +const pg_enc2gettext pg_enc2gettext_tbl[] = +{ + {PG_SQL_ASCII, "US-ASCII"}, + {PG_UTF8, "UTF-8"}, + {PG_LATIN1, "LATIN1"}, + {PG_LATIN2, "LATIN2"}, + {PG_LATIN3, "LATIN3"}, + {PG_LATIN4, "LATIN4"}, + {PG_ISO_8859_5, "ISO-8859-5"}, + {PG_ISO_8859_6, "ISO_8859-6"}, + {PG_ISO_8859_7, "ISO-8859-7"}, + {PG_ISO_8859_8, "ISO-8859-8"}, + {PG_LATIN5, "LATIN5"}, + {PG_LATIN6, "LATIN6"}, + {PG_LATIN7, "LATIN7"}, + {PG_LATIN8, "LATIN8"}, + {PG_LATIN9, "LATIN-9"}, + {PG_LATIN10, "LATIN10"}, + {PG_KOI8R, "KOI8-R"}, + {PG_KOI8U, "KOI8-U"}, + {PG_WIN1250, "CP1250"}, + {PG_WIN1251, "CP1251"}, + {PG_WIN1252, "CP1252"}, + {PG_WIN1253, "CP1253"}, + {PG_WIN1254, "CP1254"}, + {PG_WIN1255, "CP1255"}, + {PG_WIN1256, "CP1256"}, + {PG_WIN1257, "CP1257"}, + {PG_WIN1258, "CP1258"}, + {PG_WIN866, "CP866"}, + {PG_WIN874, "CP874"}, + {PG_EUC_CN, "EUC-CN"}, + {PG_EUC_JP, "EUC-JP"}, + {PG_EUC_KR, "EUC-KR"}, + {PG_EUC_TW, "EUC-TW"}, + {PG_EUC_JIS_2004, "EUC-JP"}, + {PG_SJIS, "SHIFT-JIS"}, + {PG_BIG5, "BIG5"}, + {PG_GBK, "GBK"}, + {PG_UHC, "UHC"}, + {PG_GB18030, "GB18030"}, + {PG_JOHAB, "JOHAB"}, + {PG_SHIFT_JIS_2004, "SHIFT_JISX0213"}, + {0, NULL} +}; + + +/* + * Table of encoding names for ICU (currently covers backend encodings only) + * + * Reference: <https://ssl.icu-project.org/icu-bin/convexp> + * + * NULL entries are not supported by ICU, or their mapping is unclear. + */ +static const char *const pg_enc2icu_tbl[] = +{ + NULL, /* PG_SQL_ASCII */ + "EUC-JP", /* PG_EUC_JP */ + "EUC-CN", /* PG_EUC_CN */ + "EUC-KR", /* PG_EUC_KR */ + "EUC-TW", /* PG_EUC_TW */ + NULL, /* PG_EUC_JIS_2004 */ + "UTF-8", /* PG_UTF8 */ + NULL, /* PG_MULE_INTERNAL */ + "ISO-8859-1", /* PG_LATIN1 */ + "ISO-8859-2", /* PG_LATIN2 */ + "ISO-8859-3", /* PG_LATIN3 */ + "ISO-8859-4", /* PG_LATIN4 */ + "ISO-8859-9", /* PG_LATIN5 */ + "ISO-8859-10", /* PG_LATIN6 */ + "ISO-8859-13", /* PG_LATIN7 */ + "ISO-8859-14", /* PG_LATIN8 */ + "ISO-8859-15", /* PG_LATIN9 */ + NULL, /* PG_LATIN10 */ + "CP1256", /* PG_WIN1256 */ + "CP1258", /* PG_WIN1258 */ + "CP866", /* PG_WIN866 */ + NULL, /* PG_WIN874 */ + "KOI8-R", /* PG_KOI8R */ + "CP1251", /* PG_WIN1251 */ + "CP1252", /* PG_WIN1252 */ + "ISO-8859-5", /* PG_ISO_8859_5 */ + "ISO-8859-6", /* PG_ISO_8859_6 */ + "ISO-8859-7", /* PG_ISO_8859_7 */ + "ISO-8859-8", /* PG_ISO_8859_8 */ + "CP1250", /* PG_WIN1250 */ + "CP1253", /* PG_WIN1253 */ + "CP1254", /* PG_WIN1254 */ + "CP1255", /* PG_WIN1255 */ + "CP1257", /* PG_WIN1257 */ + "KOI8-U", /* PG_KOI8U */ +}; + +StaticAssertDecl(lengthof(pg_enc2icu_tbl) == PG_ENCODING_BE_LAST + 1, + "pg_enc2icu_tbl incomplete"); + + +/* + * Is this encoding supported by ICU? + */ +bool +is_encoding_supported_by_icu(int encoding) +{ + if (!PG_VALID_BE_ENCODING(encoding)) + return false; + return (pg_enc2icu_tbl[encoding] != NULL); +} + +/* + * Returns ICU's name for encoding, or NULL if not supported + */ +const char * +get_encoding_name_for_icu(int encoding) +{ + if (!PG_VALID_BE_ENCODING(encoding)) + return NULL; + return pg_enc2icu_tbl[encoding]; +} + + +/* ---------- + * Encoding checks, for error returns -1 else encoding id + * ---------- + */ +int +pg_valid_client_encoding(const char *name) +{ + int enc; + + if ((enc = pg_char_to_encoding(name)) < 0) + return -1; + + if (!PG_VALID_FE_ENCODING(enc)) + return -1; + + return enc; +} + +int +pg_valid_server_encoding(const char *name) +{ + int enc; + + if ((enc = pg_char_to_encoding(name)) < 0) + return -1; + + if (!PG_VALID_BE_ENCODING(enc)) + return -1; + + return enc; +} + +int +pg_valid_server_encoding_id(int encoding) +{ + return PG_VALID_BE_ENCODING(encoding); +} + +/* + * Remove irrelevant chars from encoding name, store at *newkey + * + * (Caller's responsibility to provide a large enough buffer) + */ +static char * +clean_encoding_name(const char *key, char *newkey) +{ + const char *p; + char *np; + + for (p = key, np = newkey; *p != '\0'; p++) + { + if (isalnum((unsigned char) *p)) + { + if (*p >= 'A' && *p <= 'Z') + *np++ = *p + 'a' - 'A'; + else + *np++ = *p; + } + } + *np = '\0'; + return newkey; +} + +/* + * Search encoding by encoding name + * + * Returns encoding ID, or -1 if not recognized + */ +int +pg_char_to_encoding(const char *name) +{ + unsigned int nel = lengthof(pg_encname_tbl); + const pg_encname *base = pg_encname_tbl, + *last = base + nel - 1, + *position; + int result; + char buff[NAMEDATALEN], + *key; + + if (name == NULL || *name == '\0') + return -1; + + if (strlen(name) >= NAMEDATALEN) + return -1; /* it's certainly not in the table */ + + key = clean_encoding_name(name, buff); + + while (last >= base) + { + position = base + ((last - base) >> 1); + result = key[0] - position->name[0]; + + if (result == 0) + { + result = strcmp(key, position->name); + if (result == 0) + return position->encoding; + } + if (result < 0) + last = position - 1; + else + base = position + 1; + } + return -1; +} + +const char * +pg_encoding_to_char(int encoding) +{ + if (PG_VALID_ENCODING(encoding)) + { + const pg_enc2name *p = &pg_enc2name_tbl[encoding]; + + Assert(encoding == p->encoding); + return p->name; + } + return ""; +} diff --git a/src/common/exec.c b/src/common/exec.c new file mode 100644 index 0000000..f209b93 --- /dev/null +++ b/src/common/exec.c @@ -0,0 +1,719 @@ +/*------------------------------------------------------------------------- + * + * exec.c + * Functions for finding and validating executable files + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/exec.c + * + *------------------------------------------------------------------------- + */ + +/* + * On macOS, "man realpath" avers: + * Defining _DARWIN_C_SOURCE or _DARWIN_BETTER_REALPATH before including + * stdlib.h will cause the provided implementation of realpath() to use + * F_GETPATH from fcntl(2) to discover the path. + * This should be harmless everywhere else. + */ +#define _DARWIN_BETTER_REALPATH + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <signal.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <unistd.h> + +#ifdef EXEC_BACKEND +#if defined(HAVE_SYS_PERSONALITY_H) +#include <sys/personality.h> +#elif defined(HAVE_SYS_PROCCTL_H) +#include <sys/procctl.h> +#endif +#endif + +/* Inhibit mingw CRT's auto-globbing of command line arguments */ +#if defined(WIN32) && !defined(_MSC_VER) +extern int _CRT_glob = 0; /* 0 turns off globbing; 1 turns it on */ +#endif + +/* + * Hacky solution to allow expressing both frontend and backend error reports + * in one macro call. First argument of log_error is an errcode() call of + * some sort (ignored if FRONTEND); the rest are errmsg_internal() arguments, + * i.e. message string and any parameters for it. + * + * Caller must provide the gettext wrapper around the message string, if + * appropriate, so that it gets translated in the FRONTEND case; this + * motivates using errmsg_internal() not errmsg(). We handle appending a + * newline, if needed, inside the macro, so that there's only one translatable + * string per call not two. + */ +#ifndef FRONTEND +#define log_error(errcodefn, ...) \ + ereport(LOG, (errcodefn, errmsg_internal(__VA_ARGS__))) +#else +#define log_error(errcodefn, ...) \ + (fprintf(stderr, __VA_ARGS__), fputc('\n', stderr)) +#endif + +static int normalize_exec_path(char *path); +static char *pg_realpath(const char *fname); + +#ifdef WIN32 +static BOOL GetTokenUser(HANDLE hToken, PTOKEN_USER *ppTokenUser); +#endif + +/* + * validate_exec -- validate "path" as an executable file + * + * returns 0 if the file is found and no error is encountered. + * -1 if the regular file "path" does not exist or cannot be executed. + * -2 if the file is otherwise valid but cannot be read. + * in the failure cases, errno is set appropriately + */ +int +validate_exec(const char *path) +{ + struct stat buf; + int is_r; + int is_x; + +#ifdef WIN32 + char path_exe[MAXPGPATH + sizeof(".exe") - 1]; + + /* Win32 requires a .exe suffix for stat() */ + if (strlen(path) < strlen(".exe") || + pg_strcasecmp(path + strlen(path) - strlen(".exe"), ".exe") != 0) + { + strlcpy(path_exe, path, sizeof(path_exe) - 4); + strcat(path_exe, ".exe"); + path = path_exe; + } +#endif + + /* + * Ensure that the file exists and is a regular file. + * + * XXX if you have a broken system where stat() looks at the symlink + * instead of the underlying file, you lose. + */ + if (stat(path, &buf) < 0) + return -1; + + if (!S_ISREG(buf.st_mode)) + { + /* + * POSIX offers no errno code that's simply "not a regular file". If + * it's a directory we can use EISDIR. Otherwise, it's most likely a + * device special file, and EPERM (Operation not permitted) isn't too + * horribly off base. + */ + errno = S_ISDIR(buf.st_mode) ? EISDIR : EPERM; + return -1; + } + + /* + * Ensure that the file is both executable and readable (required for + * dynamic loading). + */ +#ifndef WIN32 + is_r = (access(path, R_OK) == 0); + is_x = (access(path, X_OK) == 0); + /* access() will set errno if it returns -1 */ +#else + is_r = buf.st_mode & S_IRUSR; + is_x = buf.st_mode & S_IXUSR; + errno = EACCES; /* appropriate thing if we return nonzero */ +#endif + return is_x ? (is_r ? 0 : -2) : -1; +} + + +/* + * find_my_exec -- find an absolute path to this program's executable + * + * argv0 is the name passed on the command line + * retpath is the output area (must be of size MAXPGPATH) + * Returns 0 if OK, -1 if error. + * + * The reason we have to work so hard to find an absolute path is that + * on some platforms we can't do dynamic loading unless we know the + * executable's location. Also, we need an absolute path not a relative + * path because we may later change working directory. Finally, we want + * a true path not a symlink location, so that we can locate other files + * that are part of our installation relative to the executable. + */ +int +find_my_exec(const char *argv0, char *retpath) +{ + char *path; + + /* + * If argv0 contains a separator, then PATH wasn't used. + */ + strlcpy(retpath, argv0, MAXPGPATH); + if (first_dir_separator(retpath) != NULL) + { + if (validate_exec(retpath) == 0) + return normalize_exec_path(retpath); + + log_error(errcode(ERRCODE_WRONG_OBJECT_TYPE), + _("invalid binary \"%s\": %m"), retpath); + return -1; + } + +#ifdef WIN32 + /* Win32 checks the current directory first for names without slashes */ + if (validate_exec(retpath) == 0) + return normalize_exec_path(retpath); +#endif + + /* + * Since no explicit path was supplied, the user must have been relying on + * PATH. We'll search the same PATH. + */ + if ((path = getenv("PATH")) && *path) + { + char *startp = NULL, + *endp = NULL; + + do + { + if (!startp) + startp = path; + else + startp = endp + 1; + + endp = first_path_var_separator(startp); + if (!endp) + endp = startp + strlen(startp); /* point to end */ + + strlcpy(retpath, startp, Min(endp - startp + 1, MAXPGPATH)); + + join_path_components(retpath, retpath, argv0); + canonicalize_path(retpath); + + switch (validate_exec(retpath)) + { + case 0: /* found ok */ + return normalize_exec_path(retpath); + case -1: /* wasn't even a candidate, keep looking */ + break; + case -2: /* found but disqualified */ + log_error(errcode(ERRCODE_WRONG_OBJECT_TYPE), + _("could not read binary \"%s\": %m"), + retpath); + break; + } + } while (*endp); + } + + log_error(errcode(ERRCODE_UNDEFINED_FILE), + _("could not find a \"%s\" to execute"), argv0); + return -1; +} + + +/* + * normalize_exec_path - resolve symlinks and convert to absolute path + * + * Given a path that refers to an executable, chase through any symlinks + * to find the real file location; then convert that to an absolute path. + * + * On success, replaces the contents of "path" with the absolute path. + * ("path" is assumed to be of size MAXPGPATH.) + * Returns 0 if OK, -1 if error. + */ +static int +normalize_exec_path(char *path) +{ + /* + * We used to do a lot of work ourselves here, but now we just let + * realpath(3) do all the heavy lifting. + */ + char *abspath = pg_realpath(path); + + if (abspath == NULL) + { + log_error(errcode_for_file_access(), + _("could not resolve path \"%s\" to absolute form: %m"), + path); + return -1; + } + strlcpy(path, abspath, MAXPGPATH); + free(abspath); + +#ifdef WIN32 + /* On Windows, be sure to convert '\' to '/' */ + canonicalize_path(path); +#endif + + return 0; +} + + +/* + * pg_realpath() - realpath(3) with POSIX.1-2008 semantics + * + * This is equivalent to realpath(fname, NULL), in that it returns a + * malloc'd buffer containing the absolute path equivalent to fname. + * On error, returns NULL with errno set. + * + * On Windows, what you get is spelled per platform conventions, + * so you probably want to apply canonicalize_path() to the result. + * + * For now, this is needed only here so mark it static. If you choose to + * move it into its own file, move the _DARWIN_BETTER_REALPATH #define too! + */ +static char * +pg_realpath(const char *fname) +{ + char *path; + +#ifndef WIN32 + path = realpath(fname, NULL); + if (path == NULL && errno == EINVAL) + { + /* + * Cope with old-POSIX systems that require a user-provided buffer. + * Assume MAXPGPATH is enough room on all such systems. + */ + char *buf = malloc(MAXPGPATH); + + if (buf == NULL) + return NULL; /* assume errno is set */ + path = realpath(fname, buf); + if (path == NULL) /* don't leak memory */ + { + int save_errno = errno; + + free(buf); + errno = save_errno; + } + } +#else /* WIN32 */ + + /* + * Microsoft is resolutely non-POSIX, but _fullpath() does the same thing. + * The documentation claims it reports errors by setting errno, which is a + * bit surprising for Microsoft, but we'll believe that until it's proven + * wrong. Clear errno first, though, so we can at least tell if a failure + * occurs and doesn't set it. + */ + errno = 0; + path = _fullpath(NULL, fname, 0); +#endif + + return path; +} + + +/* + * Find another program in our binary's directory, + * then make sure it is the proper version. + */ +int +find_other_exec(const char *argv0, const char *target, + const char *versionstr, char *retpath) +{ + char cmd[MAXPGPATH]; + char line[MAXPGPATH]; + + if (find_my_exec(argv0, retpath) < 0) + return -1; + + /* Trim off program name and keep just directory */ + *last_dir_separator(retpath) = '\0'; + canonicalize_path(retpath); + + /* Now append the other program's name */ + snprintf(retpath + strlen(retpath), MAXPGPATH - strlen(retpath), + "/%s%s", target, EXE); + + if (validate_exec(retpath) != 0) + return -1; + + snprintf(cmd, sizeof(cmd), "\"%s\" -V", retpath); + + if (!pipe_read_line(cmd, line, sizeof(line))) + return -1; + + if (strcmp(line, versionstr) != 0) + return -2; + + return 0; +} + + +/* + * Execute a command in a pipe and read the first line from it. + */ +char * +pipe_read_line(char *cmd, char *line, int maxsize) +{ + FILE *pgver; + + fflush(NULL); + + errno = 0; + if ((pgver = popen(cmd, "r")) == NULL) + { + perror("popen failure"); + return NULL; + } + + errno = 0; + if (fgets(line, maxsize, pgver) == NULL) + { + if (feof(pgver)) + fprintf(stderr, "no data was returned by command \"%s\"\n", cmd); + else + perror("fgets failure"); + pclose(pgver); /* no error checking */ + return NULL; + } + + if (pclose_check(pgver)) + return NULL; + + return line; +} + + +/* + * pclose() plus useful error reporting + */ +int +pclose_check(FILE *stream) +{ + int exitstatus; + char *reason; + + exitstatus = pclose(stream); + + if (exitstatus == 0) + return 0; /* all is well */ + + if (exitstatus == -1) + { + /* pclose() itself failed, and hopefully set errno */ + log_error(errcode(ERRCODE_SYSTEM_ERROR), + _("%s() failed: %m"), "pclose"); + } + else + { + reason = wait_result_to_str(exitstatus); + log_error(errcode(ERRCODE_SYSTEM_ERROR), + "%s", reason); + pfree(reason); + } + return exitstatus; +} + +/* + * set_pglocale_pgservice + * + * Set application-specific locale and service directory + * + * This function takes the value of argv[0] rather than a full path. + * + * (You may be wondering why this is in exec.c. It requires this module's + * services and doesn't introduce any new dependencies, so this seems as + * good as anyplace.) + */ +void +set_pglocale_pgservice(const char *argv0, const char *app) +{ + char path[MAXPGPATH]; + char my_exec_path[MAXPGPATH]; + + /* don't set LC_ALL in the backend */ + if (strcmp(app, PG_TEXTDOMAIN("postgres")) != 0) + { + setlocale(LC_ALL, ""); + + /* + * One could make a case for reproducing here PostmasterMain()'s test + * for whether the process is multithreaded. Unlike the postmaster, + * no frontend program calls sigprocmask() or otherwise provides for + * mutual exclusion between signal handlers. While frontends using + * fork(), if multithreaded, are formally exposed to undefined + * behavior, we have not witnessed a concrete bug. Therefore, + * complaining about multithreading here may be mere pedantry. + */ + } + + if (find_my_exec(argv0, my_exec_path) < 0) + return; + +#ifdef ENABLE_NLS + get_locale_path(my_exec_path, path); + bindtextdomain(app, path); + textdomain(app); + /* set for libpq to use, but don't override existing setting */ + setenv("PGLOCALEDIR", path, 0); +#endif + + if (getenv("PGSYSCONFDIR") == NULL) + { + get_etc_path(my_exec_path, path); + /* set for libpq to use */ + setenv("PGSYSCONFDIR", path, 0); + } +} + +#ifdef EXEC_BACKEND +/* + * For the benefit of PostgreSQL developers testing EXEC_BACKEND on Unix + * systems (code paths normally exercised only on Windows), provide a way to + * disable address space layout randomization, if we know how on this platform. + * Otherwise, backends may fail to attach to shared memory at the fixed address + * chosen by the postmaster. (See also the macOS-specific hack in + * sysv_shmem.c.) + */ +int +pg_disable_aslr(void) +{ +#if defined(HAVE_SYS_PERSONALITY_H) + return personality(ADDR_NO_RANDOMIZE); +#elif defined(HAVE_SYS_PROCCTL_H) && defined(PROC_ASLR_FORCE_DISABLE) + int data = PROC_ASLR_FORCE_DISABLE; + + return procctl(P_PID, 0, PROC_ASLR_CTL, &data); +#else + errno = ENOSYS; + return -1; +#endif +} +#endif + +#ifdef WIN32 + +/* + * AddUserToTokenDacl(HANDLE hToken) + * + * This function adds the current user account to the restricted + * token used when we create a restricted process. + * + * This is required because of some security changes in Windows + * that appeared in patches to XP/2K3 and in Vista/2008. + * + * On these machines, the Administrator account is not included in + * the default DACL - you just get Administrators + System. For + * regular users you get User + System. Because we strip Administrators + * when we create the restricted token, we are left with only System + * in the DACL which leads to access denied errors for later CreatePipe() + * and CreateProcess() calls when running as Administrator. + * + * This function fixes this problem by modifying the DACL of the + * token the process will use, and explicitly re-adding the current + * user account. This is still secure because the Administrator account + * inherits its privileges from the Administrators group - it doesn't + * have any of its own. + */ +BOOL +AddUserToTokenDacl(HANDLE hToken) +{ + int i; + ACL_SIZE_INFORMATION asi; + ACCESS_ALLOWED_ACE *pace; + DWORD dwNewAclSize; + DWORD dwSize = 0; + DWORD dwTokenInfoLength = 0; + PACL pacl = NULL; + PTOKEN_USER pTokenUser = NULL; + TOKEN_DEFAULT_DACL tddNew; + TOKEN_DEFAULT_DACL *ptdd = NULL; + TOKEN_INFORMATION_CLASS tic = TokenDefaultDacl; + BOOL ret = FALSE; + + /* Figure out the buffer size for the DACL info */ + if (!GetTokenInformation(hToken, tic, (LPVOID) NULL, dwTokenInfoLength, &dwSize)) + { + if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) + { + ptdd = (TOKEN_DEFAULT_DACL *) LocalAlloc(LPTR, dwSize); + if (ptdd == NULL) + { + log_error(errcode(ERRCODE_OUT_OF_MEMORY), + _("out of memory")); + goto cleanup; + } + + if (!GetTokenInformation(hToken, tic, (LPVOID) ptdd, dwSize, &dwSize)) + { + log_error(errcode(ERRCODE_SYSTEM_ERROR), + "could not get token information: error code %lu", + GetLastError()); + goto cleanup; + } + } + else + { + log_error(errcode(ERRCODE_SYSTEM_ERROR), + "could not get token information buffer size: error code %lu", + GetLastError()); + goto cleanup; + } + } + + /* Get the ACL info */ + if (!GetAclInformation(ptdd->DefaultDacl, (LPVOID) &asi, + (DWORD) sizeof(ACL_SIZE_INFORMATION), + AclSizeInformation)) + { + log_error(errcode(ERRCODE_SYSTEM_ERROR), + "could not get ACL information: error code %lu", + GetLastError()); + goto cleanup; + } + + /* Get the current user SID */ + if (!GetTokenUser(hToken, &pTokenUser)) + goto cleanup; /* callee printed a message */ + + /* Figure out the size of the new ACL */ + dwNewAclSize = asi.AclBytesInUse + sizeof(ACCESS_ALLOWED_ACE) + + GetLengthSid(pTokenUser->User.Sid) - sizeof(DWORD); + + /* Allocate the ACL buffer & initialize it */ + pacl = (PACL) LocalAlloc(LPTR, dwNewAclSize); + if (pacl == NULL) + { + log_error(errcode(ERRCODE_OUT_OF_MEMORY), + _("out of memory")); + goto cleanup; + } + + if (!InitializeAcl(pacl, dwNewAclSize, ACL_REVISION)) + { + log_error(errcode(ERRCODE_SYSTEM_ERROR), + "could not initialize ACL: error code %lu", GetLastError()); + goto cleanup; + } + + /* Loop through the existing ACEs, and build the new ACL */ + for (i = 0; i < (int) asi.AceCount; i++) + { + if (!GetAce(ptdd->DefaultDacl, i, (LPVOID *) &pace)) + { + log_error(errcode(ERRCODE_SYSTEM_ERROR), + "could not get ACE: error code %lu", GetLastError()); + goto cleanup; + } + + if (!AddAce(pacl, ACL_REVISION, MAXDWORD, pace, ((PACE_HEADER) pace)->AceSize)) + { + log_error(errcode(ERRCODE_SYSTEM_ERROR), + "could not add ACE: error code %lu", GetLastError()); + goto cleanup; + } + } + + /* Add the new ACE for the current user */ + if (!AddAccessAllowedAceEx(pacl, ACL_REVISION, OBJECT_INHERIT_ACE, GENERIC_ALL, pTokenUser->User.Sid)) + { + log_error(errcode(ERRCODE_SYSTEM_ERROR), + "could not add access allowed ACE: error code %lu", + GetLastError()); + goto cleanup; + } + + /* Set the new DACL in the token */ + tddNew.DefaultDacl = pacl; + + if (!SetTokenInformation(hToken, tic, (LPVOID) &tddNew, dwNewAclSize)) + { + log_error(errcode(ERRCODE_SYSTEM_ERROR), + "could not set token information: error code %lu", + GetLastError()); + goto cleanup; + } + + ret = TRUE; + +cleanup: + if (pTokenUser) + LocalFree((HLOCAL) pTokenUser); + + if (pacl) + LocalFree((HLOCAL) pacl); + + if (ptdd) + LocalFree((HLOCAL) ptdd); + + return ret; +} + +/* + * GetTokenUser(HANDLE hToken, PTOKEN_USER *ppTokenUser) + * + * Get the users token information from a process token. + * + * The caller of this function is responsible for calling LocalFree() on the + * returned TOKEN_USER memory. + */ +static BOOL +GetTokenUser(HANDLE hToken, PTOKEN_USER *ppTokenUser) +{ + DWORD dwLength; + + *ppTokenUser = NULL; + + if (!GetTokenInformation(hToken, + TokenUser, + NULL, + 0, + &dwLength)) + { + if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) + { + *ppTokenUser = (PTOKEN_USER) LocalAlloc(LPTR, dwLength); + + if (*ppTokenUser == NULL) + { + log_error(errcode(ERRCODE_OUT_OF_MEMORY), + _("out of memory")); + return FALSE; + } + } + else + { + log_error(errcode(ERRCODE_SYSTEM_ERROR), + "could not get token information buffer size: error code %lu", + GetLastError()); + return FALSE; + } + } + + if (!GetTokenInformation(hToken, + TokenUser, + *ppTokenUser, + dwLength, + &dwLength)) + { + LocalFree(*ppTokenUser); + *ppTokenUser = NULL; + + log_error(errcode(ERRCODE_SYSTEM_ERROR), + "could not get token information: error code %lu", + GetLastError()); + return FALSE; + } + + /* Memory in *ppTokenUser is LocalFree():d by the caller */ + return TRUE; +} + +#endif diff --git a/src/common/f2s.c b/src/common/f2s.c new file mode 100644 index 0000000..ba08dcb --- /dev/null +++ b/src/common/f2s.c @@ -0,0 +1,803 @@ +/*--------------------------------------------------------------------------- + * + * Ryu floating-point output for single precision. + * + * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/f2s.c + * + * This is a modification of code taken from github.com/ulfjack/ryu under the + * terms of the Boost license (not the Apache license). The original copyright + * notice follows: + * + * Copyright 2018 Ulf Adams + * + * The contents of this file may be used under the terms of the Apache + * License, Version 2.0. + * + * (See accompanying file LICENSE-Apache or copy at + * http://www.apache.org/licenses/LICENSE-2.0) + * + * Alternatively, the contents of this file may be used under the terms of the + * Boost Software License, Version 1.0. + * + * (See accompanying file LICENSE-Boost or copy at + * https://www.boost.org/LICENSE_1_0.txt) + * + * Unless required by applicable law or agreed to in writing, this software is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. + * + *--------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/shortest_dec.h" +#include "digit_table.h" +#include "ryu_common.h" + +#define FLOAT_MANTISSA_BITS 23 +#define FLOAT_EXPONENT_BITS 8 +#define FLOAT_BIAS 127 + +/* + * This table is generated (by the upstream) by PrintFloatLookupTable, + * and modified (by us) to add UINT64CONST. + */ +#define FLOAT_POW5_INV_BITCOUNT 59 +static const uint64 FLOAT_POW5_INV_SPLIT[31] = { + UINT64CONST(576460752303423489), UINT64CONST(461168601842738791), UINT64CONST(368934881474191033), UINT64CONST(295147905179352826), + UINT64CONST(472236648286964522), UINT64CONST(377789318629571618), UINT64CONST(302231454903657294), UINT64CONST(483570327845851670), + UINT64CONST(386856262276681336), UINT64CONST(309485009821345069), UINT64CONST(495176015714152110), UINT64CONST(396140812571321688), + UINT64CONST(316912650057057351), UINT64CONST(507060240091291761), UINT64CONST(405648192073033409), UINT64CONST(324518553658426727), + UINT64CONST(519229685853482763), UINT64CONST(415383748682786211), UINT64CONST(332306998946228969), UINT64CONST(531691198313966350), + UINT64CONST(425352958651173080), UINT64CONST(340282366920938464), UINT64CONST(544451787073501542), UINT64CONST(435561429658801234), + UINT64CONST(348449143727040987), UINT64CONST(557518629963265579), UINT64CONST(446014903970612463), UINT64CONST(356811923176489971), + UINT64CONST(570899077082383953), UINT64CONST(456719261665907162), UINT64CONST(365375409332725730) +}; +#define FLOAT_POW5_BITCOUNT 61 +static const uint64 FLOAT_POW5_SPLIT[47] = { + UINT64CONST(1152921504606846976), UINT64CONST(1441151880758558720), UINT64CONST(1801439850948198400), UINT64CONST(2251799813685248000), + UINT64CONST(1407374883553280000), UINT64CONST(1759218604441600000), UINT64CONST(2199023255552000000), UINT64CONST(1374389534720000000), + UINT64CONST(1717986918400000000), UINT64CONST(2147483648000000000), UINT64CONST(1342177280000000000), UINT64CONST(1677721600000000000), + UINT64CONST(2097152000000000000), UINT64CONST(1310720000000000000), UINT64CONST(1638400000000000000), UINT64CONST(2048000000000000000), + UINT64CONST(1280000000000000000), UINT64CONST(1600000000000000000), UINT64CONST(2000000000000000000), UINT64CONST(1250000000000000000), + UINT64CONST(1562500000000000000), UINT64CONST(1953125000000000000), UINT64CONST(1220703125000000000), UINT64CONST(1525878906250000000), + UINT64CONST(1907348632812500000), UINT64CONST(1192092895507812500), UINT64CONST(1490116119384765625), UINT64CONST(1862645149230957031), + UINT64CONST(1164153218269348144), UINT64CONST(1455191522836685180), UINT64CONST(1818989403545856475), UINT64CONST(2273736754432320594), + UINT64CONST(1421085471520200371), UINT64CONST(1776356839400250464), UINT64CONST(2220446049250313080), UINT64CONST(1387778780781445675), + UINT64CONST(1734723475976807094), UINT64CONST(2168404344971008868), UINT64CONST(1355252715606880542), UINT64CONST(1694065894508600678), + UINT64CONST(2117582368135750847), UINT64CONST(1323488980084844279), UINT64CONST(1654361225106055349), UINT64CONST(2067951531382569187), + UINT64CONST(1292469707114105741), UINT64CONST(1615587133892632177), UINT64CONST(2019483917365790221) +}; + +static inline uint32 +pow5Factor(uint32 value) +{ + uint32 count = 0; + + for (;;) + { + Assert(value != 0); + const uint32 q = value / 5; + const uint32 r = value % 5; + + if (r != 0) + break; + + value = q; + ++count; + } + return count; +} + +/* Returns true if value is divisible by 5^p. */ +static inline bool +multipleOfPowerOf5(const uint32 value, const uint32 p) +{ + return pow5Factor(value) >= p; +} + +/* Returns true if value is divisible by 2^p. */ +static inline bool +multipleOfPowerOf2(const uint32 value, const uint32 p) +{ + /* return __builtin_ctz(value) >= p; */ + return (value & ((1u << p) - 1)) == 0; +} + +/* + * It seems to be slightly faster to avoid uint128_t here, although the + * generated code for uint128_t looks slightly nicer. + */ +static inline uint32 +mulShift(const uint32 m, const uint64 factor, const int32 shift) +{ + /* + * The casts here help MSVC to avoid calls to the __allmul library + * function. + */ + const uint32 factorLo = (uint32) (factor); + const uint32 factorHi = (uint32) (factor >> 32); + const uint64 bits0 = (uint64) m * factorLo; + const uint64 bits1 = (uint64) m * factorHi; + + Assert(shift > 32); + +#ifdef RYU_32_BIT_PLATFORM + + /* + * On 32-bit platforms we can avoid a 64-bit shift-right since we only + * need the upper 32 bits of the result and the shift value is > 32. + */ + const uint32 bits0Hi = (uint32) (bits0 >> 32); + uint32 bits1Lo = (uint32) (bits1); + uint32 bits1Hi = (uint32) (bits1 >> 32); + + bits1Lo += bits0Hi; + bits1Hi += (bits1Lo < bits0Hi); + + const int32 s = shift - 32; + + return (bits1Hi << (32 - s)) | (bits1Lo >> s); + +#else /* RYU_32_BIT_PLATFORM */ + + const uint64 sum = (bits0 >> 32) + bits1; + const uint64 shiftedSum = sum >> (shift - 32); + + Assert(shiftedSum <= PG_UINT32_MAX); + return (uint32) shiftedSum; + +#endif /* RYU_32_BIT_PLATFORM */ +} + +static inline uint32 +mulPow5InvDivPow2(const uint32 m, const uint32 q, const int32 j) +{ + return mulShift(m, FLOAT_POW5_INV_SPLIT[q], j); +} + +static inline uint32 +mulPow5divPow2(const uint32 m, const uint32 i, const int32 j) +{ + return mulShift(m, FLOAT_POW5_SPLIT[i], j); +} + +static inline uint32 +decimalLength(const uint32 v) +{ + /* Function precondition: v is not a 10-digit number. */ + /* (9 digits are sufficient for round-tripping.) */ + Assert(v < 1000000000); + if (v >= 100000000) + { + return 9; + } + if (v >= 10000000) + { + return 8; + } + if (v >= 1000000) + { + return 7; + } + if (v >= 100000) + { + return 6; + } + if (v >= 10000) + { + return 5; + } + if (v >= 1000) + { + return 4; + } + if (v >= 100) + { + return 3; + } + if (v >= 10) + { + return 2; + } + return 1; +} + +/* A floating decimal representing m * 10^e. */ +typedef struct floating_decimal_32 +{ + uint32 mantissa; + int32 exponent; +} floating_decimal_32; + +static inline floating_decimal_32 +f2d(const uint32 ieeeMantissa, const uint32 ieeeExponent) +{ + int32 e2; + uint32 m2; + + if (ieeeExponent == 0) + { + /* We subtract 2 so that the bounds computation has 2 additional bits. */ + e2 = 1 - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2; + m2 = ieeeMantissa; + } + else + { + e2 = ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2; + m2 = (1u << FLOAT_MANTISSA_BITS) | ieeeMantissa; + } + +#if STRICTLY_SHORTEST + const bool even = (m2 & 1) == 0; + const bool acceptBounds = even; +#else + const bool acceptBounds = false; +#endif + + /* Step 2: Determine the interval of legal decimal representations. */ + const uint32 mv = 4 * m2; + const uint32 mp = 4 * m2 + 2; + + /* Implicit bool -> int conversion. True is 1, false is 0. */ + const uint32 mmShift = ieeeMantissa != 0 || ieeeExponent <= 1; + const uint32 mm = 4 * m2 - 1 - mmShift; + + /* Step 3: Convert to a decimal power base using 64-bit arithmetic. */ + uint32 vr, + vp, + vm; + int32 e10; + bool vmIsTrailingZeros = false; + bool vrIsTrailingZeros = false; + uint8 lastRemovedDigit = 0; + + if (e2 >= 0) + { + const uint32 q = log10Pow2(e2); + + e10 = q; + + const int32 k = FLOAT_POW5_INV_BITCOUNT + pow5bits(q) - 1; + const int32 i = -e2 + q + k; + + vr = mulPow5InvDivPow2(mv, q, i); + vp = mulPow5InvDivPow2(mp, q, i); + vm = mulPow5InvDivPow2(mm, q, i); + + if (q != 0 && (vp - 1) / 10 <= vm / 10) + { + /* + * We need to know one removed digit even if we are not going to + * loop below. We could use q = X - 1 above, except that would + * require 33 bits for the result, and we've found that 32-bit + * arithmetic is faster even on 64-bit machines. + */ + const int32 l = FLOAT_POW5_INV_BITCOUNT + pow5bits(q - 1) - 1; + + lastRemovedDigit = (uint8) (mulPow5InvDivPow2(mv, q - 1, -e2 + q - 1 + l) % 10); + } + if (q <= 9) + { + /* + * The largest power of 5 that fits in 24 bits is 5^10, but q <= 9 + * seems to be safe as well. + * + * Only one of mp, mv, and mm can be a multiple of 5, if any. + */ + if (mv % 5 == 0) + { + vrIsTrailingZeros = multipleOfPowerOf5(mv, q); + } + else if (acceptBounds) + { + vmIsTrailingZeros = multipleOfPowerOf5(mm, q); + } + else + { + vp -= multipleOfPowerOf5(mp, q); + } + } + } + else + { + const uint32 q = log10Pow5(-e2); + + e10 = q + e2; + + const int32 i = -e2 - q; + const int32 k = pow5bits(i) - FLOAT_POW5_BITCOUNT; + int32 j = q - k; + + vr = mulPow5divPow2(mv, i, j); + vp = mulPow5divPow2(mp, i, j); + vm = mulPow5divPow2(mm, i, j); + + if (q != 0 && (vp - 1) / 10 <= vm / 10) + { + j = q - 1 - (pow5bits(i + 1) - FLOAT_POW5_BITCOUNT); + lastRemovedDigit = (uint8) (mulPow5divPow2(mv, i + 1, j) % 10); + } + if (q <= 1) + { + /* + * {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q + * trailing 0 bits. + */ + /* mv = 4 * m2, so it always has at least two trailing 0 bits. */ + vrIsTrailingZeros = true; + if (acceptBounds) + { + /* + * mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff + * mmShift == 1. + */ + vmIsTrailingZeros = mmShift == 1; + } + else + { + /* + * mp = mv + 2, so it always has at least one trailing 0 bit. + */ + --vp; + } + } + else if (q < 31) + { + /* TODO(ulfjack):Use a tighter bound here. */ + vrIsTrailingZeros = multipleOfPowerOf2(mv, q - 1); + } + } + + /* + * Step 4: Find the shortest decimal representation in the interval of + * legal representations. + */ + uint32 removed = 0; + uint32 output; + + if (vmIsTrailingZeros || vrIsTrailingZeros) + { + /* General case, which happens rarely (~4.0%). */ + while (vp / 10 > vm / 10) + { + vmIsTrailingZeros &= vm - (vm / 10) * 10 == 0; + vrIsTrailingZeros &= lastRemovedDigit == 0; + lastRemovedDigit = (uint8) (vr % 10); + vr /= 10; + vp /= 10; + vm /= 10; + ++removed; + } + if (vmIsTrailingZeros) + { + while (vm % 10 == 0) + { + vrIsTrailingZeros &= lastRemovedDigit == 0; + lastRemovedDigit = (uint8) (vr % 10); + vr /= 10; + vp /= 10; + vm /= 10; + ++removed; + } + } + + if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) + { + /* Round even if the exact number is .....50..0. */ + lastRemovedDigit = 4; + } + + /* + * We need to take vr + 1 if vr is outside bounds or we need to round + * up. + */ + output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5); + } + else + { + /* + * Specialized for the common case (~96.0%). Percentages below are + * relative to this. + * + * Loop iterations below (approximately): 0: 13.6%, 1: 70.7%, 2: + * 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01% + */ + while (vp / 10 > vm / 10) + { + lastRemovedDigit = (uint8) (vr % 10); + vr /= 10; + vp /= 10; + vm /= 10; + ++removed; + } + + /* + * We need to take vr + 1 if vr is outside bounds or we need to round + * up. + */ + output = vr + (vr == vm || lastRemovedDigit >= 5); + } + + const int32 exp = e10 + removed; + + floating_decimal_32 fd; + + fd.exponent = exp; + fd.mantissa = output; + return fd; +} + +static inline int +to_chars_f(const floating_decimal_32 v, const uint32 olength, char *const result) +{ + /* Step 5: Print the decimal representation. */ + int index = 0; + + uint32 output = v.mantissa; + int32 exp = v.exponent; + + /*---- + * On entry, mantissa * 10^exp is the result to be output. + * Caller has already done the - sign if needed. + * + * We want to insert the point somewhere depending on the output length + * and exponent, which might mean adding zeros: + * + * exp | format + * 1+ | ddddddddd000000 + * 0 | ddddddddd + * -1 .. -len+1 | dddddddd.d to d.ddddddddd + * -len ... | 0.ddddddddd to 0.000dddddd + */ + uint32 i = 0; + int32 nexp = exp + olength; + + if (nexp <= 0) + { + /* -nexp is number of 0s to add after '.' */ + Assert(nexp >= -3); + /* 0.000ddddd */ + index = 2 - nexp; + /* copy 8 bytes rather than 5 to let compiler optimize */ + memcpy(result, "0.000000", 8); + } + else if (exp < 0) + { + /* + * dddd.dddd; leave space at the start and move the '.' in after + */ + index = 1; + } + else + { + /* + * We can save some code later by pre-filling with zeros. We know that + * there can be no more than 6 output digits in this form, otherwise + * we would not choose fixed-point output. memset 8 rather than 6 + * bytes to let the compiler optimize it. + */ + Assert(exp < 6 && exp + olength <= 6); + memset(result, '0', 8); + } + + while (output >= 10000) + { + const uint32 c = output - 10000 * (output / 10000); + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; + + output /= 10000; + + memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2); + memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2); + i += 4; + } + if (output >= 100) + { + const uint32 c = (output % 100) << 1; + + output /= 100; + memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2); + i += 2; + } + if (output >= 10) + { + const uint32 c = output << 1; + + memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2); + } + else + { + result[index] = (char) ('0' + output); + } + + if (index == 1) + { + /* + * nexp is 1..6 here, representing the number of digits before the + * point. A value of 7+ is not possible because we switch to + * scientific notation when the display exponent reaches 6. + */ + Assert(nexp < 7); + /* gcc only seems to want to optimize memmove for small 2^n */ + if (nexp & 4) + { + memmove(result + index - 1, result + index, 4); + index += 4; + } + if (nexp & 2) + { + memmove(result + index - 1, result + index, 2); + index += 2; + } + if (nexp & 1) + { + result[index - 1] = result[index]; + } + result[nexp] = '.'; + index = olength + 1; + } + else if (exp >= 0) + { + /* we supplied the trailing zeros earlier, now just set the length. */ + index = olength + exp; + } + else + { + index = olength + (2 - nexp); + } + + return index; +} + +static inline int +to_chars(const floating_decimal_32 v, const bool sign, char *const result) +{ + /* Step 5: Print the decimal representation. */ + int index = 0; + + uint32 output = v.mantissa; + uint32 olength = decimalLength(output); + int32 exp = v.exponent + olength - 1; + + if (sign) + result[index++] = '-'; + + /* + * The thresholds for fixed-point output are chosen to match printf + * defaults. Beware that both the code of to_chars_f and the value of + * FLOAT_SHORTEST_DECIMAL_LEN are sensitive to these thresholds. + */ + if (exp >= -4 && exp < 6) + return to_chars_f(v, olength, result + index) + sign; + + /* + * If v.exponent is exactly 0, we might have reached here via the small + * integer fast path, in which case v.mantissa might contain trailing + * (decimal) zeros. For scientific notation we need to move these zeros + * into the exponent. (For fixed point this doesn't matter, which is why + * we do this here rather than above.) + * + * Since we already calculated the display exponent (exp) above based on + * the old decimal length, that value does not change here. Instead, we + * just reduce the display length for each digit removed. + * + * If we didn't get here via the fast path, the raw exponent will not + * usually be 0, and there will be no trailing zeros, so we pay no more + * than one div10/multiply extra cost. We claw back half of that by + * checking for divisibility by 2 before dividing by 10. + */ + if (v.exponent == 0) + { + while ((output & 1) == 0) + { + const uint32 q = output / 10; + const uint32 r = output - 10 * q; + + if (r != 0) + break; + output = q; + --olength; + } + } + + /*---- + * Print the decimal digits. + * The following code is equivalent to: + * + * for (uint32 i = 0; i < olength - 1; ++i) { + * const uint32 c = output % 10; output /= 10; + * result[index + olength - i] = (char) ('0' + c); + * } + * result[index] = '0' + output % 10; + */ + uint32 i = 0; + + while (output >= 10000) + { + const uint32 c = output - 10000 * (output / 10000); + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; + + output /= 10000; + + memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2); + memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2); + i += 4; + } + if (output >= 100) + { + const uint32 c = (output % 100) << 1; + + output /= 100; + memcpy(result + index + olength - i - 1, DIGIT_TABLE + c, 2); + i += 2; + } + if (output >= 10) + { + const uint32 c = output << 1; + + /* + * We can't use memcpy here: the decimal dot goes between these two + * digits. + */ + result[index + olength - i] = DIGIT_TABLE[c + 1]; + result[index] = DIGIT_TABLE[c]; + } + else + { + result[index] = (char) ('0' + output); + } + + /* Print decimal point if needed. */ + if (olength > 1) + { + result[index + 1] = '.'; + index += olength + 1; + } + else + { + ++index; + } + + /* Print the exponent. */ + result[index++] = 'e'; + if (exp < 0) + { + result[index++] = '-'; + exp = -exp; + } + else + result[index++] = '+'; + + memcpy(result + index, DIGIT_TABLE + 2 * exp, 2); + index += 2; + + return index; +} + +static inline bool +f2d_small_int(const uint32 ieeeMantissa, + const uint32 ieeeExponent, + floating_decimal_32 *v) +{ + const int32 e2 = (int32) ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS; + + /* + * Avoid using multiple "return false;" here since it tends to provoke the + * compiler into inlining multiple copies of f2d, which is undesirable. + */ + + if (e2 >= -FLOAT_MANTISSA_BITS && e2 <= 0) + { + /*---- + * Since 2^23 <= m2 < 2^24 and 0 <= -e2 <= 23: + * 1 <= f = m2 / 2^-e2 < 2^24. + * + * Test if the lower -e2 bits of the significand are 0, i.e. whether + * the fraction is 0. We can use ieeeMantissa here, since the implied + * 1 bit can never be tested by this; the implied 1 can only be part + * of a fraction if e2 < -FLOAT_MANTISSA_BITS which we already + * checked. (e.g. 0.5 gives ieeeMantissa == 0 and e2 == -24) + */ + const uint32 mask = (1U << -e2) - 1; + const uint32 fraction = ieeeMantissa & mask; + + if (fraction == 0) + { + /*---- + * f is an integer in the range [1, 2^24). + * Note: mantissa might contain trailing (decimal) 0's. + * Note: since 2^24 < 10^9, there is no need to adjust + * decimalLength(). + */ + const uint32 m2 = (1U << FLOAT_MANTISSA_BITS) | ieeeMantissa; + + v->mantissa = m2 >> -e2; + v->exponent = 0; + return true; + } + } + + return false; +} + +/* + * Store the shortest decimal representation of the given float as an + * UNTERMINATED string in the caller's supplied buffer (which must be at least + * FLOAT_SHORTEST_DECIMAL_LEN-1 bytes long). + * + * Returns the number of bytes stored. + */ +int +float_to_shortest_decimal_bufn(float f, char *result) +{ + /* + * Step 1: Decode the floating-point number, and unify normalized and + * subnormal cases. + */ + const uint32 bits = float_to_bits(f); + + /* Decode bits into sign, mantissa, and exponent. */ + const bool ieeeSign = ((bits >> (FLOAT_MANTISSA_BITS + FLOAT_EXPONENT_BITS)) & 1) != 0; + const uint32 ieeeMantissa = bits & ((1u << FLOAT_MANTISSA_BITS) - 1); + const uint32 ieeeExponent = (bits >> FLOAT_MANTISSA_BITS) & ((1u << FLOAT_EXPONENT_BITS) - 1); + + /* Case distinction; exit early for the easy cases. */ + if (ieeeExponent == ((1u << FLOAT_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0)) + { + return copy_special_str(result, ieeeSign, (ieeeExponent != 0), (ieeeMantissa != 0)); + } + + floating_decimal_32 v; + const bool isSmallInt = f2d_small_int(ieeeMantissa, ieeeExponent, &v); + + if (!isSmallInt) + { + v = f2d(ieeeMantissa, ieeeExponent); + } + + return to_chars(v, ieeeSign, result); +} + +/* + * Store the shortest decimal representation of the given float as a + * null-terminated string in the caller's supplied buffer (which must be at + * least FLOAT_SHORTEST_DECIMAL_LEN bytes long). + * + * Returns the string length. + */ +int +float_to_shortest_decimal_buf(float f, char *result) +{ + const int index = float_to_shortest_decimal_bufn(f, result); + + /* Terminate the string. */ + Assert(index < FLOAT_SHORTEST_DECIMAL_LEN); + result[index] = '\0'; + return index; +} + +/* + * Return the shortest decimal representation as a null-terminated palloc'd + * string (outside the backend, uses malloc() instead). + * + * Caller is responsible for freeing the result. + */ +char * +float_to_shortest_decimal(float f) +{ + char *const result = (char *) palloc(FLOAT_SHORTEST_DECIMAL_LEN); + + float_to_shortest_decimal_buf(f, result); + return result; +} diff --git a/src/common/fe_memutils.c b/src/common/fe_memutils.c new file mode 100644 index 0000000..3bad81e --- /dev/null +++ b/src/common/fe_memutils.c @@ -0,0 +1,175 @@ +/*------------------------------------------------------------------------- + * + * fe_memutils.c + * memory management support for frontend code + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/fe_memutils.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#error "This file is not expected to be compiled for backend code" +#endif + +#include "postgres_fe.h" + +static inline void * +pg_malloc_internal(size_t size, int flags) +{ + void *tmp; + + /* Avoid unportable behavior of malloc(0) */ + if (size == 0) + size = 1; + tmp = malloc(size); + if (tmp == NULL) + { + if ((flags & MCXT_ALLOC_NO_OOM) == 0) + { + fprintf(stderr, _("out of memory\n")); + exit(EXIT_FAILURE); + } + return NULL; + } + + if ((flags & MCXT_ALLOC_ZERO) != 0) + MemSet(tmp, 0, size); + return tmp; +} + +void * +pg_malloc(size_t size) +{ + return pg_malloc_internal(size, 0); +} + +void * +pg_malloc0(size_t size) +{ + return pg_malloc_internal(size, MCXT_ALLOC_ZERO); +} + +void * +pg_malloc_extended(size_t size, int flags) +{ + return pg_malloc_internal(size, flags); +} + +void * +pg_realloc(void *ptr, size_t size) +{ + void *tmp; + + /* Avoid unportable behavior of realloc(NULL, 0) */ + if (ptr == NULL && size == 0) + size = 1; + tmp = realloc(ptr, size); + if (!tmp) + { + fprintf(stderr, _("out of memory\n")); + exit(EXIT_FAILURE); + } + return tmp; +} + +/* + * "Safe" wrapper around strdup(). + */ +char * +pg_strdup(const char *in) +{ + char *tmp; + + if (!in) + { + fprintf(stderr, + _("cannot duplicate null pointer (internal error)\n")); + exit(EXIT_FAILURE); + } + tmp = strdup(in); + if (!tmp) + { + fprintf(stderr, _("out of memory\n")); + exit(EXIT_FAILURE); + } + return tmp; +} + +void +pg_free(void *ptr) +{ + free(ptr); +} + +/* + * Frontend emulation of backend memory management functions. Useful for + * programs that compile backend files. + */ +void * +palloc(Size size) +{ + return pg_malloc_internal(size, 0); +} + +void * +palloc0(Size size) +{ + return pg_malloc_internal(size, MCXT_ALLOC_ZERO); +} + +void * +palloc_extended(Size size, int flags) +{ + return pg_malloc_internal(size, flags); +} + +void +pfree(void *pointer) +{ + pg_free(pointer); +} + +char * +pstrdup(const char *in) +{ + return pg_strdup(in); +} + +char * +pnstrdup(const char *in, Size size) +{ + char *tmp; + int len; + + if (!in) + { + fprintf(stderr, + _("cannot duplicate null pointer (internal error)\n")); + exit(EXIT_FAILURE); + } + + len = strnlen(in, size); + tmp = malloc(len + 1); + if (tmp == NULL) + { + fprintf(stderr, _("out of memory\n")); + exit(EXIT_FAILURE); + } + + memcpy(tmp, in, len); + tmp[len] = '\0'; + + return tmp; +} + +void * +repalloc(void *pointer, Size size) +{ + return pg_realloc(pointer, size); +} diff --git a/src/common/file_perm.c b/src/common/file_perm.c new file mode 100644 index 0000000..60f88d2 --- /dev/null +++ b/src/common/file_perm.c @@ -0,0 +1,91 @@ +/*------------------------------------------------------------------------- + * + * File and directory permission routines + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/common/file_perm.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "common/file_perm.h" + +/* Modes for creating directories and files in the data directory */ +int pg_dir_create_mode = PG_DIR_MODE_OWNER; +int pg_file_create_mode = PG_FILE_MODE_OWNER; + +/* + * Mode mask to pass to umask(). This is more of a preventative measure since + * all file/directory creates should be performed using the create modes above. + */ +int pg_mode_mask = PG_MODE_MASK_OWNER; + +/* + * Set create modes and mask to use when writing to PGDATA based on the data + * directory mode passed. If group read/execute are present in the mode, then + * create modes and mask will be relaxed to allow group read/execute on all + * newly created files and directories. + */ +void +SetDataDirectoryCreatePerm(int dataDirMode) +{ + /* If the data directory mode has group access */ + if ((PG_DIR_MODE_GROUP & dataDirMode) == PG_DIR_MODE_GROUP) + { + pg_dir_create_mode = PG_DIR_MODE_GROUP; + pg_file_create_mode = PG_FILE_MODE_GROUP; + pg_mode_mask = PG_MODE_MASK_GROUP; + } + /* Else use default permissions */ + else + { + pg_dir_create_mode = PG_DIR_MODE_OWNER; + pg_file_create_mode = PG_FILE_MODE_OWNER; + pg_mode_mask = PG_MODE_MASK_OWNER; + } +} + +#ifdef FRONTEND + +/* + * Get the create modes and mask to use when writing to PGDATA by examining the + * mode of the PGDATA directory and calling SetDataDirectoryCreatePerm(). + * + * Errors are not handled here and should be reported by the application when + * false is returned. + * + * Suppress when on Windows, because there may not be proper support for Unix-y + * file permissions. + */ +bool +GetDataDirectoryCreatePerm(const char *dataDir) +{ +#if !defined(WIN32) && !defined(__CYGWIN__) + struct stat statBuf; + + /* + * If an error occurs getting the mode then return false. The caller is + * responsible for generating an error, if appropriate, indicating that we + * were unable to access the data directory. + */ + if (stat(dataDir, &statBuf) == -1) + return false; + + /* Set permissions */ + SetDataDirectoryCreatePerm(statBuf.st_mode); + return true; +#else /* !defined(WIN32) && !defined(__CYGWIN__) */ + /* + * On Windows, we don't have anything to do here since they don't have + * Unix-y permissions. + */ + return true; +#endif +} + + +#endif /* FRONTEND */ diff --git a/src/common/file_utils.c b/src/common/file_utils.c new file mode 100644 index 0000000..74833c4 --- /dev/null +++ b/src/common/file_utils.c @@ -0,0 +1,582 @@ +/*------------------------------------------------------------------------- + * + * File-processing utility routines. + * + * Assorted utility functions to work on files. + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/common/file_utils.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <dirent.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "common/file_utils.h" +#ifdef FRONTEND +#include "common/logging.h" +#endif +#include "port/pg_iovec.h" + +#ifdef FRONTEND + +/* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */ +#if defined(HAVE_SYNC_FILE_RANGE) +#define PG_FLUSH_DATA_WORKS 1 +#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED) +#define PG_FLUSH_DATA_WORKS 1 +#endif + +/* + * pg_xlog has been renamed to pg_wal in version 10. + */ +#define MINIMUM_VERSION_FOR_PG_WAL 100000 + +#ifdef PG_FLUSH_DATA_WORKS +static int pre_sync_fname(const char *fname, bool isdir); +#endif +static void walkdir(const char *path, + int (*action) (const char *fname, bool isdir), + bool process_symlinks); + +/* + * Issue fsync recursively on PGDATA and all its contents. + * + * We fsync regular files and directories wherever they are, but we follow + * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc. + * Other symlinks are presumed to point at files we're not responsible for + * fsyncing, and might not have privileges to write at all. + * + * serverVersion indicates the version of the server to be fsync'd. + */ +void +fsync_pgdata(const char *pg_data, + int serverVersion) +{ + bool xlog_is_symlink; + char pg_wal[MAXPGPATH]; + char pg_tblspc[MAXPGPATH]; + + /* handle renaming of pg_xlog to pg_wal in post-10 clusters */ + snprintf(pg_wal, MAXPGPATH, "%s/%s", pg_data, + serverVersion < MINIMUM_VERSION_FOR_PG_WAL ? "pg_xlog" : "pg_wal"); + snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data); + + /* + * If pg_wal is a symlink, we'll need to recurse into it separately, + * because the first walkdir below will ignore it. + */ + xlog_is_symlink = false; + + { + struct stat st; + + if (lstat(pg_wal, &st) < 0) + pg_log_error("could not stat file \"%s\": %m", pg_wal); + else if (S_ISLNK(st.st_mode)) + xlog_is_symlink = true; + } + + /* + * If possible, hint to the kernel that we're soon going to fsync the data + * directory and its contents. + */ +#ifdef PG_FLUSH_DATA_WORKS + walkdir(pg_data, pre_sync_fname, false); + if (xlog_is_symlink) + walkdir(pg_wal, pre_sync_fname, false); + walkdir(pg_tblspc, pre_sync_fname, true); +#endif + + /* + * Now we do the fsync()s in the same order. + * + * The main call ignores symlinks, so in addition to specially processing + * pg_wal if it's a symlink, pg_tblspc has to be visited separately with + * process_symlinks = true. Note that if there are any plain directories + * in pg_tblspc, they'll get fsync'd twice. That's not an expected case + * so we don't worry about optimizing it. + */ + walkdir(pg_data, fsync_fname, false); + if (xlog_is_symlink) + walkdir(pg_wal, fsync_fname, false); + walkdir(pg_tblspc, fsync_fname, true); +} + +/* + * Issue fsync recursively on the given directory and all its contents. + * + * This is a convenient wrapper on top of walkdir(). + */ +void +fsync_dir_recurse(const char *dir) +{ + /* + * If possible, hint to the kernel that we're soon going to fsync the data + * directory and its contents. + */ +#ifdef PG_FLUSH_DATA_WORKS + walkdir(dir, pre_sync_fname, false); +#endif + + walkdir(dir, fsync_fname, false); +} + +/* + * walkdir: recursively walk a directory, applying the action to each + * regular file and directory (including the named directory itself). + * + * If process_symlinks is true, the action and recursion are also applied + * to regular files and directories that are pointed to by symlinks in the + * given directory; otherwise symlinks are ignored. Symlinks are always + * ignored in subdirectories, ie we intentionally don't pass down the + * process_symlinks flag to recursive calls. + * + * Errors are reported but not considered fatal. + * + * See also walkdir in fd.c, which is a backend version of this logic. + */ +static void +walkdir(const char *path, + int (*action) (const char *fname, bool isdir), + bool process_symlinks) +{ + DIR *dir; + struct dirent *de; + + dir = opendir(path); + if (dir == NULL) + { + pg_log_error("could not open directory \"%s\": %m", path); + return; + } + + while (errno = 0, (de = readdir(dir)) != NULL) + { + char subpath[MAXPGPATH * 2]; + + if (strcmp(de->d_name, ".") == 0 || + strcmp(de->d_name, "..") == 0) + continue; + + snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name); + + switch (get_dirent_type(subpath, de, process_symlinks, PG_LOG_ERROR)) + { + case PGFILETYPE_REG: + (*action) (subpath, false); + break; + case PGFILETYPE_DIR: + walkdir(subpath, action, false); + break; + default: + + /* + * Errors are already reported directly by get_dirent_type(), + * and any remaining symlinks and unknown file types are + * ignored. + */ + break; + } + } + + if (errno) + pg_log_error("could not read directory \"%s\": %m", path); + + (void) closedir(dir); + + /* + * It's important to fsync the destination directory itself as individual + * file fsyncs don't guarantee that the directory entry for the file is + * synced. Recent versions of ext4 have made the window much wider but + * it's been an issue for ext3 and other filesystems in the past. + */ + (*action) (path, true); +} + +/* + * Hint to the OS that it should get ready to fsync() this file. + * + * Ignores errors trying to open unreadable files, and reports other errors + * non-fatally. + */ +#ifdef PG_FLUSH_DATA_WORKS + +static int +pre_sync_fname(const char *fname, bool isdir) +{ + int fd; + + fd = open(fname, O_RDONLY | PG_BINARY, 0); + + if (fd < 0) + { + if (errno == EACCES || (isdir && errno == EISDIR)) + return 0; + pg_log_error("could not open file \"%s\": %m", fname); + return -1; + } + + /* + * We do what pg_flush_data() would do in the backend: prefer to use + * sync_file_range, but fall back to posix_fadvise. We ignore errors + * because this is only a hint. + */ +#if defined(HAVE_SYNC_FILE_RANGE) + (void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE); +#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED) + (void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED); +#else +#error PG_FLUSH_DATA_WORKS should not have been defined +#endif + + (void) close(fd); + return 0; +} + +#endif /* PG_FLUSH_DATA_WORKS */ + +/* + * fsync_fname -- Try to fsync a file or directory + * + * Ignores errors trying to open unreadable files, or trying to fsync + * directories on systems where that isn't allowed/required. All other errors + * are fatal. + */ +int +fsync_fname(const char *fname, bool isdir) +{ + int fd; + int flags; + int returncode; + + /* + * Some OSs require directories to be opened read-only whereas other + * systems don't allow us to fsync files opened read-only; so we need both + * cases here. Using O_RDWR will cause us to fail to fsync files that are + * not writable by our userid, but we assume that's OK. + */ + flags = PG_BINARY; + if (!isdir) + flags |= O_RDWR; + else + flags |= O_RDONLY; + + /* + * Open the file, silently ignoring errors about unreadable files (or + * unsupported operations, e.g. opening a directory under Windows), and + * logging others. + */ + fd = open(fname, flags, 0); + if (fd < 0) + { + if (errno == EACCES || (isdir && errno == EISDIR)) + return 0; + pg_log_error("could not open file \"%s\": %m", fname); + return -1; + } + + returncode = fsync(fd); + + /* + * Some OSes don't allow us to fsync directories at all, so we can ignore + * those errors. Anything else needs to be reported. + */ + if (returncode != 0 && !(isdir && (errno == EBADF || errno == EINVAL))) + { + pg_log_error("could not fsync file \"%s\": %m", fname); + (void) close(fd); + exit(EXIT_FAILURE); + } + + (void) close(fd); + return 0; +} + +/* + * fsync_parent_path -- fsync the parent path of a file or directory + * + * This is aimed at making file operations persistent on disk in case of + * an OS crash or power failure. + */ +int +fsync_parent_path(const char *fname) +{ + char parentpath[MAXPGPATH]; + + strlcpy(parentpath, fname, MAXPGPATH); + get_parent_directory(parentpath); + + /* + * get_parent_directory() returns an empty string if the input argument is + * just a file name (see comments in path.c), so handle that as being the + * current directory. + */ + if (strlen(parentpath) == 0) + strlcpy(parentpath, ".", MAXPGPATH); + + if (fsync_fname(parentpath, true) != 0) + return -1; + + return 0; +} + +/* + * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability + * + * Wrapper around rename, similar to the backend version. + */ +int +durable_rename(const char *oldfile, const char *newfile) +{ + int fd; + + /* + * First fsync the old and target path (if it exists), to ensure that they + * are properly persistent on disk. Syncing the target file is not + * strictly necessary, but it makes it easier to reason about crashes; + * because it's then guaranteed that either source or target file exists + * after a crash. + */ + if (fsync_fname(oldfile, false) != 0) + return -1; + + fd = open(newfile, PG_BINARY | O_RDWR, 0); + if (fd < 0) + { + if (errno != ENOENT) + { + pg_log_error("could not open file \"%s\": %m", newfile); + return -1; + } + } + else + { + if (fsync(fd) != 0) + { + pg_log_error("could not fsync file \"%s\": %m", newfile); + close(fd); + exit(EXIT_FAILURE); + } + close(fd); + } + + /* Time to do the real deal... */ + if (rename(oldfile, newfile) != 0) + { + pg_log_error("could not rename file \"%s\" to \"%s\": %m", + oldfile, newfile); + return -1; + } + + /* + * To guarantee renaming the file is persistent, fsync the file with its + * new name, and its containing directory. + */ + if (fsync_fname(newfile, false) != 0) + return -1; + + if (fsync_parent_path(newfile) != 0) + return -1; + + return 0; +} + +#endif /* FRONTEND */ + +/* + * Return the type of a directory entry. + * + * In frontend code, elevel should be a level from logging.h; in backend code + * it should be a level from elog.h. + */ +PGFileType +get_dirent_type(const char *path, + const struct dirent *de, + bool look_through_symlinks, + int elevel) +{ + PGFileType result; + + /* + * Some systems tell us the type directly in the dirent struct, but that's + * a BSD and Linux extension not required by POSIX. Even when the + * interface is present, sometimes the type is unknown, depending on the + * filesystem. + */ +#if defined(DT_REG) && defined(DT_DIR) && defined(DT_LNK) + if (de->d_type == DT_REG) + result = PGFILETYPE_REG; + else if (de->d_type == DT_DIR) + result = PGFILETYPE_DIR; + else if (de->d_type == DT_LNK && !look_through_symlinks) + result = PGFILETYPE_LNK; + else + result = PGFILETYPE_UNKNOWN; +#else + result = PGFILETYPE_UNKNOWN; +#endif + + if (result == PGFILETYPE_UNKNOWN) + { + struct stat fst; + int sret; + + + if (look_through_symlinks) + sret = stat(path, &fst); + else + sret = lstat(path, &fst); + + if (sret < 0) + { + result = PGFILETYPE_ERROR; +#ifdef FRONTEND + pg_log_generic(elevel, PG_LOG_PRIMARY, "could not stat file \"%s\": %m", path); +#else + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", path))); +#endif + } + else if (S_ISREG(fst.st_mode)) + result = PGFILETYPE_REG; + else if (S_ISDIR(fst.st_mode)) + result = PGFILETYPE_DIR; + else if (S_ISLNK(fst.st_mode)) + result = PGFILETYPE_LNK; + } + + return result; +} + +/* + * pg_pwritev_with_retry + * + * Convenience wrapper for pg_pwritev() that retries on partial write. If an + * error is returned, it is unspecified how much has been written. + */ +ssize_t +pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset) +{ + struct iovec iov_copy[PG_IOV_MAX]; + ssize_t sum = 0; + ssize_t part; + + /* We'd better have space to make a copy, in case we need to retry. */ + if (iovcnt > PG_IOV_MAX) + { + errno = EINVAL; + return -1; + } + + for (;;) + { + /* Write as much as we can. */ + part = pg_pwritev(fd, iov, iovcnt, offset); + if (part < 0) + return -1; + +#ifdef SIMULATE_SHORT_WRITE + part = Min(part, 4096); +#endif + + /* Count our progress. */ + sum += part; + offset += part; + + /* Step over iovecs that are done. */ + while (iovcnt > 0 && iov->iov_len <= part) + { + part -= iov->iov_len; + ++iov; + --iovcnt; + } + + /* Are they all done? */ + if (iovcnt == 0) + { + /* We don't expect the kernel to write more than requested. */ + Assert(part == 0); + break; + } + + /* + * Move whatever's left to the front of our mutable copy and adjust + * the leading iovec. + */ + Assert(iovcnt > 0); + memmove(iov_copy, iov, sizeof(*iov) * iovcnt); + Assert(iov->iov_len > part); + iov_copy[0].iov_base = (char *) iov_copy[0].iov_base + part; + iov_copy[0].iov_len -= part; + iov = iov_copy; + } + + return sum; +} + +/* + * pg_pwrite_zeros + * + * Writes zeros to file worth "size" bytes at "offset" (from the start of the + * file), using vectored I/O. + * + * Returns the total amount of data written. On failure, a negative value + * is returned with errno set. + */ +ssize_t +pg_pwrite_zeros(int fd, size_t size, off_t offset) +{ + static const PGIOAlignedBlock zbuffer = {{0}}; /* worth BLCKSZ */ + void *zerobuf_addr = unconstify(PGIOAlignedBlock *, &zbuffer)->data; + struct iovec iov[PG_IOV_MAX]; + size_t remaining_size = size; + ssize_t total_written = 0; + + /* Loop, writing as many blocks as we can for each system call. */ + while (remaining_size > 0) + { + int iovcnt = 0; + ssize_t written; + + for (; iovcnt < PG_IOV_MAX && remaining_size > 0; iovcnt++) + { + size_t this_iov_size; + + iov[iovcnt].iov_base = zerobuf_addr; + + if (remaining_size < BLCKSZ) + this_iov_size = remaining_size; + else + this_iov_size = BLCKSZ; + + iov[iovcnt].iov_len = this_iov_size; + remaining_size -= this_iov_size; + } + + written = pg_pwritev_with_retry(fd, iov, iovcnt, offset); + + if (written < 0) + return written; + + offset += written; + total_written += written; + } + + Assert(total_written == size); + + return total_written; +} diff --git a/src/common/hashfn.c b/src/common/hashfn.c new file mode 100644 index 0000000..2490607 --- /dev/null +++ b/src/common/hashfn.c @@ -0,0 +1,692 @@ +/*------------------------------------------------------------------------- + * + * hashfn.c + * Generic hashing functions, and hash functions for use in dynahash.c + * hashtables + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/hashfn.c + * + * NOTES + * It is expected that every bit of a hash function's 32-bit result is + * as random as every other; failure to ensure this is likely to lead + * to poor performance of hash tables. In most cases a hash + * function should use hash_bytes() or its variant hash_bytes_uint32(), + * or the wrappers hash_any() and hash_uint32 defined in hashfn.h. + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "common/hashfn.h" +#include "port/pg_bitutils.h" + + +/* + * This hash function was written by Bob Jenkins + * (bob_jenkins@burtleburtle.net), and superficially adapted + * for PostgreSQL by Neil Conway. For more information on this + * hash function, see http://burtleburtle.net/bob/hash/doobs.html, + * or Bob's article in Dr. Dobb's Journal, Sept. 1997. + * + * In the current code, we have adopted Bob's 2006 update of his hash + * function to fetch the data a word at a time when it is suitably aligned. + * This makes for a useful speedup, at the cost of having to maintain + * four code paths (aligned vs unaligned, and little-endian vs big-endian). + * It also uses two separate mixing functions mix() and final(), instead + * of a slower multi-purpose function. + */ + +/* Get a bit mask of the bits set in non-uint32 aligned addresses */ +#define UINT32_ALIGN_MASK (sizeof(uint32) - 1) + +#define rot(x,k) pg_rotate_left32(x, k) + +/*---------- + * mix -- mix 3 32-bit values reversibly. + * + * This is reversible, so any information in (a,b,c) before mix() is + * still in (a,b,c) after mix(). + * + * If four pairs of (a,b,c) inputs are run through mix(), or through + * mix() in reverse, there are at least 32 bits of the output that + * are sometimes the same for one pair and different for another pair. + * This was tested for: + * * pairs that differed by one bit, by two bits, in any combination + * of top bits of (a,b,c), or in any combination of bottom bits of + * (a,b,c). + * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + * is commonly produced by subtraction) look like a single 1-bit + * difference. + * * the base values were pseudorandom, all zero but one bit set, or + * all zero plus a counter that starts at zero. + * + * This does not achieve avalanche. There are input bits of (a,b,c) + * that fail to affect some output bits of (a,b,c), especially of a. The + * most thoroughly mixed value is c, but it doesn't really even achieve + * avalanche in c. + * + * This allows some parallelism. Read-after-writes are good at doubling + * the number of bits affected, so the goal of mixing pulls in the opposite + * direction from the goal of parallelism. I did what I could. Rotates + * seem to cost as much as shifts on every machine I could lay my hands on, + * and rotates are much kinder to the top and bottom bits, so I used rotates. + *---------- + */ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/*---------- + * final -- final mixing of 3 32-bit values (a,b,c) into c + * + * Pairs of (a,b,c) values differing in only a few bits will usually + * produce values of c that look totally different. This was tested for + * * pairs that differed by one bit, by two bits, in any combination + * of top bits of (a,b,c), or in any combination of bottom bits of + * (a,b,c). + * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + * is commonly produced by subtraction) look like a single 1-bit + * difference. + * * the base values were pseudorandom, all zero but one bit set, or + * all zero plus a counter that starts at zero. + * + * The use of separate functions for mix() and final() allow for a + * substantial performance increase since final() does not need to + * do well in reverse, but is does need to affect all output bits. + * mix(), on the other hand, does not need to affect all output + * bits (affecting 32 bits is enough). The original hash function had + * a single mixing operation that had to satisfy both sets of requirements + * and was slower as a result. + *---------- + */ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c, 4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +/* + * hash_bytes() -- hash a variable-length key into a 32-bit value + * k : the key (the unaligned variable-length array of bytes) + * len : the length of the key, counting by bytes + * + * Returns a uint32 value. Every bit of the key affects every bit of + * the return value. Every 1-bit and 2-bit delta achieves avalanche. + * About 6*len+35 instructions. The best hash table sizes are powers + * of 2. There is no need to do mod a prime (mod is sooo slow!). + * If you need less than 32 bits, use a bitmask. + * + * This procedure must never throw elog(ERROR); the ResourceOwner code + * relies on this not to fail. + * + * Note: we could easily change this function to return a 64-bit hash value + * by using the final values of both b and c. b is perhaps a little less + * well mixed than c, however. + */ +uint32 +hash_bytes(const unsigned char *k, int keylen) +{ + uint32 a, + b, + c, + len; + + /* Set up the internal state */ + len = keylen; + a = b = c = 0x9e3779b9 + len + 3923095; + + /* If the source pointer is word-aligned, we use word-wide fetches */ + if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0) + { + /* Code path for aligned source data */ + const uint32 *ka = (const uint32 *) k; + + /* handle most of the key */ + while (len >= 12) + { + a += ka[0]; + b += ka[1]; + c += ka[2]; + mix(a, b, c); + ka += 3; + len -= 12; + } + + /* handle the last 11 bytes */ + k = (const unsigned char *) ka; +#ifdef WORDS_BIGENDIAN + switch (len) + { + case 11: + c += ((uint32) k[10] << 8); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 24); + /* fall through */ + case 8: + /* the lowest byte of c is reserved for the length */ + b += ka[1]; + a += ka[0]; + break; + case 7: + b += ((uint32) k[6] << 8); + /* fall through */ + case 6: + b += ((uint32) k[5] << 16); + /* fall through */ + case 5: + b += ((uint32) k[4] << 24); + /* fall through */ + case 4: + a += ka[0]; + break; + case 3: + a += ((uint32) k[2] << 8); + /* fall through */ + case 2: + a += ((uint32) k[1] << 16); + /* fall through */ + case 1: + a += ((uint32) k[0] << 24); + /* case 0: nothing left to add */ + } +#else /* !WORDS_BIGENDIAN */ + switch (len) + { + case 11: + c += ((uint32) k[10] << 24); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 8); + /* fall through */ + case 8: + /* the lowest byte of c is reserved for the length */ + b += ka[1]; + a += ka[0]; + break; + case 7: + b += ((uint32) k[6] << 16); + /* fall through */ + case 6: + b += ((uint32) k[5] << 8); + /* fall through */ + case 5: + b += k[4]; + /* fall through */ + case 4: + a += ka[0]; + break; + case 3: + a += ((uint32) k[2] << 16); + /* fall through */ + case 2: + a += ((uint32) k[1] << 8); + /* fall through */ + case 1: + a += k[0]; + /* case 0: nothing left to add */ + } +#endif /* WORDS_BIGENDIAN */ + } + else + { + /* Code path for non-aligned source data */ + + /* handle most of the key */ + while (len >= 12) + { +#ifdef WORDS_BIGENDIAN + a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24)); + b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24)); + c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24)); +#else /* !WORDS_BIGENDIAN */ + a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24)); + b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24)); + c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24)); +#endif /* WORDS_BIGENDIAN */ + mix(a, b, c); + k += 12; + len -= 12; + } + + /* handle the last 11 bytes */ +#ifdef WORDS_BIGENDIAN + switch (len) + { + case 11: + c += ((uint32) k[10] << 8); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 24); + /* fall through */ + case 8: + /* the lowest byte of c is reserved for the length */ + b += k[7]; + /* fall through */ + case 7: + b += ((uint32) k[6] << 8); + /* fall through */ + case 6: + b += ((uint32) k[5] << 16); + /* fall through */ + case 5: + b += ((uint32) k[4] << 24); + /* fall through */ + case 4: + a += k[3]; + /* fall through */ + case 3: + a += ((uint32) k[2] << 8); + /* fall through */ + case 2: + a += ((uint32) k[1] << 16); + /* fall through */ + case 1: + a += ((uint32) k[0] << 24); + /* case 0: nothing left to add */ + } +#else /* !WORDS_BIGENDIAN */ + switch (len) + { + case 11: + c += ((uint32) k[10] << 24); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 8); + /* fall through */ + case 8: + /* the lowest byte of c is reserved for the length */ + b += ((uint32) k[7] << 24); + /* fall through */ + case 7: + b += ((uint32) k[6] << 16); + /* fall through */ + case 6: + b += ((uint32) k[5] << 8); + /* fall through */ + case 5: + b += k[4]; + /* fall through */ + case 4: + a += ((uint32) k[3] << 24); + /* fall through */ + case 3: + a += ((uint32) k[2] << 16); + /* fall through */ + case 2: + a += ((uint32) k[1] << 8); + /* fall through */ + case 1: + a += k[0]; + /* case 0: nothing left to add */ + } +#endif /* WORDS_BIGENDIAN */ + } + + final(a, b, c); + + /* report the result */ + return c; +} + +/* + * hash_bytes_extended() -- hash into a 64-bit value, using an optional seed + * k : the key (the unaligned variable-length array of bytes) + * len : the length of the key, counting by bytes + * seed : a 64-bit seed (0 means no seed) + * + * Returns a uint64 value. Otherwise similar to hash_bytes. + */ +uint64 +hash_bytes_extended(const unsigned char *k, int keylen, uint64 seed) +{ + uint32 a, + b, + c, + len; + + /* Set up the internal state */ + len = keylen; + a = b = c = 0x9e3779b9 + len + 3923095; + + /* If the seed is non-zero, use it to perturb the internal state. */ + if (seed != 0) + { + /* + * In essence, the seed is treated as part of the data being hashed, + * but for simplicity, we pretend that it's padded with four bytes of + * zeroes so that the seed constitutes a 12-byte chunk. + */ + a += (uint32) (seed >> 32); + b += (uint32) seed; + mix(a, b, c); + } + + /* If the source pointer is word-aligned, we use word-wide fetches */ + if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0) + { + /* Code path for aligned source data */ + const uint32 *ka = (const uint32 *) k; + + /* handle most of the key */ + while (len >= 12) + { + a += ka[0]; + b += ka[1]; + c += ka[2]; + mix(a, b, c); + ka += 3; + len -= 12; + } + + /* handle the last 11 bytes */ + k = (const unsigned char *) ka; +#ifdef WORDS_BIGENDIAN + switch (len) + { + case 11: + c += ((uint32) k[10] << 8); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 24); + /* fall through */ + case 8: + /* the lowest byte of c is reserved for the length */ + b += ka[1]; + a += ka[0]; + break; + case 7: + b += ((uint32) k[6] << 8); + /* fall through */ + case 6: + b += ((uint32) k[5] << 16); + /* fall through */ + case 5: + b += ((uint32) k[4] << 24); + /* fall through */ + case 4: + a += ka[0]; + break; + case 3: + a += ((uint32) k[2] << 8); + /* fall through */ + case 2: + a += ((uint32) k[1] << 16); + /* fall through */ + case 1: + a += ((uint32) k[0] << 24); + /* case 0: nothing left to add */ + } +#else /* !WORDS_BIGENDIAN */ + switch (len) + { + case 11: + c += ((uint32) k[10] << 24); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 8); + /* fall through */ + case 8: + /* the lowest byte of c is reserved for the length */ + b += ka[1]; + a += ka[0]; + break; + case 7: + b += ((uint32) k[6] << 16); + /* fall through */ + case 6: + b += ((uint32) k[5] << 8); + /* fall through */ + case 5: + b += k[4]; + /* fall through */ + case 4: + a += ka[0]; + break; + case 3: + a += ((uint32) k[2] << 16); + /* fall through */ + case 2: + a += ((uint32) k[1] << 8); + /* fall through */ + case 1: + a += k[0]; + /* case 0: nothing left to add */ + } +#endif /* WORDS_BIGENDIAN */ + } + else + { + /* Code path for non-aligned source data */ + + /* handle most of the key */ + while (len >= 12) + { +#ifdef WORDS_BIGENDIAN + a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24)); + b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24)); + c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24)); +#else /* !WORDS_BIGENDIAN */ + a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24)); + b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24)); + c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24)); +#endif /* WORDS_BIGENDIAN */ + mix(a, b, c); + k += 12; + len -= 12; + } + + /* handle the last 11 bytes */ +#ifdef WORDS_BIGENDIAN + switch (len) + { + case 11: + c += ((uint32) k[10] << 8); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 24); + /* fall through */ + case 8: + /* the lowest byte of c is reserved for the length */ + b += k[7]; + /* fall through */ + case 7: + b += ((uint32) k[6] << 8); + /* fall through */ + case 6: + b += ((uint32) k[5] << 16); + /* fall through */ + case 5: + b += ((uint32) k[4] << 24); + /* fall through */ + case 4: + a += k[3]; + /* fall through */ + case 3: + a += ((uint32) k[2] << 8); + /* fall through */ + case 2: + a += ((uint32) k[1] << 16); + /* fall through */ + case 1: + a += ((uint32) k[0] << 24); + /* case 0: nothing left to add */ + } +#else /* !WORDS_BIGENDIAN */ + switch (len) + { + case 11: + c += ((uint32) k[10] << 24); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 8); + /* fall through */ + case 8: + /* the lowest byte of c is reserved for the length */ + b += ((uint32) k[7] << 24); + /* fall through */ + case 7: + b += ((uint32) k[6] << 16); + /* fall through */ + case 6: + b += ((uint32) k[5] << 8); + /* fall through */ + case 5: + b += k[4]; + /* fall through */ + case 4: + a += ((uint32) k[3] << 24); + /* fall through */ + case 3: + a += ((uint32) k[2] << 16); + /* fall through */ + case 2: + a += ((uint32) k[1] << 8); + /* fall through */ + case 1: + a += k[0]; + /* case 0: nothing left to add */ + } +#endif /* WORDS_BIGENDIAN */ + } + + final(a, b, c); + + /* report the result */ + return ((uint64) b << 32) | c; +} + +/* + * hash_bytes_uint32() -- hash a 32-bit value to a 32-bit value + * + * This has the same result as + * hash_bytes(&k, sizeof(uint32)) + * but is faster and doesn't force the caller to store k into memory. + */ +uint32 +hash_bytes_uint32(uint32 k) +{ + uint32 a, + b, + c; + + a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095; + a += k; + + final(a, b, c); + + /* report the result */ + return c; +} + +/* + * hash_bytes_uint32_extended() -- hash 32-bit value to 64-bit value, with seed + * + * Like hash_bytes_uint32, this is a convenience function. + */ +uint64 +hash_bytes_uint32_extended(uint32 k, uint64 seed) +{ + uint32 a, + b, + c; + + a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095; + + if (seed != 0) + { + a += (uint32) (seed >> 32); + b += (uint32) seed; + mix(a, b, c); + } + + a += k; + + final(a, b, c); + + /* report the result */ + return ((uint64) b << 32) | c; +} + +/* + * string_hash: hash function for keys that are NUL-terminated strings. + * + * NOTE: this is the default hash function if none is specified. + */ +uint32 +string_hash(const void *key, Size keysize) +{ + /* + * If the string exceeds keysize-1 bytes, we want to hash only that many, + * because when it is copied into the hash table it will be truncated at + * that length. + */ + Size s_len = strlen((const char *) key); + + s_len = Min(s_len, keysize - 1); + return hash_bytes((const unsigned char *) key, (int) s_len); +} + +/* + * tag_hash: hash function for fixed-size tag values + */ +uint32 +tag_hash(const void *key, Size keysize) +{ + return hash_bytes((const unsigned char *) key, (int) keysize); +} + +/* + * uint32_hash: hash function for keys that are uint32 or int32 + * + * (tag_hash works for this case too, but is slower) + */ +uint32 +uint32_hash(const void *key, Size keysize) +{ + Assert(keysize == sizeof(uint32)); + return hash_bytes_uint32(*((const uint32 *) key)); +} diff --git a/src/common/hmac.c b/src/common/hmac.c new file mode 100644 index 0000000..f0b239d --- /dev/null +++ b/src/common/hmac.c @@ -0,0 +1,330 @@ +/*------------------------------------------------------------------------- + * + * hmac.c + * Implements Keyed-Hashing for Message Authentication (HMAC) + * + * Fallback implementation of HMAC, as specified in RFC 2104. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/hmac.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/cryptohash.h" +#include "common/hmac.h" +#include "common/md5.h" +#include "common/sha1.h" +#include "common/sha2.h" + +/* + * In backend, use palloc/pfree to ease the error handling. In frontend, + * use malloc to be able to return a failure status back to the caller. + */ +#ifndef FRONTEND +#define ALLOC(size) palloc(size) +#define FREE(ptr) pfree(ptr) +#else +#define ALLOC(size) malloc(size) +#define FREE(ptr) free(ptr) +#endif + +/* Set of error states */ +typedef enum pg_hmac_errno +{ + PG_HMAC_ERROR_NONE = 0, + PG_HMAC_ERROR_OOM, + PG_HMAC_ERROR_INTERNAL +} pg_hmac_errno; + +/* Internal pg_hmac_ctx structure */ +struct pg_hmac_ctx +{ + pg_cryptohash_ctx *hash; + pg_cryptohash_type type; + pg_hmac_errno error; + const char *errreason; + int block_size; + int digest_size; + + /* + * Use the largest block size among supported options. This wastes some + * memory but simplifies the allocation logic. + */ + uint8 k_ipad[PG_SHA512_BLOCK_LENGTH]; + uint8 k_opad[PG_SHA512_BLOCK_LENGTH]; +}; + +#define HMAC_IPAD 0x36 +#define HMAC_OPAD 0x5C + +/* + * pg_hmac_create + * + * Allocate a hash context. Returns NULL on failure for an OOM. The + * backend issues an error, without returning. + */ +pg_hmac_ctx * +pg_hmac_create(pg_cryptohash_type type) +{ + pg_hmac_ctx *ctx; + + ctx = ALLOC(sizeof(pg_hmac_ctx)); + if (ctx == NULL) + return NULL; + memset(ctx, 0, sizeof(pg_hmac_ctx)); + ctx->type = type; + ctx->error = PG_HMAC_ERROR_NONE; + ctx->errreason = NULL; + + /* + * Initialize the context data. This requires to know the digest and + * block lengths, that depend on the type of hash used. + */ + switch (type) + { + case PG_MD5: + ctx->digest_size = MD5_DIGEST_LENGTH; + ctx->block_size = MD5_BLOCK_SIZE; + break; + case PG_SHA1: + ctx->digest_size = SHA1_DIGEST_LENGTH; + ctx->block_size = SHA1_BLOCK_SIZE; + break; + case PG_SHA224: + ctx->digest_size = PG_SHA224_DIGEST_LENGTH; + ctx->block_size = PG_SHA224_BLOCK_LENGTH; + break; + case PG_SHA256: + ctx->digest_size = PG_SHA256_DIGEST_LENGTH; + ctx->block_size = PG_SHA256_BLOCK_LENGTH; + break; + case PG_SHA384: + ctx->digest_size = PG_SHA384_DIGEST_LENGTH; + ctx->block_size = PG_SHA384_BLOCK_LENGTH; + break; + case PG_SHA512: + ctx->digest_size = PG_SHA512_DIGEST_LENGTH; + ctx->block_size = PG_SHA512_BLOCK_LENGTH; + break; + } + + ctx->hash = pg_cryptohash_create(type); + if (ctx->hash == NULL) + { + explicit_bzero(ctx, sizeof(pg_hmac_ctx)); + FREE(ctx); + return NULL; + } + + return ctx; +} + +/* + * pg_hmac_init + * + * Initialize a HMAC context. Returns 0 on success, -1 on failure. + */ +int +pg_hmac_init(pg_hmac_ctx *ctx, const uint8 *key, size_t len) +{ + int i; + int digest_size; + int block_size; + uint8 *shrinkbuf = NULL; + + if (ctx == NULL) + return -1; + + digest_size = ctx->digest_size; + block_size = ctx->block_size; + + memset(ctx->k_opad, HMAC_OPAD, ctx->block_size); + memset(ctx->k_ipad, HMAC_IPAD, ctx->block_size); + + /* + * If the key is longer than the block size, pass it through the hash once + * to shrink it down. + */ + if (len > block_size) + { + pg_cryptohash_ctx *hash_ctx; + + /* temporary buffer for one-time shrink */ + shrinkbuf = ALLOC(digest_size); + if (shrinkbuf == NULL) + { + ctx->error = PG_HMAC_ERROR_OOM; + return -1; + } + memset(shrinkbuf, 0, digest_size); + + hash_ctx = pg_cryptohash_create(ctx->type); + if (hash_ctx == NULL) + { + ctx->error = PG_HMAC_ERROR_OOM; + FREE(shrinkbuf); + return -1; + } + + if (pg_cryptohash_init(hash_ctx) < 0 || + pg_cryptohash_update(hash_ctx, key, len) < 0 || + pg_cryptohash_final(hash_ctx, shrinkbuf, digest_size) < 0) + { + ctx->error = PG_HMAC_ERROR_INTERNAL; + ctx->errreason = pg_cryptohash_error(hash_ctx); + pg_cryptohash_free(hash_ctx); + FREE(shrinkbuf); + return -1; + } + + key = shrinkbuf; + len = digest_size; + pg_cryptohash_free(hash_ctx); + } + + for (i = 0; i < len; i++) + { + ctx->k_ipad[i] ^= key[i]; + ctx->k_opad[i] ^= key[i]; + } + + /* tmp = H(K XOR ipad, text) */ + if (pg_cryptohash_init(ctx->hash) < 0 || + pg_cryptohash_update(ctx->hash, ctx->k_ipad, ctx->block_size) < 0) + { + ctx->error = PG_HMAC_ERROR_INTERNAL; + ctx->errreason = pg_cryptohash_error(ctx->hash); + if (shrinkbuf) + FREE(shrinkbuf); + return -1; + } + + if (shrinkbuf) + FREE(shrinkbuf); + return 0; +} + +/* + * pg_hmac_update + * + * Update a HMAC context. Returns 0 on success, -1 on failure. + */ +int +pg_hmac_update(pg_hmac_ctx *ctx, const uint8 *data, size_t len) +{ + if (ctx == NULL) + return -1; + + if (pg_cryptohash_update(ctx->hash, data, len) < 0) + { + ctx->error = PG_HMAC_ERROR_INTERNAL; + ctx->errreason = pg_cryptohash_error(ctx->hash); + return -1; + } + + return 0; +} + +/* + * pg_hmac_final + * + * Finalize a HMAC context. Returns 0 on success, -1 on failure. + */ +int +pg_hmac_final(pg_hmac_ctx *ctx, uint8 *dest, size_t len) +{ + uint8 *h; + + if (ctx == NULL) + return -1; + + h = ALLOC(ctx->digest_size); + if (h == NULL) + { + ctx->error = PG_HMAC_ERROR_OOM; + return -1; + } + memset(h, 0, ctx->digest_size); + + if (pg_cryptohash_final(ctx->hash, h, ctx->digest_size) < 0) + { + ctx->error = PG_HMAC_ERROR_INTERNAL; + ctx->errreason = pg_cryptohash_error(ctx->hash); + FREE(h); + return -1; + } + + /* H(K XOR opad, tmp) */ + if (pg_cryptohash_init(ctx->hash) < 0 || + pg_cryptohash_update(ctx->hash, ctx->k_opad, ctx->block_size) < 0 || + pg_cryptohash_update(ctx->hash, h, ctx->digest_size) < 0 || + pg_cryptohash_final(ctx->hash, dest, len) < 0) + { + ctx->error = PG_HMAC_ERROR_INTERNAL; + ctx->errreason = pg_cryptohash_error(ctx->hash); + FREE(h); + return -1; + } + + FREE(h); + return 0; +} + +/* + * pg_hmac_free + * + * Free a HMAC context. + */ +void +pg_hmac_free(pg_hmac_ctx *ctx) +{ + if (ctx == NULL) + return; + + pg_cryptohash_free(ctx->hash); + explicit_bzero(ctx, sizeof(pg_hmac_ctx)); + FREE(ctx); +} + +/* + * pg_hmac_error + * + * Returns a static string providing details about an error that happened + * during a HMAC computation. + */ +const char * +pg_hmac_error(pg_hmac_ctx *ctx) +{ + if (ctx == NULL) + return _("out of memory"); + + /* + * If a reason is provided, rely on it, else fallback to any error code + * set. + */ + if (ctx->errreason) + return ctx->errreason; + + switch (ctx->error) + { + case PG_HMAC_ERROR_NONE: + return _("success"); + case PG_HMAC_ERROR_INTERNAL: + return _("internal error"); + case PG_HMAC_ERROR_OOM: + return _("out of memory"); + } + + Assert(false); /* cannot be reached */ + return _("success"); +} diff --git a/src/common/hmac_openssl.c b/src/common/hmac_openssl.c new file mode 100644 index 0000000..12be542 --- /dev/null +++ b/src/common/hmac_openssl.c @@ -0,0 +1,348 @@ +/*------------------------------------------------------------------------- + * + * hmac_openssl.c + * Implementation of HMAC with OpenSSL. + * + * This should only be used if code is compiled with OpenSSL support. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/hmac_openssl.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + + +#include <openssl/err.h> +#include <openssl/hmac.h> + +#include "common/hmac.h" +#include "common/md5.h" +#include "common/sha1.h" +#include "common/sha2.h" +#ifndef FRONTEND +#include "utils/memutils.h" +#include "utils/resowner.h" +#include "utils/resowner_private.h" +#endif + +/* + * In backend, use an allocation in TopMemoryContext to count for resowner + * cleanup handling if necessary. For versions of OpenSSL where HMAC_CTX is + * known, just use palloc(). In frontend, use malloc to be able to return + * a failure status back to the caller. + */ +#ifndef FRONTEND +#ifdef HAVE_HMAC_CTX_NEW +#define ALLOC(size) MemoryContextAlloc(TopMemoryContext, size) +#else +#define ALLOC(size) palloc(size) +#endif +#define FREE(ptr) pfree(ptr) +#else /* FRONTEND */ +#define ALLOC(size) malloc(size) +#define FREE(ptr) free(ptr) +#endif /* FRONTEND */ + +/* Set of error states */ +typedef enum pg_hmac_errno +{ + PG_HMAC_ERROR_NONE = 0, + PG_HMAC_ERROR_DEST_LEN, + PG_HMAC_ERROR_OPENSSL +} pg_hmac_errno; + +/* Internal pg_hmac_ctx structure */ +struct pg_hmac_ctx +{ + HMAC_CTX *hmacctx; + pg_cryptohash_type type; + pg_hmac_errno error; + const char *errreason; + +#ifndef FRONTEND + ResourceOwner resowner; +#endif +}; + +static const char * +SSLerrmessage(unsigned long ecode) +{ + if (ecode == 0) + return NULL; + + /* + * This may return NULL, but we would fall back to a default error path if + * that were the case. + */ + return ERR_reason_error_string(ecode); +} + +/* + * pg_hmac_create + * + * Allocate a hash context. Returns NULL on failure for an OOM. The + * backend issues an error, without returning. + */ +pg_hmac_ctx * +pg_hmac_create(pg_cryptohash_type type) +{ + pg_hmac_ctx *ctx; + + ctx = ALLOC(sizeof(pg_hmac_ctx)); + if (ctx == NULL) + return NULL; + memset(ctx, 0, sizeof(pg_hmac_ctx)); + + ctx->type = type; + ctx->error = PG_HMAC_ERROR_NONE; + ctx->errreason = NULL; + + + /* + * Initialization takes care of assigning the correct type for OpenSSL. + * Also ensure that there aren't any unconsumed errors in the queue from + * previous runs. + */ + ERR_clear_error(); +#ifdef HAVE_HMAC_CTX_NEW +#ifndef FRONTEND + ResourceOwnerEnlargeHMAC(CurrentResourceOwner); +#endif + ctx->hmacctx = HMAC_CTX_new(); +#else + ctx->hmacctx = ALLOC(sizeof(HMAC_CTX)); +#endif + + if (ctx->hmacctx == NULL) + { + explicit_bzero(ctx, sizeof(pg_hmac_ctx)); + FREE(ctx); +#ifndef FRONTEND + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); +#endif + return NULL; + } + +#ifdef HAVE_HMAC_CTX_NEW +#ifndef FRONTEND + ctx->resowner = CurrentResourceOwner; + ResourceOwnerRememberHMAC(CurrentResourceOwner, PointerGetDatum(ctx)); +#endif +#else + memset(ctx->hmacctx, 0, sizeof(HMAC_CTX)); +#endif /* HAVE_HMAC_CTX_NEW */ + + return ctx; +} + +/* + * pg_hmac_init + * + * Initialize a HMAC context. Returns 0 on success, -1 on failure. + */ +int +pg_hmac_init(pg_hmac_ctx *ctx, const uint8 *key, size_t len) +{ + int status = 0; + + if (ctx == NULL) + return -1; + + switch (ctx->type) + { + case PG_MD5: + status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_md5(), NULL); + break; + case PG_SHA1: + status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_sha1(), NULL); + break; + case PG_SHA224: + status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_sha224(), NULL); + break; + case PG_SHA256: + status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_sha256(), NULL); + break; + case PG_SHA384: + status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_sha384(), NULL); + break; + case PG_SHA512: + status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_sha512(), NULL); + break; + } + + /* OpenSSL internals return 1 on success, 0 on failure */ + if (status <= 0) + { + ctx->errreason = SSLerrmessage(ERR_get_error()); + ctx->error = PG_HMAC_ERROR_OPENSSL; + return -1; + } + + return 0; +} + +/* + * pg_hmac_update + * + * Update a HMAC context. Returns 0 on success, -1 on failure. + */ +int +pg_hmac_update(pg_hmac_ctx *ctx, const uint8 *data, size_t len) +{ + int status = 0; + + if (ctx == NULL) + return -1; + + status = HMAC_Update(ctx->hmacctx, data, len); + + /* OpenSSL internals return 1 on success, 0 on failure */ + if (status <= 0) + { + ctx->errreason = SSLerrmessage(ERR_get_error()); + ctx->error = PG_HMAC_ERROR_OPENSSL; + return -1; + } + return 0; +} + +/* + * pg_hmac_final + * + * Finalize a HMAC context. Returns 0 on success, -1 on failure. + */ +int +pg_hmac_final(pg_hmac_ctx *ctx, uint8 *dest, size_t len) +{ + int status = 0; + uint32 outlen; + + if (ctx == NULL) + return -1; + + switch (ctx->type) + { + case PG_MD5: + if (len < MD5_DIGEST_LENGTH) + { + ctx->error = PG_HMAC_ERROR_DEST_LEN; + return -1; + } + break; + case PG_SHA1: + if (len < SHA1_DIGEST_LENGTH) + { + ctx->error = PG_HMAC_ERROR_DEST_LEN; + return -1; + } + break; + case PG_SHA224: + if (len < PG_SHA224_DIGEST_LENGTH) + { + ctx->error = PG_HMAC_ERROR_DEST_LEN; + return -1; + } + break; + case PG_SHA256: + if (len < PG_SHA256_DIGEST_LENGTH) + { + ctx->error = PG_HMAC_ERROR_DEST_LEN; + return -1; + } + break; + case PG_SHA384: + if (len < PG_SHA384_DIGEST_LENGTH) + { + ctx->error = PG_HMAC_ERROR_DEST_LEN; + return -1; + } + break; + case PG_SHA512: + if (len < PG_SHA512_DIGEST_LENGTH) + { + ctx->error = PG_HMAC_ERROR_DEST_LEN; + return -1; + } + break; + } + + status = HMAC_Final(ctx->hmacctx, dest, &outlen); + + /* OpenSSL internals return 1 on success, 0 on failure */ + if (status <= 0) + { + ctx->errreason = SSLerrmessage(ERR_get_error()); + ctx->error = PG_HMAC_ERROR_OPENSSL; + return -1; + } + return 0; +} + +/* + * pg_hmac_free + * + * Free a HMAC context. + */ +void +pg_hmac_free(pg_hmac_ctx *ctx) +{ + if (ctx == NULL) + return; + +#ifdef HAVE_HMAC_CTX_FREE + HMAC_CTX_free(ctx->hmacctx); +#ifndef FRONTEND + ResourceOwnerForgetHMAC(ctx->resowner, PointerGetDatum(ctx)); +#endif +#else + explicit_bzero(ctx->hmacctx, sizeof(HMAC_CTX)); + FREE(ctx->hmacctx); +#endif + + explicit_bzero(ctx, sizeof(pg_hmac_ctx)); + FREE(ctx); +} + +/* + * pg_hmac_error + * + * Returns a static string providing details about an error that happened + * during a HMAC computation. + */ +const char * +pg_hmac_error(pg_hmac_ctx *ctx) +{ + if (ctx == NULL) + return _("out of memory"); + + /* + * If a reason is provided, rely on it, else fallback to any error code + * set. + */ + if (ctx->errreason) + return ctx->errreason; + + switch (ctx->error) + { + case PG_HMAC_ERROR_NONE: + return _("success"); + case PG_HMAC_ERROR_DEST_LEN: + return _("destination buffer too small"); + case PG_HMAC_ERROR_OPENSSL: + return _("OpenSSL failure"); + } + + Assert(false); /* cannot be reached */ + return _("success"); +} diff --git a/src/common/ip.c b/src/common/ip.c new file mode 100644 index 0000000..9baad3a --- /dev/null +++ b/src/common/ip.c @@ -0,0 +1,262 @@ +/*------------------------------------------------------------------------- + * + * ip.c + * IPv6-aware network access. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/ip.c + * + * This file and the IPV6 implementation were initially provided by + * Nigel Kukard <nkukard@lbsd.net>, Linux Based Systems Design + * http://www.lbsd.net. + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <unistd.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <netdb.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <arpa/inet.h> +#include <sys/file.h> + +#include "common/ip.h" + + + +static int getaddrinfo_unix(const char *path, + const struct addrinfo *hintsp, + struct addrinfo **result); + +static int getnameinfo_unix(const struct sockaddr_un *sa, int salen, + char *node, int nodelen, + char *service, int servicelen, + int flags); + + +/* + * pg_getaddrinfo_all - get address info for Unix, IPv4 and IPv6 sockets + */ +int +pg_getaddrinfo_all(const char *hostname, const char *servname, + const struct addrinfo *hintp, struct addrinfo **result) +{ + int rc; + + /* not all versions of getaddrinfo() zero *result on failure */ + *result = NULL; + + if (hintp->ai_family == AF_UNIX) + return getaddrinfo_unix(servname, hintp, result); + + /* NULL has special meaning to getaddrinfo(). */ + rc = getaddrinfo((!hostname || hostname[0] == '\0') ? NULL : hostname, + servname, hintp, result); + + return rc; +} + + +/* + * pg_freeaddrinfo_all - free addrinfo structures for IPv4, IPv6, or Unix + * + * Note: the ai_family field of the original hint structure must be passed + * so that we can tell whether the addrinfo struct was built by the system's + * getaddrinfo() routine or our own getaddrinfo_unix() routine. Some versions + * of getaddrinfo() might be willing to return AF_UNIX addresses, so it's + * not safe to look at ai_family in the addrinfo itself. + */ +void +pg_freeaddrinfo_all(int hint_ai_family, struct addrinfo *ai) +{ + if (hint_ai_family == AF_UNIX) + { + /* struct was built by getaddrinfo_unix (see pg_getaddrinfo_all) */ + while (ai != NULL) + { + struct addrinfo *p = ai; + + ai = ai->ai_next; + free(p->ai_addr); + free(p); + } + } + else + { + /* struct was built by getaddrinfo() */ + if (ai != NULL) + freeaddrinfo(ai); + } +} + + +/* + * pg_getnameinfo_all - get name info for Unix, IPv4 and IPv6 sockets + * + * The API of this routine differs from the standard getnameinfo() definition + * in two ways: first, the addr parameter is declared as sockaddr_storage + * rather than struct sockaddr, and second, the node and service fields are + * guaranteed to be filled with something even on failure return. + */ +int +pg_getnameinfo_all(const struct sockaddr_storage *addr, int salen, + char *node, int nodelen, + char *service, int servicelen, + int flags) +{ + int rc; + + if (addr && addr->ss_family == AF_UNIX) + rc = getnameinfo_unix((const struct sockaddr_un *) addr, salen, + node, nodelen, + service, servicelen, + flags); + else + rc = getnameinfo((const struct sockaddr *) addr, salen, + node, nodelen, + service, servicelen, + flags); + + if (rc != 0) + { + if (node) + strlcpy(node, "???", nodelen); + if (service) + strlcpy(service, "???", servicelen); + } + + return rc; +} + + +/* ------- + * getaddrinfo_unix - get unix socket info using IPv6-compatible API + * + * Bugs: only one addrinfo is set even though hintsp is NULL or + * ai_socktype is 0 + * AI_CANONNAME is not supported. + * ------- + */ +static int +getaddrinfo_unix(const char *path, const struct addrinfo *hintsp, + struct addrinfo **result) +{ + struct addrinfo hints = {0}; + struct addrinfo *aip; + struct sockaddr_un *unp; + + *result = NULL; + + if (strlen(path) >= sizeof(unp->sun_path)) + return EAI_FAIL; + + if (hintsp == NULL) + { + hints.ai_family = AF_UNIX; + hints.ai_socktype = SOCK_STREAM; + } + else + memcpy(&hints, hintsp, sizeof(hints)); + + if (hints.ai_socktype == 0) + hints.ai_socktype = SOCK_STREAM; + + if (hints.ai_family != AF_UNIX) + { + /* shouldn't have been called */ + return EAI_FAIL; + } + + aip = calloc(1, sizeof(struct addrinfo)); + if (aip == NULL) + return EAI_MEMORY; + + unp = calloc(1, sizeof(struct sockaddr_un)); + if (unp == NULL) + { + free(aip); + return EAI_MEMORY; + } + + aip->ai_family = AF_UNIX; + aip->ai_socktype = hints.ai_socktype; + aip->ai_protocol = hints.ai_protocol; + aip->ai_next = NULL; + aip->ai_canonname = NULL; + *result = aip; + + unp->sun_family = AF_UNIX; + aip->ai_addr = (struct sockaddr *) unp; + aip->ai_addrlen = sizeof(struct sockaddr_un); + + strcpy(unp->sun_path, path); + + /* + * If the supplied path starts with @, replace that with a zero byte for + * the internal representation. In that mode, the entire sun_path is the + * address, including trailing zero bytes. But we set the address length + * to only include the length of the original string. That way the + * trailing zero bytes won't show up in any network or socket lists of the + * operating system. This is just a convention, also followed by other + * packages. + */ + if (path[0] == '@') + { + unp->sun_path[0] = '\0'; + aip->ai_addrlen = offsetof(struct sockaddr_un, sun_path) + strlen(path); + } + + return 0; +} + +/* + * Convert an address to a hostname. + */ +static int +getnameinfo_unix(const struct sockaddr_un *sa, int salen, + char *node, int nodelen, + char *service, int servicelen, + int flags) +{ + int ret; + + /* Invalid arguments. */ + if (sa == NULL || sa->sun_family != AF_UNIX || + (node == NULL && service == NULL)) + return EAI_FAIL; + + if (node) + { + ret = snprintf(node, nodelen, "%s", "[local]"); + if (ret < 0 || ret >= nodelen) + return EAI_MEMORY; + } + + if (service) + { + /* + * Check whether it looks like an abstract socket, but it could also + * just be an empty string. + */ + if (sa->sun_path[0] == '\0' && sa->sun_path[1] != '\0') + ret = snprintf(service, servicelen, "@%s", sa->sun_path + 1); + else + ret = snprintf(service, servicelen, "%s", sa->sun_path); + if (ret < 0 || ret >= servicelen) + return EAI_MEMORY; + } + + return 0; +} diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c new file mode 100644 index 0000000..2e86589 --- /dev/null +++ b/src/common/jsonapi.c @@ -0,0 +1,1206 @@ +/*------------------------------------------------------------------------- + * + * jsonapi.c + * JSON parser and lexer interfaces + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/jsonapi.c + * + *------------------------------------------------------------------------- + */ +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/jsonapi.h" +#include "mb/pg_wchar.h" +#include "port/pg_lfind.h" + +#ifndef FRONTEND +#include "miscadmin.h" +#endif + +/* + * The context of the parser is maintained by the recursive descent + * mechanism, but is passed explicitly to the error reporting routine + * for better diagnostics. + */ +typedef enum /* contexts of JSON parser */ +{ + JSON_PARSE_VALUE, /* expecting a value */ + JSON_PARSE_STRING, /* expecting a string (for a field name) */ + JSON_PARSE_ARRAY_START, /* saw '[', expecting value or ']' */ + JSON_PARSE_ARRAY_NEXT, /* saw array element, expecting ',' or ']' */ + JSON_PARSE_OBJECT_START, /* saw '{', expecting label or '}' */ + JSON_PARSE_OBJECT_LABEL, /* saw object label, expecting ':' */ + JSON_PARSE_OBJECT_NEXT, /* saw object value, expecting ',' or '}' */ + JSON_PARSE_OBJECT_COMMA, /* saw object ',', expecting next label */ + JSON_PARSE_END /* saw the end of a document, expect nothing */ +} JsonParseContext; + +static inline JsonParseErrorType json_lex_string(JsonLexContext *lex); +static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, char *s, + bool *num_err, int *total_len); +static inline JsonParseErrorType parse_scalar(JsonLexContext *lex, JsonSemAction *sem); +static JsonParseErrorType parse_object_field(JsonLexContext *lex, JsonSemAction *sem); +static JsonParseErrorType parse_object(JsonLexContext *lex, JsonSemAction *sem); +static JsonParseErrorType parse_array_element(JsonLexContext *lex, JsonSemAction *sem); +static JsonParseErrorType parse_array(JsonLexContext *lex, JsonSemAction *sem); +static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex); + +/* the null action object used for pure validation */ +JsonSemAction nullSemAction = +{ + NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL +}; + +/* Recursive Descent parser support routines */ + +/* + * lex_peek + * + * what is the current look_ahead token? +*/ +static inline JsonTokenType +lex_peek(JsonLexContext *lex) +{ + return lex->token_type; +} + +/* + * lex_expect + * + * move the lexer to the next token if the current look_ahead token matches + * the parameter token. Otherwise, report an error. + */ +static inline JsonParseErrorType +lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token) +{ + if (lex_peek(lex) == token) + return json_lex(lex); + else + return report_parse_error(ctx, lex); +} + +/* chars to consider as part of an alphanumeric token */ +#define JSON_ALPHANUMERIC_CHAR(c) \ + (((c) >= 'a' && (c) <= 'z') || \ + ((c) >= 'A' && (c) <= 'Z') || \ + ((c) >= '0' && (c) <= '9') || \ + (c) == '_' || \ + IS_HIGHBIT_SET(c)) + +/* + * Utility function to check if a string is a valid JSON number. + * + * str is of length len, and need not be null-terminated. + */ +bool +IsValidJsonNumber(const char *str, int len) +{ + bool numeric_error; + int total_len; + JsonLexContext dummy_lex; + + if (len <= 0) + return false; + + /* + * json_lex_number expects a leading '-' to have been eaten already. + * + * having to cast away the constness of str is ugly, but there's not much + * easy alternative. + */ + if (*str == '-') + { + dummy_lex.input = unconstify(char *, str) + 1; + dummy_lex.input_length = len - 1; + } + else + { + dummy_lex.input = unconstify(char *, str); + dummy_lex.input_length = len; + } + + json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len); + + return (!numeric_error) && (total_len == dummy_lex.input_length); +} + +/* + * makeJsonLexContextCstringLen + * + * lex constructor, with or without StringInfo object for de-escaped lexemes. + * + * Without is better as it makes the processing faster, so only make one + * if really required. + */ +JsonLexContext * +makeJsonLexContextCstringLen(char *json, int len, int encoding, bool need_escapes) +{ + JsonLexContext *lex = palloc0(sizeof(JsonLexContext)); + + lex->input = lex->token_terminator = lex->line_start = json; + lex->line_number = 1; + lex->input_length = len; + lex->input_encoding = encoding; + if (need_escapes) + lex->strval = makeStringInfo(); + return lex; +} + +/* + * pg_parse_json + * + * Publicly visible entry point for the JSON parser. + * + * lex is a lexing context, set up for the json to be processed by calling + * makeJsonLexContext(). sem is a structure of function pointers to semantic + * action routines to be called at appropriate spots during parsing, and a + * pointer to a state object to be passed to those routines. + */ +JsonParseErrorType +pg_parse_json(JsonLexContext *lex, JsonSemAction *sem) +{ + JsonTokenType tok; + JsonParseErrorType result; + + /* get the initial token */ + result = json_lex(lex); + if (result != JSON_SUCCESS) + return result; + + tok = lex_peek(lex); + + /* parse by recursive descent */ + switch (tok) + { + case JSON_TOKEN_OBJECT_START: + result = parse_object(lex, sem); + break; + case JSON_TOKEN_ARRAY_START: + result = parse_array(lex, sem); + break; + default: + result = parse_scalar(lex, sem); /* json can be a bare scalar */ + } + + if (result == JSON_SUCCESS) + result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END); + + return result; +} + +/* + * json_count_array_elements + * + * Returns number of array elements in lex context at start of array token + * until end of array token at same nesting level. + * + * Designed to be called from array_start routines. + */ +JsonParseErrorType +json_count_array_elements(JsonLexContext *lex, int *elements) +{ + JsonLexContext copylex; + int count; + JsonParseErrorType result; + + /* + * It's safe to do this with a shallow copy because the lexical routines + * don't scribble on the input. They do scribble on the other pointers + * etc, so doing this with a copy makes that safe. + */ + memcpy(©lex, lex, sizeof(JsonLexContext)); + copylex.strval = NULL; /* not interested in values here */ + copylex.lex_level++; + + count = 0; + result = lex_expect(JSON_PARSE_ARRAY_START, ©lex, + JSON_TOKEN_ARRAY_START); + if (result != JSON_SUCCESS) + return result; + if (lex_peek(©lex) != JSON_TOKEN_ARRAY_END) + { + while (1) + { + count++; + result = parse_array_element(©lex, &nullSemAction); + if (result != JSON_SUCCESS) + return result; + if (copylex.token_type != JSON_TOKEN_COMMA) + break; + result = json_lex(©lex); + if (result != JSON_SUCCESS) + return result; + } + } + result = lex_expect(JSON_PARSE_ARRAY_NEXT, ©lex, + JSON_TOKEN_ARRAY_END); + if (result != JSON_SUCCESS) + return result; + + *elements = count; + return JSON_SUCCESS; +} + +/* + * Recursive Descent parse routines. There is one for each structural + * element in a json document: + * - scalar (string, number, true, false, null) + * - array ( [ ] ) + * - array element + * - object ( { } ) + * - object field + */ +static inline JsonParseErrorType +parse_scalar(JsonLexContext *lex, JsonSemAction *sem) +{ + char *val = NULL; + json_scalar_action sfunc = sem->scalar; + JsonTokenType tok = lex_peek(lex); + JsonParseErrorType result; + + /* a scalar must be a string, a number, true, false, or null */ + if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER && + tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE && + tok != JSON_TOKEN_NULL) + return report_parse_error(JSON_PARSE_VALUE, lex); + + /* if no semantic function, just consume the token */ + if (sfunc == NULL) + return json_lex(lex); + + /* extract the de-escaped string value, or the raw lexeme */ + if (lex_peek(lex) == JSON_TOKEN_STRING) + { + if (lex->strval != NULL) + val = pstrdup(lex->strval->data); + } + else + { + int len = (lex->token_terminator - lex->token_start); + + val = palloc(len + 1); + memcpy(val, lex->token_start, len); + val[len] = '\0'; + } + + /* consume the token */ + result = json_lex(lex); + if (result != JSON_SUCCESS) + return result; + + /* invoke the callback */ + result = (*sfunc) (sem->semstate, val, tok); + + return result; +} + +static JsonParseErrorType +parse_object_field(JsonLexContext *lex, JsonSemAction *sem) +{ + /* + * An object field is "fieldname" : value where value can be a scalar, + * object or array. Note: in user-facing docs and error messages, we + * generally call a field name a "key". + */ + + char *fname = NULL; /* keep compiler quiet */ + json_ofield_action ostart = sem->object_field_start; + json_ofield_action oend = sem->object_field_end; + bool isnull; + JsonTokenType tok; + JsonParseErrorType result; + + if (lex_peek(lex) != JSON_TOKEN_STRING) + return report_parse_error(JSON_PARSE_STRING, lex); + if ((ostart != NULL || oend != NULL) && lex->strval != NULL) + fname = pstrdup(lex->strval->data); + result = json_lex(lex); + if (result != JSON_SUCCESS) + return result; + + result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON); + if (result != JSON_SUCCESS) + return result; + + tok = lex_peek(lex); + isnull = tok == JSON_TOKEN_NULL; + + if (ostart != NULL) + { + result = (*ostart) (sem->semstate, fname, isnull); + if (result != JSON_SUCCESS) + return result; + } + + switch (tok) + { + case JSON_TOKEN_OBJECT_START: + result = parse_object(lex, sem); + break; + case JSON_TOKEN_ARRAY_START: + result = parse_array(lex, sem); + break; + default: + result = parse_scalar(lex, sem); + } + if (result != JSON_SUCCESS) + return result; + + if (oend != NULL) + { + result = (*oend) (sem->semstate, fname, isnull); + if (result != JSON_SUCCESS) + return result; + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +parse_object(JsonLexContext *lex, JsonSemAction *sem) +{ + /* + * an object is a possibly empty sequence of object fields, separated by + * commas and surrounded by curly braces. + */ + json_struct_action ostart = sem->object_start; + json_struct_action oend = sem->object_end; + JsonTokenType tok; + JsonParseErrorType result; + +#ifndef FRONTEND + check_stack_depth(); +#endif + + if (ostart != NULL) + { + result = (*ostart) (sem->semstate); + if (result != JSON_SUCCESS) + return result; + } + + /* + * Data inside an object is at a higher nesting level than the object + * itself. Note that we increment this after we call the semantic routine + * for the object start and restore it before we call the routine for the + * object end. + */ + lex->lex_level++; + + Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START); + result = json_lex(lex); + if (result != JSON_SUCCESS) + return result; + + tok = lex_peek(lex); + switch (tok) + { + case JSON_TOKEN_STRING: + result = parse_object_field(lex, sem); + while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA) + { + result = json_lex(lex); + if (result != JSON_SUCCESS) + break; + result = parse_object_field(lex, sem); + } + break; + case JSON_TOKEN_OBJECT_END: + break; + default: + /* case of an invalid initial token inside the object */ + result = report_parse_error(JSON_PARSE_OBJECT_START, lex); + } + if (result != JSON_SUCCESS) + return result; + + result = lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END); + if (result != JSON_SUCCESS) + return result; + + lex->lex_level--; + + if (oend != NULL) + { + result = (*oend) (sem->semstate); + if (result != JSON_SUCCESS) + return result; + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +parse_array_element(JsonLexContext *lex, JsonSemAction *sem) +{ + json_aelem_action astart = sem->array_element_start; + json_aelem_action aend = sem->array_element_end; + JsonTokenType tok = lex_peek(lex); + JsonParseErrorType result; + bool isnull; + + isnull = tok == JSON_TOKEN_NULL; + + if (astart != NULL) + { + result = (*astart) (sem->semstate, isnull); + if (result != JSON_SUCCESS) + return result; + } + + /* an array element is any object, array or scalar */ + switch (tok) + { + case JSON_TOKEN_OBJECT_START: + result = parse_object(lex, sem); + break; + case JSON_TOKEN_ARRAY_START: + result = parse_array(lex, sem); + break; + default: + result = parse_scalar(lex, sem); + } + + if (result != JSON_SUCCESS) + return result; + + if (aend != NULL) + { + result = (*aend) (sem->semstate, isnull); + if (result != JSON_SUCCESS) + return result; + } + + return JSON_SUCCESS; +} + +static JsonParseErrorType +parse_array(JsonLexContext *lex, JsonSemAction *sem) +{ + /* + * an array is a possibly empty sequence of array elements, separated by + * commas and surrounded by square brackets. + */ + json_struct_action astart = sem->array_start; + json_struct_action aend = sem->array_end; + JsonParseErrorType result; + +#ifndef FRONTEND + check_stack_depth(); +#endif + + if (astart != NULL) + { + result = (*astart) (sem->semstate); + if (result != JSON_SUCCESS) + return result; + } + + /* + * Data inside an array is at a higher nesting level than the array + * itself. Note that we increment this after we call the semantic routine + * for the array start and restore it before we call the routine for the + * array end. + */ + lex->lex_level++; + + result = lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START); + if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END) + { + result = parse_array_element(lex, sem); + + while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA) + { + result = json_lex(lex); + if (result != JSON_SUCCESS) + break; + result = parse_array_element(lex, sem); + } + } + if (result != JSON_SUCCESS) + return result; + + result = lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END); + if (result != JSON_SUCCESS) + return result; + + lex->lex_level--; + + if (aend != NULL) + { + result = (*aend) (sem->semstate); + if (result != JSON_SUCCESS) + return result; + } + + return JSON_SUCCESS; +} + +/* + * Lex one token from the input stream. + */ +JsonParseErrorType +json_lex(JsonLexContext *lex) +{ + char *s; + char *const end = lex->input + lex->input_length; + JsonParseErrorType result; + + /* Skip leading whitespace. */ + s = lex->token_terminator; + while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')) + { + if (*s++ == '\n') + { + ++lex->line_number; + lex->line_start = s; + } + } + lex->token_start = s; + + /* Determine token type. */ + if (s >= end) + { + lex->token_start = NULL; + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s; + lex->token_type = JSON_TOKEN_END; + } + else + { + switch (*s) + { + /* Single-character token, some kind of punctuation mark. */ + case '{': + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s + 1; + lex->token_type = JSON_TOKEN_OBJECT_START; + break; + case '}': + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s + 1; + lex->token_type = JSON_TOKEN_OBJECT_END; + break; + case '[': + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s + 1; + lex->token_type = JSON_TOKEN_ARRAY_START; + break; + case ']': + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s + 1; + lex->token_type = JSON_TOKEN_ARRAY_END; + break; + case ',': + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s + 1; + lex->token_type = JSON_TOKEN_COMMA; + break; + case ':': + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s + 1; + lex->token_type = JSON_TOKEN_COLON; + break; + case '"': + /* string */ + result = json_lex_string(lex); + if (result != JSON_SUCCESS) + return result; + lex->token_type = JSON_TOKEN_STRING; + break; + case '-': + /* Negative number. */ + result = json_lex_number(lex, s + 1, NULL, NULL); + if (result != JSON_SUCCESS) + return result; + lex->token_type = JSON_TOKEN_NUMBER; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + /* Positive number. */ + result = json_lex_number(lex, s, NULL, NULL); + if (result != JSON_SUCCESS) + return result; + lex->token_type = JSON_TOKEN_NUMBER; + break; + default: + { + char *p; + + /* + * We're not dealing with a string, number, legal + * punctuation mark, or end of string. The only legal + * tokens we might find here are true, false, and null, + * but for error reporting purposes we scan until we see a + * non-alphanumeric character. That way, we can report + * the whole word as an unexpected token, rather than just + * some unintuitive prefix thereof. + */ + for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++) + /* skip */ ; + + /* + * We got some sort of unexpected punctuation or an + * otherwise unexpected character, so just complain about + * that one character. + */ + if (p == s) + { + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s + 1; + return JSON_INVALID_TOKEN; + } + + /* + * We've got a real alphanumeric token here. If it + * happens to be true, false, or null, all is well. If + * not, error out. + */ + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = p; + if (p - s == 4) + { + if (memcmp(s, "true", 4) == 0) + lex->token_type = JSON_TOKEN_TRUE; + else if (memcmp(s, "null", 4) == 0) + lex->token_type = JSON_TOKEN_NULL; + else + return JSON_INVALID_TOKEN; + } + else if (p - s == 5 && memcmp(s, "false", 5) == 0) + lex->token_type = JSON_TOKEN_FALSE; + else + return JSON_INVALID_TOKEN; + } + } /* end of switch */ + } + + return JSON_SUCCESS; +} + +/* + * The next token in the input stream is known to be a string; lex it. + * + * If lex->strval isn't NULL, fill it with the decoded string. + * Set lex->token_terminator to the end of the decoded input, and in + * success cases, transfer its previous value to lex->prev_token_terminator. + * Return JSON_SUCCESS or an error code. + * + * Note: be careful that all error exits advance lex->token_terminator + * to the point after the character we detected the error on. + */ +static inline JsonParseErrorType +json_lex_string(JsonLexContext *lex) +{ + char *s; + char *const end = lex->input + lex->input_length; + int hi_surrogate = -1; + + /* Convenience macros for error exits */ +#define FAIL_AT_CHAR_START(code) \ + do { \ + lex->token_terminator = s; \ + return code; \ + } while (0) +#define FAIL_AT_CHAR_END(code) \ + do { \ + lex->token_terminator = \ + s + pg_encoding_mblen_bounded(lex->input_encoding, s); \ + return code; \ + } while (0) + + if (lex->strval != NULL) + resetStringInfo(lex->strval); + + Assert(lex->input_length > 0); + s = lex->token_start; + for (;;) + { + s++; + /* Premature end of the string. */ + if (s >= end) + FAIL_AT_CHAR_START(JSON_INVALID_TOKEN); + else if (*s == '"') + break; + else if (*s == '\\') + { + /* OK, we have an escape character. */ + s++; + if (s >= end) + FAIL_AT_CHAR_START(JSON_INVALID_TOKEN); + else if (*s == 'u') + { + int i; + int ch = 0; + + for (i = 1; i <= 4; i++) + { + s++; + if (s >= end) + FAIL_AT_CHAR_START(JSON_INVALID_TOKEN); + else if (*s >= '0' && *s <= '9') + ch = (ch * 16) + (*s - '0'); + else if (*s >= 'a' && *s <= 'f') + ch = (ch * 16) + (*s - 'a') + 10; + else if (*s >= 'A' && *s <= 'F') + ch = (ch * 16) + (*s - 'A') + 10; + else + FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT); + } + if (lex->strval != NULL) + { + /* + * Combine surrogate pairs. + */ + if (is_utf16_surrogate_first(ch)) + { + if (hi_surrogate != -1) + FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE); + hi_surrogate = ch; + continue; + } + else if (is_utf16_surrogate_second(ch)) + { + if (hi_surrogate == -1) + FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE); + ch = surrogate_pair_to_codepoint(hi_surrogate, ch); + hi_surrogate = -1; + } + + if (hi_surrogate != -1) + FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE); + + /* + * Reject invalid cases. We can't have a value above + * 0xFFFF here (since we only accepted 4 hex digits + * above), so no need to test for out-of-range chars. + */ + if (ch == 0) + { + /* We can't allow this, since our TEXT type doesn't */ + FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO); + } + + /* + * Add the represented character to lex->strval. In the + * backend, we can let pg_unicode_to_server_noerror() + * handle any required character set conversion; in + * frontend, we can only deal with trivial conversions. + */ +#ifndef FRONTEND + { + char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1]; + + if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf)) + FAIL_AT_CHAR_END(JSON_UNICODE_UNTRANSLATABLE); + appendStringInfoString(lex->strval, cbuf); + } +#else + if (lex->input_encoding == PG_UTF8) + { + /* OK, we can map the code point to UTF8 easily */ + char utf8str[5]; + int utf8len; + + unicode_to_utf8(ch, (unsigned char *) utf8str); + utf8len = pg_utf_mblen((unsigned char *) utf8str); + appendBinaryStringInfo(lex->strval, utf8str, utf8len); + } + else if (ch <= 0x007f) + { + /* The ASCII range is the same in all encodings */ + appendStringInfoChar(lex->strval, (char) ch); + } + else + FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE); +#endif /* FRONTEND */ + } + } + else if (lex->strval != NULL) + { + if (hi_surrogate != -1) + FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE); + + switch (*s) + { + case '"': + case '\\': + case '/': + appendStringInfoChar(lex->strval, *s); + break; + case 'b': + appendStringInfoChar(lex->strval, '\b'); + break; + case 'f': + appendStringInfoChar(lex->strval, '\f'); + break; + case 'n': + appendStringInfoChar(lex->strval, '\n'); + break; + case 'r': + appendStringInfoChar(lex->strval, '\r'); + break; + case 't': + appendStringInfoChar(lex->strval, '\t'); + break; + default: + + /* + * Not a valid string escape, so signal error. We + * adjust token_start so that just the escape sequence + * is reported, not the whole string. + */ + lex->token_start = s; + FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID); + } + } + else if (strchr("\"\\/bfnrt", *s) == NULL) + { + /* + * Simpler processing if we're not bothered about de-escaping + * + * It's very tempting to remove the strchr() call here and + * replace it with a switch statement, but testing so far has + * shown it's not a performance win. + */ + lex->token_start = s; + FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID); + } + } + else + { + char *p = s; + + if (hi_surrogate != -1) + FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE); + + /* + * Skip to the first byte that requires special handling, so we + * can batch calls to appendBinaryStringInfo. + */ + while (p < end - sizeof(Vector8) && + !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) && + !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) && + !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8))) + p += sizeof(Vector8); + + for (; p < end; p++) + { + if (*p == '\\' || *p == '"') + break; + else if ((unsigned char) *p <= 31) + { + /* Per RFC4627, these characters MUST be escaped. */ + /* + * Since *p isn't printable, exclude it from the context + * string + */ + lex->token_terminator = p; + return JSON_ESCAPING_REQUIRED; + } + } + + if (lex->strval != NULL) + appendBinaryStringInfo(lex->strval, s, p - s); + + /* + * s will be incremented at the top of the loop, so set it to just + * behind our lookahead position + */ + s = p - 1; + } + } + + if (hi_surrogate != -1) + { + lex->token_terminator = s + 1; + return JSON_UNICODE_LOW_SURROGATE; + } + + /* Hooray, we found the end of the string! */ + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s + 1; + return JSON_SUCCESS; + +#undef FAIL_AT_CHAR_START +#undef FAIL_AT_CHAR_END +} + +/* + * The next token in the input stream is known to be a number; lex it. + * + * In JSON, a number consists of four parts: + * + * (1) An optional minus sign ('-'). + * + * (2) Either a single '0', or a string of one or more digits that does not + * begin with a '0'. + * + * (3) An optional decimal part, consisting of a period ('.') followed by + * one or more digits. (Note: While this part can be omitted + * completely, it's not OK to have only the decimal point without + * any digits afterwards.) + * + * (4) An optional exponent part, consisting of 'e' or 'E', optionally + * followed by '+' or '-', followed by one or more digits. (Note: + * As with the decimal part, if 'e' or 'E' is present, it must be + * followed by at least one digit.) + * + * The 's' argument to this function points to the ostensible beginning + * of part 2 - i.e. the character after any optional minus sign, or the + * first character of the string if there is none. + * + * If num_err is not NULL, we return an error flag to *num_err rather than + * raising an error for a badly-formed number. Also, if total_len is not NULL + * the distance from lex->input to the token end+1 is returned to *total_len. + */ +static inline JsonParseErrorType +json_lex_number(JsonLexContext *lex, char *s, + bool *num_err, int *total_len) +{ + bool error = false; + int len = s - lex->input; + + /* Part (1): leading sign indicator. */ + /* Caller already did this for us; so do nothing. */ + + /* Part (2): parse main digit string. */ + if (len < lex->input_length && *s == '0') + { + s++; + len++; + } + else if (len < lex->input_length && *s >= '1' && *s <= '9') + { + do + { + s++; + len++; + } while (len < lex->input_length && *s >= '0' && *s <= '9'); + } + else + error = true; + + /* Part (3): parse optional decimal portion. */ + if (len < lex->input_length && *s == '.') + { + s++; + len++; + if (len == lex->input_length || *s < '0' || *s > '9') + error = true; + else + { + do + { + s++; + len++; + } while (len < lex->input_length && *s >= '0' && *s <= '9'); + } + } + + /* Part (4): parse optional exponent. */ + if (len < lex->input_length && (*s == 'e' || *s == 'E')) + { + s++; + len++; + if (len < lex->input_length && (*s == '+' || *s == '-')) + { + s++; + len++; + } + if (len == lex->input_length || *s < '0' || *s > '9') + error = true; + else + { + do + { + s++; + len++; + } while (len < lex->input_length && *s >= '0' && *s <= '9'); + } + } + + /* + * Check for trailing garbage. As in json_lex(), any alphanumeric stuff + * here should be considered part of the token for error-reporting + * purposes. + */ + for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++) + error = true; + + if (total_len != NULL) + *total_len = len; + + if (num_err != NULL) + { + /* let the caller handle any error */ + *num_err = error; + } + else + { + /* return token endpoint */ + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = s; + /* handle error if any */ + if (error) + return JSON_INVALID_TOKEN; + } + + return JSON_SUCCESS; +} + +/* + * Report a parse error. + * + * lex->token_start and lex->token_terminator must identify the current token. + */ +static JsonParseErrorType +report_parse_error(JsonParseContext ctx, JsonLexContext *lex) +{ + /* Handle case where the input ended prematurely. */ + if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END) + return JSON_EXPECTED_MORE; + + /* Otherwise choose the error type based on the parsing context. */ + switch (ctx) + { + case JSON_PARSE_END: + return JSON_EXPECTED_END; + case JSON_PARSE_VALUE: + return JSON_EXPECTED_JSON; + case JSON_PARSE_STRING: + return JSON_EXPECTED_STRING; + case JSON_PARSE_ARRAY_START: + return JSON_EXPECTED_ARRAY_FIRST; + case JSON_PARSE_ARRAY_NEXT: + return JSON_EXPECTED_ARRAY_NEXT; + case JSON_PARSE_OBJECT_START: + return JSON_EXPECTED_OBJECT_FIRST; + case JSON_PARSE_OBJECT_LABEL: + return JSON_EXPECTED_COLON; + case JSON_PARSE_OBJECT_NEXT: + return JSON_EXPECTED_OBJECT_NEXT; + case JSON_PARSE_OBJECT_COMMA: + return JSON_EXPECTED_STRING; + } + + /* + * We don't use a default: case, so that the compiler will warn about + * unhandled enum values. + */ + Assert(false); + return JSON_SUCCESS; /* silence stupider compilers */ +} + + +#ifndef FRONTEND +/* + * Extract the current token from a lexing context, for error reporting. + */ +static char * +extract_token(JsonLexContext *lex) +{ + int toklen = lex->token_terminator - lex->token_start; + char *token = palloc(toklen + 1); + + memcpy(token, lex->token_start, toklen); + token[toklen] = '\0'; + return token; +} + +/* + * Construct an (already translated) detail message for a JSON error. + * + * Note that the error message generated by this routine may not be + * palloc'd, making it unsafe for frontend code as there is no way to + * know if this can be safely pfree'd or not. + */ +char * +json_errdetail(JsonParseErrorType error, JsonLexContext *lex) +{ + switch (error) + { + case JSON_SUCCESS: + /* fall through to the error code after switch */ + break; + case JSON_ESCAPING_INVALID: + return psprintf(_("Escape sequence \"\\%s\" is invalid."), + extract_token(lex)); + case JSON_ESCAPING_REQUIRED: + return psprintf(_("Character with value 0x%02x must be escaped."), + (unsigned char) *(lex->token_terminator)); + case JSON_EXPECTED_END: + return psprintf(_("Expected end of input, but found \"%s\"."), + extract_token(lex)); + case JSON_EXPECTED_ARRAY_FIRST: + return psprintf(_("Expected array element or \"]\", but found \"%s\"."), + extract_token(lex)); + case JSON_EXPECTED_ARRAY_NEXT: + return psprintf(_("Expected \",\" or \"]\", but found \"%s\"."), + extract_token(lex)); + case JSON_EXPECTED_COLON: + return psprintf(_("Expected \":\", but found \"%s\"."), + extract_token(lex)); + case JSON_EXPECTED_JSON: + return psprintf(_("Expected JSON value, but found \"%s\"."), + extract_token(lex)); + case JSON_EXPECTED_MORE: + return _("The input string ended unexpectedly."); + case JSON_EXPECTED_OBJECT_FIRST: + return psprintf(_("Expected string or \"}\", but found \"%s\"."), + extract_token(lex)); + case JSON_EXPECTED_OBJECT_NEXT: + return psprintf(_("Expected \",\" or \"}\", but found \"%s\"."), + extract_token(lex)); + case JSON_EXPECTED_STRING: + return psprintf(_("Expected string, but found \"%s\"."), + extract_token(lex)); + case JSON_INVALID_TOKEN: + return psprintf(_("Token \"%s\" is invalid."), + extract_token(lex)); + case JSON_UNICODE_CODE_POINT_ZERO: + return _("\\u0000 cannot be converted to text."); + case JSON_UNICODE_ESCAPE_FORMAT: + return _("\"\\u\" must be followed by four hexadecimal digits."); + case JSON_UNICODE_HIGH_ESCAPE: + /* note: this case is only reachable in frontend not backend */ + return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8."); + case JSON_UNICODE_UNTRANSLATABLE: + /* note: this case is only reachable in backend not frontend */ + return psprintf(_("Unicode escape value could not be translated to the server's encoding %s."), + GetDatabaseEncodingName()); + case JSON_UNICODE_HIGH_SURROGATE: + return _("Unicode high surrogate must not follow a high surrogate."); + case JSON_UNICODE_LOW_SURROGATE: + return _("Unicode low surrogate must follow a high surrogate."); + case JSON_SEM_ACTION_FAILED: + /* fall through to the error code after switch */ + break; + } + + /* + * We don't use a default: case, so that the compiler will warn about + * unhandled enum values. But this needs to be here anyway to cover the + * possibility of an incorrect input. + */ + elog(ERROR, "unexpected json parse error type: %d", (int) error); + return NULL; +} +#endif diff --git a/src/common/keywords.c b/src/common/keywords.c new file mode 100644 index 0000000..b72f0d5 --- /dev/null +++ b/src/common/keywords.c @@ -0,0 +1,48 @@ +/*------------------------------------------------------------------------- + * + * keywords.c + * PostgreSQL's list of SQL keywords + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/keywords.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "common/keywords.h" + + +/* ScanKeywordList lookup data for SQL keywords */ + +#include "kwlist_d.h" + +/* Keyword categories for SQL keywords */ + +#define PG_KEYWORD(kwname, value, category, collabel) category, + +const uint8 ScanKeywordCategories[SCANKEYWORDS_NUM_KEYWORDS] = { +#include "parser/kwlist.h" +}; + +#undef PG_KEYWORD + +/* Keyword can-be-bare-label flags for SQL keywords */ + +#define PG_KEYWORD(kwname, value, category, collabel) collabel, + +#define BARE_LABEL true +#define AS_LABEL false + +const bool ScanKeywordBareLabel[SCANKEYWORDS_NUM_KEYWORDS] = { +#include "parser/kwlist.h" +}; + +#undef PG_KEYWORD +#undef BARE_LABEL +#undef AS_LABEL diff --git a/src/common/kwlist_d.h b/src/common/kwlist_d.h new file mode 100644 index 0000000..e8af260 --- /dev/null +++ b/src/common/kwlist_d.h @@ -0,0 +1,1119 @@ +/*------------------------------------------------------------------------- + * + * kwlist_d.h + * List of keywords represented as a ScanKeywordList. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES + * ****************************** + * *** DO NOT EDIT THIS FILE! *** + * ****************************** + * + * It has been GENERATED by src/tools/gen_keywordlist.pl + * + *------------------------------------------------------------------------- + */ + +#ifndef KWLIST_D_H +#define KWLIST_D_H + +#include "common/kwlookup.h" + +static const char ScanKeywords_kw_string[] = + "abort\0" + "absent\0" + "absolute\0" + "access\0" + "action\0" + "add\0" + "admin\0" + "after\0" + "aggregate\0" + "all\0" + "also\0" + "alter\0" + "always\0" + "analyse\0" + "analyze\0" + "and\0" + "any\0" + "array\0" + "as\0" + "asc\0" + "asensitive\0" + "assertion\0" + "assignment\0" + "asymmetric\0" + "at\0" + "atomic\0" + "attach\0" + "attribute\0" + "authorization\0" + "backward\0" + "before\0" + "begin\0" + "between\0" + "bigint\0" + "binary\0" + "bit\0" + "boolean\0" + "both\0" + "breadth\0" + "by\0" + "cache\0" + "call\0" + "called\0" + "cascade\0" + "cascaded\0" + "case\0" + "cast\0" + "catalog\0" + "chain\0" + "char\0" + "character\0" + "characteristics\0" + "check\0" + "checkpoint\0" + "class\0" + "close\0" + "cluster\0" + "coalesce\0" + "collate\0" + "collation\0" + "column\0" + "columns\0" + "comment\0" + "comments\0" + "commit\0" + "committed\0" + "compression\0" + "concurrently\0" + "configuration\0" + "conflict\0" + "connection\0" + "constraint\0" + "constraints\0" + "content\0" + "continue\0" + "conversion\0" + "copy\0" + "cost\0" + "create\0" + "cross\0" + "csv\0" + "cube\0" + "current\0" + "current_catalog\0" + "current_date\0" + "current_role\0" + "current_schema\0" + "current_time\0" + "current_timestamp\0" + "current_user\0" + "cursor\0" + "cycle\0" + "data\0" + "database\0" + "day\0" + "deallocate\0" + "dec\0" + "decimal\0" + "declare\0" + "default\0" + "defaults\0" + "deferrable\0" + "deferred\0" + "definer\0" + "delete\0" + "delimiter\0" + "delimiters\0" + "depends\0" + "depth\0" + "desc\0" + "detach\0" + "dictionary\0" + "disable\0" + "discard\0" + "distinct\0" + "do\0" + "document\0" + "domain\0" + "double\0" + "drop\0" + "each\0" + "else\0" + "enable\0" + "encoding\0" + "encrypted\0" + "end\0" + "enum\0" + "escape\0" + "event\0" + "except\0" + "exclude\0" + "excluding\0" + "exclusive\0" + "execute\0" + "exists\0" + "explain\0" + "expression\0" + "extension\0" + "external\0" + "extract\0" + "false\0" + "family\0" + "fetch\0" + "filter\0" + "finalize\0" + "first\0" + "float\0" + "following\0" + "for\0" + "force\0" + "foreign\0" + "format\0" + "forward\0" + "freeze\0" + "from\0" + "full\0" + "function\0" + "functions\0" + "generated\0" + "global\0" + "grant\0" + "granted\0" + "greatest\0" + "group\0" + "grouping\0" + "groups\0" + "handler\0" + "having\0" + "header\0" + "hold\0" + "hour\0" + "identity\0" + "if\0" + "ilike\0" + "immediate\0" + "immutable\0" + "implicit\0" + "import\0" + "in\0" + "include\0" + "including\0" + "increment\0" + "indent\0" + "index\0" + "indexes\0" + "inherit\0" + "inherits\0" + "initially\0" + "inline\0" + "inner\0" + "inout\0" + "input\0" + "insensitive\0" + "insert\0" + "instead\0" + "int\0" + "integer\0" + "intersect\0" + "interval\0" + "into\0" + "invoker\0" + "is\0" + "isnull\0" + "isolation\0" + "join\0" + "json\0" + "json_array\0" + "json_arrayagg\0" + "json_object\0" + "json_objectagg\0" + "key\0" + "keys\0" + "label\0" + "language\0" + "large\0" + "last\0" + "lateral\0" + "leading\0" + "leakproof\0" + "least\0" + "left\0" + "level\0" + "like\0" + "limit\0" + "listen\0" + "load\0" + "local\0" + "localtime\0" + "localtimestamp\0" + "location\0" + "lock\0" + "locked\0" + "logged\0" + "mapping\0" + "match\0" + "matched\0" + "materialized\0" + "maxvalue\0" + "merge\0" + "method\0" + "minute\0" + "minvalue\0" + "mode\0" + "month\0" + "move\0" + "name\0" + "names\0" + "national\0" + "natural\0" + "nchar\0" + "new\0" + "next\0" + "nfc\0" + "nfd\0" + "nfkc\0" + "nfkd\0" + "no\0" + "none\0" + "normalize\0" + "normalized\0" + "not\0" + "nothing\0" + "notify\0" + "notnull\0" + "nowait\0" + "null\0" + "nullif\0" + "nulls\0" + "numeric\0" + "object\0" + "of\0" + "off\0" + "offset\0" + "oids\0" + "old\0" + "on\0" + "only\0" + "operator\0" + "option\0" + "options\0" + "or\0" + "order\0" + "ordinality\0" + "others\0" + "out\0" + "outer\0" + "over\0" + "overlaps\0" + "overlay\0" + "overriding\0" + "owned\0" + "owner\0" + "parallel\0" + "parameter\0" + "parser\0" + "partial\0" + "partition\0" + "passing\0" + "password\0" + "placing\0" + "plans\0" + "policy\0" + "position\0" + "preceding\0" + "precision\0" + "prepare\0" + "prepared\0" + "preserve\0" + "primary\0" + "prior\0" + "privileges\0" + "procedural\0" + "procedure\0" + "procedures\0" + "program\0" + "publication\0" + "quote\0" + "range\0" + "read\0" + "real\0" + "reassign\0" + "recheck\0" + "recursive\0" + "ref\0" + "references\0" + "referencing\0" + "refresh\0" + "reindex\0" + "relative\0" + "release\0" + "rename\0" + "repeatable\0" + "replace\0" + "replica\0" + "reset\0" + "restart\0" + "restrict\0" + "return\0" + "returning\0" + "returns\0" + "revoke\0" + "right\0" + "role\0" + "rollback\0" + "rollup\0" + "routine\0" + "routines\0" + "row\0" + "rows\0" + "rule\0" + "savepoint\0" + "scalar\0" + "schema\0" + "schemas\0" + "scroll\0" + "search\0" + "second\0" + "security\0" + "select\0" + "sequence\0" + "sequences\0" + "serializable\0" + "server\0" + "session\0" + "session_user\0" + "set\0" + "setof\0" + "sets\0" + "share\0" + "show\0" + "similar\0" + "simple\0" + "skip\0" + "smallint\0" + "snapshot\0" + "some\0" + "sql\0" + "stable\0" + "standalone\0" + "start\0" + "statement\0" + "statistics\0" + "stdin\0" + "stdout\0" + "storage\0" + "stored\0" + "strict\0" + "strip\0" + "subscription\0" + "substring\0" + "support\0" + "symmetric\0" + "sysid\0" + "system\0" + "system_user\0" + "table\0" + "tables\0" + "tablesample\0" + "tablespace\0" + "temp\0" + "template\0" + "temporary\0" + "text\0" + "then\0" + "ties\0" + "time\0" + "timestamp\0" + "to\0" + "trailing\0" + "transaction\0" + "transform\0" + "treat\0" + "trigger\0" + "trim\0" + "true\0" + "truncate\0" + "trusted\0" + "type\0" + "types\0" + "uescape\0" + "unbounded\0" + "uncommitted\0" + "unencrypted\0" + "union\0" + "unique\0" + "unknown\0" + "unlisten\0" + "unlogged\0" + "until\0" + "update\0" + "user\0" + "using\0" + "vacuum\0" + "valid\0" + "validate\0" + "validator\0" + "value\0" + "values\0" + "varchar\0" + "variadic\0" + "varying\0" + "verbose\0" + "version\0" + "view\0" + "views\0" + "volatile\0" + "when\0" + "where\0" + "whitespace\0" + "window\0" + "with\0" + "within\0" + "without\0" + "work\0" + "wrapper\0" + "write\0" + "xml\0" + "xmlattributes\0" + "xmlconcat\0" + "xmlelement\0" + "xmlexists\0" + "xmlforest\0" + "xmlnamespaces\0" + "xmlparse\0" + "xmlpi\0" + "xmlroot\0" + "xmlserialize\0" + "xmltable\0" + "year\0" + "yes\0" + "zone"; + +static const uint16 ScanKeywords_kw_offsets[] = { + 0, + 6, + 13, + 22, + 29, + 36, + 40, + 46, + 52, + 62, + 66, + 71, + 77, + 84, + 92, + 100, + 104, + 108, + 114, + 117, + 121, + 132, + 142, + 153, + 164, + 167, + 174, + 181, + 191, + 205, + 214, + 221, + 227, + 235, + 242, + 249, + 253, + 261, + 266, + 274, + 277, + 283, + 288, + 295, + 303, + 312, + 317, + 322, + 330, + 336, + 341, + 351, + 367, + 373, + 384, + 390, + 396, + 404, + 413, + 421, + 431, + 438, + 446, + 454, + 463, + 470, + 480, + 492, + 505, + 519, + 528, + 539, + 550, + 562, + 570, + 579, + 590, + 595, + 600, + 607, + 613, + 617, + 622, + 630, + 646, + 659, + 672, + 687, + 700, + 718, + 731, + 738, + 744, + 749, + 758, + 762, + 773, + 777, + 785, + 793, + 801, + 810, + 821, + 830, + 838, + 845, + 855, + 866, + 874, + 880, + 885, + 892, + 903, + 911, + 919, + 928, + 931, + 940, + 947, + 954, + 959, + 964, + 969, + 976, + 985, + 995, + 999, + 1004, + 1011, + 1017, + 1024, + 1032, + 1042, + 1052, + 1060, + 1067, + 1075, + 1086, + 1096, + 1105, + 1113, + 1119, + 1126, + 1132, + 1139, + 1148, + 1154, + 1160, + 1170, + 1174, + 1180, + 1188, + 1195, + 1203, + 1210, + 1215, + 1220, + 1229, + 1239, + 1249, + 1256, + 1262, + 1270, + 1279, + 1285, + 1294, + 1301, + 1309, + 1316, + 1323, + 1328, + 1333, + 1342, + 1345, + 1351, + 1361, + 1371, + 1380, + 1387, + 1390, + 1398, + 1408, + 1418, + 1425, + 1431, + 1439, + 1447, + 1456, + 1466, + 1473, + 1479, + 1485, + 1491, + 1503, + 1510, + 1518, + 1522, + 1530, + 1540, + 1549, + 1554, + 1562, + 1565, + 1572, + 1582, + 1587, + 1592, + 1603, + 1617, + 1629, + 1644, + 1648, + 1653, + 1659, + 1668, + 1674, + 1679, + 1687, + 1695, + 1705, + 1711, + 1716, + 1722, + 1727, + 1733, + 1740, + 1745, + 1751, + 1761, + 1776, + 1785, + 1790, + 1797, + 1804, + 1812, + 1818, + 1826, + 1839, + 1848, + 1854, + 1861, + 1868, + 1877, + 1882, + 1888, + 1893, + 1898, + 1904, + 1913, + 1921, + 1927, + 1931, + 1936, + 1940, + 1944, + 1949, + 1954, + 1957, + 1962, + 1972, + 1983, + 1987, + 1995, + 2002, + 2010, + 2017, + 2022, + 2029, + 2035, + 2043, + 2050, + 2053, + 2057, + 2064, + 2069, + 2073, + 2076, + 2081, + 2090, + 2097, + 2105, + 2108, + 2114, + 2125, + 2132, + 2136, + 2142, + 2147, + 2156, + 2164, + 2175, + 2181, + 2187, + 2196, + 2206, + 2213, + 2221, + 2231, + 2239, + 2248, + 2256, + 2262, + 2269, + 2278, + 2288, + 2298, + 2306, + 2315, + 2324, + 2332, + 2338, + 2349, + 2360, + 2370, + 2381, + 2389, + 2401, + 2407, + 2413, + 2418, + 2423, + 2432, + 2440, + 2450, + 2454, + 2465, + 2477, + 2485, + 2493, + 2502, + 2510, + 2517, + 2528, + 2536, + 2544, + 2550, + 2558, + 2567, + 2574, + 2584, + 2592, + 2599, + 2605, + 2610, + 2619, + 2626, + 2634, + 2643, + 2647, + 2652, + 2657, + 2667, + 2674, + 2681, + 2689, + 2696, + 2703, + 2710, + 2719, + 2726, + 2735, + 2745, + 2758, + 2765, + 2773, + 2786, + 2790, + 2796, + 2801, + 2807, + 2812, + 2820, + 2827, + 2832, + 2841, + 2850, + 2855, + 2859, + 2866, + 2877, + 2883, + 2893, + 2904, + 2910, + 2917, + 2925, + 2932, + 2939, + 2945, + 2958, + 2968, + 2976, + 2986, + 2992, + 2999, + 3011, + 3017, + 3024, + 3036, + 3047, + 3052, + 3061, + 3071, + 3076, + 3081, + 3086, + 3091, + 3101, + 3104, + 3113, + 3125, + 3135, + 3141, + 3149, + 3154, + 3159, + 3168, + 3176, + 3181, + 3187, + 3195, + 3205, + 3217, + 3229, + 3235, + 3242, + 3250, + 3259, + 3268, + 3274, + 3281, + 3286, + 3292, + 3299, + 3305, + 3314, + 3324, + 3330, + 3337, + 3345, + 3354, + 3362, + 3370, + 3378, + 3383, + 3389, + 3398, + 3403, + 3409, + 3420, + 3427, + 3432, + 3439, + 3447, + 3452, + 3460, + 3466, + 3470, + 3484, + 3494, + 3505, + 3515, + 3525, + 3539, + 3548, + 3554, + 3562, + 3575, + 3584, + 3589, + 3593, +}; + +#define SCANKEYWORDS_NUM_KEYWORDS 471 + +static int +ScanKeywords_hash_func(const void *key, size_t keylen) +{ + static const int16 h[943] = { + 543, -186, 201, 0, 32767, 32767, 32767, 32767, + 221, -207, 32767, 0, 135, 283, 32767, 454, + 14, 79, 32767, 32767, 77, 32767, 102, 160, + 0, 32767, 151, 32767, 30, 392, -322, 452, + 32767, 0, 32767, 0, 0, 32767, 32767, 32767, + 234, 32767, 0, 32767, 0, 631, 32767, 368, + 80, 0, 0, -115, 32767, 285, 32767, 423, + 0, 32767, 155, 229, 32767, 126, 291, 165, + -22, 400, 327, 32767, 32767, 32767, 32767, -399, + 0, 406, 32767, 210, 1102, -203, 32767, 32767, + 32767, -944, 0, -188, 32767, 32767, 0, 347, + 32767, 0, 559, 316, 133, 32767, 202, 32767, + 305, 0, 32767, -94, 32767, 0, 32767, -222, + 32767, 138, 32767, -52, 32767, 32767, 279, 69, + -136, 0, 32767, 32767, 189, 32767, 32767, 88, + 0, 32767, 32767, 274, 32767, 514, 769, 248, + 32767, 32767, 32767, 32767, 32767, 32767, 0, 81, + 8, -29, 32767, 32767, 32767, -174, 258, 0, + 465, 211, 32767, 0, -229, 32767, -191, 32767, + 1263, 48, 32767, 343, 0, 58, 0, 32767, + 32767, 855, 0, 415, 0, -217, 32767, 1195, + 32767, 32767, 166, 32767, 42, 262, -736, 0, + 32767, 32767, 418, 178, 122, 32767, 46, 32767, + 32767, 32767, 229, 443, 32767, 32767, 250, 32767, + -300, 0, 32767, 1153, 32767, 108, 32767, -462, + 266, 32767, 478, -220, 235, 32767, 32767, -127, + 32767, 32767, 32767, 427, -231, 156, 32767, 0, + 0, 148, -218, 142, 73, 420, 32767, 32767, + 523, 32767, -36, 32767, 32767, 467, 844, -415, + 32767, 32767, -148, 179, 361, 32767, 151, 0, + 0, 32767, 145, 32767, 248, 110, 29, 125, + 282, 32767, -36, 43, 32767, 1125, 32767, 530, + 251, 519, 191, 0, 32767, -34, -502, 313, + 462, 845, 32767, 32767, -255, 412, 32767, 78, + 0, 32767, 444, 161, 0, 32767, 308, 32767, + -273, 400, 32767, 296, 32767, 32767, 72, 32767, + 32767, 34, 32767, 364, 151, -63, 4, 229, + 0, -276, 32767, 32767, 32767, 32767, -406, 32767, + 203, 32767, 140, 187, 160, 32767, 286, 0, + 32767, 32767, -88, 0, 100, -361, 32767, 9, + 0, -456, 32767, -37, -404, 32767, -969, 32767, + 371, 95, 0, 703, -31, 263, 373, -745, + 507, 14, 32767, -159, 0, 32767, 47, 299, + -126, 0, 32767, 83, 32767, 32767, 420, 236, + 32767, 32767, 0, 310, 89, 233, 32767, 93, + 32767, 0, 816, 60, 301, 211, 193, 0, + 452, -107, -403, -242, 353, 18, 32767, 32767, + 32767, 243, 104, 32767, 32767, 32767, -305, 32767, + -1048, 54, 0, 383, 32767, 32767, 32767, 226, + 319, 0, 32767, 32767, 32767, -130, 537, 32767, + 0, -206, 240, 696, 121, 32767, 180, 164, + 32767, 390, 185, 32767, 220, 545, 29, 32767, + 0, 32767, 32767, 1120, -163, 32767, 32767, 32767, + -368, 136, 445, 171, 233, 32767, 73, 32767, + 92, 32767, 0, 32767, 0, 208, 354, 32767, + 54, 32767, 32767, -246, -93, 389, 32767, 32767, + 32767, 32767, 50, 32767, 32767, 308, 32767, -278, + 0, 32767, 32767, -1172, 32767, 8, 32767, 0, + 32767, 341, 304, 242, -174, -92, 76, 419, + 32767, 87, 32767, -262, 32767, 32767, 32767, 109, + 200, 0, 32767, 0, 85, 530, 32767, -316, + 32767, 0, -286, 32767, 193, 268, 32767, 32767, + 278, 32767, 32767, 155, 445, 95, -310, 32767, + 207, -56, 32767, 32767, 0, -127, 232, -283, + 103, 32767, 1, 0, 32767, 32767, -485, 350, + 79, -56, -354, 32767, 121, 24, 81, 20, + 325, 40, 248, 32767, 32767, 32767, 358, 32767, + -56, 32767, 0, 174, -28, -301, -92, 32767, + 114, 295, 32767, 363, -355, 32767, 290, 0, + 32767, 32767, 32767, 122, 55, -142, 32767, 50, + 32767, 32767, 152, 571, 1397, 0, 472, -448, + 185, 140, 228, 435, 0, 32767, 32767, 414, + 32767, 379, 92, 185, 23, 299, 32767, 32767, + 0, 32767, 32767, 32767, 306, 439, -198, 219, + 340, 32767, 416, 0, -123, 377, 32767, 32767, + 0, 32767, 670, -670, 339, 32767, 32767, 32767, + 0, -256, 70, 514, 331, 0, 302, 469, + 0, 370, 32767, 32767, 42, 255, 212, 0, + 322, 277, 32767, -163, 32767, 216, 32767, 32767, + 0, 32767, 190, 32767, 32767, 0, 32767, 0, + -409, 1366, 32767, 32767, 32767, 193, 32767, 325, + 32767, 0, 142, 466, 32767, 32767, 32767, 113, + 32767, 32767, 62, 0, -62, 113, -90, 34, + -256, 32767, 32767, -936, 32767, 32767, 32767, 0, + -64, 0, -34, 451, 290, 108, 32767, 276, + 842, 0, 556, -153, 32767, 412, -168, 32767, + 32767, 1331, 407, 234, -60, 115, 457, -73, + 502, 772, 32767, 33, 404, -925, 32767, 32767, + 421, -123, 32767, 32767, 32767, 0, 0, 32767, + 32767, 32767, 429, 0, 3, 769, -81, 306, + 64, 32767, 192, 96, 0, 63, 44, 32767, + 32767, 32767, 32767, 0, 284, 32767, 575, 32767, + 32767, 12, 32767, 516, 116, 32767, 32767, 150, + 442, 134, 32767, 198, -45, 249, 40, 373, + 32767, 0, 32767, 32767, 0, 0, 352, 32767, + 117, 32767, 426, 0, 0, 32767, 32767, 32767, + 32767, -92, 32767, -442, 32767, 269, 32767, 32767, + 32767, 429, 32767, 0, 32767, 0, 143, 32767, + 508, -66, 32767, 280, 32767, 39, 162, 32767, + 32767, 0, 32767, 31, 32767, 32767, 32767, 0, + 32767, 257, -90, -249, 224, 272, 32767, 32767, + 313, -467, 214, 0, -85, 32767, 48, 0, + 32767, -336, 202, 0, 447, 90, 264, 32767, + 32767, 0, 101, 32767, 32767, 32767, 0, 32767, + 32767, 227, -1093, 32767, 0, 32767, 27, 174, + 32767, 7, 32767, -621, 146, 32767, 32767, 32767, + 854, 0, 32767, 161, 0, 137, 32767, 32767, + 32767, 32767, 0, 391, 219, 276, 32767, 168, + 32767, 32767, 0, 32767, 32767, 32767, 1, -4, + 32767, 0, 293, 0, 374, 256, 0, 0, + 32767, 355, 212, 404, 0, 186, 32767, 0, + 359, 32767, 32767, 172, 32767, 32767, -131, 0, + 402, 0, 56, 32767, 462, 389, 82, 0, + 32767, 0, 32767, 0, 32767, 32767, 32767, 32767, + 106, 425, -160, 31, 32767, 55, 0, 0, + 32767, 32767, 430, 1224, 179, -179, 0, 397, + 32767, 0, 0, 0, -60, 47, 32767, 396, + 32767, 326, 383, 369, 32767, 368, 32767 + }; + + const unsigned char *k = (const unsigned char *) key; + uint32 a = 0; + uint32 b = 0; + + while (keylen--) + { + unsigned char c = *k++ | 0x20; + + a = a * 257 + c; + b = b * 31 + c; + } + return h[a % 943] + h[b % 943]; +} + +const ScanKeywordList ScanKeywords = { + ScanKeywords_kw_string, + ScanKeywords_kw_offsets, + ScanKeywords_hash_func, + SCANKEYWORDS_NUM_KEYWORDS, + 17 +}; + +#endif /* KWLIST_D_H */ diff --git a/src/common/kwlookup.c b/src/common/kwlookup.c new file mode 100644 index 0000000..7e49825 --- /dev/null +++ b/src/common/kwlookup.c @@ -0,0 +1,85 @@ +/*------------------------------------------------------------------------- + * + * kwlookup.c + * Key word lookup for PostgreSQL + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/kwlookup.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "common/kwlookup.h" + + +/* + * ScanKeywordLookup - see if a given word is a keyword + * + * The list of keywords to be matched against is passed as a ScanKeywordList. + * + * Returns the keyword number (0..N-1) of the keyword, or -1 if no match. + * Callers typically use the keyword number to index into information + * arrays, but that is no concern of this code. + * + * The match is done case-insensitively. Note that we deliberately use a + * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z', + * even if we are in a locale where tolower() would produce more or different + * translations. This is to conform to the SQL99 spec, which says that + * keywords are to be matched in this way even though non-keyword identifiers + * receive a different case-normalization mapping. + */ +int +ScanKeywordLookup(const char *str, + const ScanKeywordList *keywords) +{ + size_t len; + int h; + const char *kw; + + /* + * Reject immediately if too long to be any keyword. This saves useless + * hashing and downcasing work on long strings. + */ + len = strlen(str); + if (len > keywords->max_kw_len) + return -1; + + /* + * Compute the hash function. We assume it was generated to produce + * case-insensitive results. Since it's a perfect hash, we need only + * match to the specific keyword it identifies. + */ + h = keywords->hash(str, len); + + /* An out-of-range result implies no match */ + if (h < 0 || h >= keywords->num_keywords) + return -1; + + /* + * Compare character-by-character to see if we have a match, applying an + * ASCII-only downcasing to the input characters. We must not use + * tolower() since it may produce the wrong translation in some locales + * (eg, Turkish). + */ + kw = GetScanKeyword(h, keywords); + while (*str != '\0') + { + char ch = *str++; + + if (ch >= 'A' && ch <= 'Z') + ch += 'a' - 'A'; + if (ch != *kw++) + return -1; + } + if (*kw != '\0') + return -1; + + /* Success! */ + return h; +} diff --git a/src/common/link-canary.c b/src/common/link-canary.c new file mode 100644 index 0000000..f84331a --- /dev/null +++ b/src/common/link-canary.c @@ -0,0 +1,36 @@ +/*------------------------------------------------------------------------- + * link-canary.c + * Detect whether src/common functions came from frontend or backend. + * + * Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/link-canary.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "common/link-canary.h" + +/* + * This function just reports whether this file was compiled for frontend + * or backend environment. We need this because in some systems, mainly + * ELF-based platforms, it is possible for a shlib (such as libpq) loaded + * into the backend to call a backend function named XYZ in preference to + * the shlib's own function XYZ. That's bad if the two functions don't + * act identically. This exact situation comes up for many functions in + * src/common and src/port, where the same function names exist in both + * libpq and the backend but they don't act quite identically. To verify + * that appropriate measures have been taken to prevent incorrect symbol + * resolution, libpq should test that this function returns true. + */ +bool +pg_link_canary_is_frontend(void) +{ +#ifdef FRONTEND + return true; +#else + return false; +#endif +} diff --git a/src/common/logging.c b/src/common/logging.c new file mode 100644 index 0000000..dab718b --- /dev/null +++ b/src/common/logging.c @@ -0,0 +1,334 @@ +/*------------------------------------------------------------------------- + * Logging framework for frontend programs + * + * Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * src/common/logging.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#error "This file is not expected to be compiled for backend code" +#endif + +#include "postgres_fe.h" + +#include <unistd.h> + +#include "common/logging.h" + +enum pg_log_level __pg_log_level; + +static const char *progname; +static int log_flags; + +static void (*log_pre_callback) (void); +static void (*log_locus_callback) (const char **, uint64 *); + +static const char *sgr_error = NULL; +static const char *sgr_warning = NULL; +static const char *sgr_note = NULL; +static const char *sgr_locus = NULL; + +#define SGR_ERROR_DEFAULT "01;31" +#define SGR_WARNING_DEFAULT "01;35" +#define SGR_NOTE_DEFAULT "01;36" +#define SGR_LOCUS_DEFAULT "01" + +#define ANSI_ESCAPE_FMT "\x1b[%sm" +#define ANSI_ESCAPE_RESET "\x1b[0m" + +#ifdef WIN32 + +#ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING +#define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x0004 +#endif + +/* + * Attempt to enable VT100 sequence processing for colorization on Windows. + * If current environment is not VT100-compatible or if this mode could not + * be enabled, return false. + */ +static bool +enable_vt_processing(void) +{ + /* Check stderr */ + HANDLE hOut = GetStdHandle(STD_ERROR_HANDLE); + DWORD dwMode = 0; + + if (hOut == INVALID_HANDLE_VALUE) + return false; + + /* + * Look for the current console settings and check if VT100 is already + * enabled. + */ + if (!GetConsoleMode(hOut, &dwMode)) + return false; + if ((dwMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING) != 0) + return true; + + dwMode |= ENABLE_VIRTUAL_TERMINAL_PROCESSING; + if (!SetConsoleMode(hOut, dwMode)) + return false; + return true; +} +#endif /* WIN32 */ + +/* + * This should be called before any output happens. + */ +void +pg_logging_init(const char *argv0) +{ + const char *pg_color_env = getenv("PG_COLOR"); + bool log_color = false; + bool color_terminal = isatty(fileno(stderr)); + +#ifdef WIN32 + + /* + * On Windows, check if environment is VT100-compatible if using a + * terminal. + */ + if (color_terminal) + color_terminal = enable_vt_processing(); +#endif + + /* usually the default, but not on Windows */ + setvbuf(stderr, NULL, _IONBF, 0); + + progname = get_progname(argv0); + __pg_log_level = PG_LOG_INFO; + + if (pg_color_env) + { + if (strcmp(pg_color_env, "always") == 0 || + (strcmp(pg_color_env, "auto") == 0 && color_terminal)) + log_color = true; + } + + if (log_color) + { + const char *pg_colors_env = getenv("PG_COLORS"); + + if (pg_colors_env) + { + char *colors = strdup(pg_colors_env); + + if (colors) + { + for (char *token = strtok(colors, ":"); token; token = strtok(NULL, ":")) + { + char *e = strchr(token, '='); + + if (e) + { + char *name; + char *value; + + *e = '\0'; + name = token; + value = e + 1; + + if (strcmp(name, "error") == 0) + sgr_error = strdup(value); + if (strcmp(name, "warning") == 0) + sgr_warning = strdup(value); + if (strcmp(name, "note") == 0) + sgr_note = strdup(value); + if (strcmp(name, "locus") == 0) + sgr_locus = strdup(value); + } + } + + free(colors); + } + } + else + { + sgr_error = SGR_ERROR_DEFAULT; + sgr_warning = SGR_WARNING_DEFAULT; + sgr_note = SGR_NOTE_DEFAULT; + sgr_locus = SGR_LOCUS_DEFAULT; + } + } +} + +/* + * Change the logging flags. + */ +void +pg_logging_config(int new_flags) +{ + log_flags = new_flags; +} + +/* + * pg_logging_init sets the default log level to INFO. Programs that prefer + * a different default should use this to set it, immediately afterward. + */ +void +pg_logging_set_level(enum pg_log_level new_level) +{ + __pg_log_level = new_level; +} + +/* + * Command line switches such as --verbose should invoke this. + */ +void +pg_logging_increase_verbosity(void) +{ + /* + * The enum values are chosen such that we have to decrease __pg_log_level + * in order to become more verbose. + */ + if (__pg_log_level > PG_LOG_NOTSET + 1) + __pg_log_level--; +} + +void +pg_logging_set_pre_callback(void (*cb) (void)) +{ + log_pre_callback = cb; +} + +void +pg_logging_set_locus_callback(void (*cb) (const char **filename, uint64 *lineno)) +{ + log_locus_callback = cb; +} + +void +pg_log_generic(enum pg_log_level level, enum pg_log_part part, + const char *pg_restrict fmt,...) +{ + va_list ap; + + va_start(ap, fmt); + pg_log_generic_v(level, part, fmt, ap); + va_end(ap); +} + +void +pg_log_generic_v(enum pg_log_level level, enum pg_log_part part, + const char *pg_restrict fmt, va_list ap) +{ + int save_errno = errno; + const char *filename = NULL; + uint64 lineno = 0; + va_list ap2; + size_t required_len; + char *buf; + + Assert(progname); + Assert(level); + Assert(fmt); + Assert(fmt[strlen(fmt) - 1] != '\n'); + + /* Do nothing if log level is too low. */ + if (level < __pg_log_level) + return; + + /* + * Flush stdout before output to stderr, to ensure sync even when stdout + * is buffered. + */ + fflush(stdout); + + if (log_pre_callback) + log_pre_callback(); + + if (log_locus_callback) + log_locus_callback(&filename, &lineno); + + fmt = _(fmt); + + if (!(log_flags & PG_LOG_FLAG_TERSE) || filename) + { + if (sgr_locus) + fprintf(stderr, ANSI_ESCAPE_FMT, sgr_locus); + if (!(log_flags & PG_LOG_FLAG_TERSE)) + fprintf(stderr, "%s:", progname); + if (filename) + { + fprintf(stderr, "%s:", filename); + if (lineno > 0) + fprintf(stderr, UINT64_FORMAT ":", lineno); + } + fprintf(stderr, " "); + if (sgr_locus) + fprintf(stderr, ANSI_ESCAPE_RESET); + } + + if (!(log_flags & PG_LOG_FLAG_TERSE)) + { + switch (part) + { + case PG_LOG_PRIMARY: + switch (level) + { + case PG_LOG_ERROR: + if (sgr_error) + fprintf(stderr, ANSI_ESCAPE_FMT, sgr_error); + fprintf(stderr, _("error: ")); + if (sgr_error) + fprintf(stderr, ANSI_ESCAPE_RESET); + break; + case PG_LOG_WARNING: + if (sgr_warning) + fprintf(stderr, ANSI_ESCAPE_FMT, sgr_warning); + fprintf(stderr, _("warning: ")); + if (sgr_warning) + fprintf(stderr, ANSI_ESCAPE_RESET); + break; + default: + break; + } + break; + case PG_LOG_DETAIL: + if (sgr_note) + fprintf(stderr, ANSI_ESCAPE_FMT, sgr_note); + fprintf(stderr, _("detail: ")); + if (sgr_note) + fprintf(stderr, ANSI_ESCAPE_RESET); + break; + case PG_LOG_HINT: + if (sgr_note) + fprintf(stderr, ANSI_ESCAPE_FMT, sgr_note); + fprintf(stderr, _("hint: ")); + if (sgr_note) + fprintf(stderr, ANSI_ESCAPE_RESET); + break; + } + } + + errno = save_errno; + + va_copy(ap2, ap); + required_len = vsnprintf(NULL, 0, fmt, ap2) + 1; + va_end(ap2); + + buf = pg_malloc_extended(required_len, MCXT_ALLOC_NO_OOM); + + errno = save_errno; /* malloc might change errno */ + + if (!buf) + { + /* memory trouble, just print what we can and get out of here */ + vfprintf(stderr, fmt, ap); + return; + } + + vsnprintf(buf, required_len, fmt, ap); + + /* strip one newline, for PQerrorMessage() */ + if (required_len >= 2 && buf[required_len - 2] == '\n') + buf[required_len - 2] = '\0'; + + fprintf(stderr, "%s\n", buf); + + free(buf); +} diff --git a/src/common/md5.c b/src/common/md5.c new file mode 100644 index 0000000..bf56311 --- /dev/null +++ b/src/common/md5.c @@ -0,0 +1,439 @@ +/*------------------------------------------------------------------------- + * + * md5.c + * Implements the MD5 Message-Digest Algorithm + * + * Fallback implementation of MD5, as specified in RFC 1321. This + * implementation is a simple one, in that it needs every input byte + * to be buffered before doing any calculations. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/md5.c + * + *------------------------------------------------------------------------- + */ + +/* $KAME: md5.c,v 1.3 2000/02/22 14:01:17 itojun Exp $ */ + +/* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "md5_int.h" + +#define SHIFT(X, s) (((X) << (s)) | ((X) >> (32 - (s)))) + +#define F(X, Y, Z) (((X) & (Y)) | ((~X) & (Z))) +#define G(X, Y, Z) (((X) & (Z)) | ((Y) & (~Z))) +#define H(X, Y, Z) ((X) ^ (Y) ^ (Z)) +#define I(X, Y, Z) ((Y) ^ ((X) | (~Z))) + +#define ROUND1(a, b, c, d, k, s, i) \ +do { \ + (a) = (a) + F((b), (c), (d)) + X[(k)] + T[(i)]; \ + (a) = SHIFT((a), (s)); \ + (a) = (b) + (a); \ +} while (0) + +#define ROUND2(a, b, c, d, k, s, i) \ +do { \ + (a) = (a) + G((b), (c), (d)) + X[(k)] + T[(i)]; \ + (a) = SHIFT((a), (s)); \ + (a) = (b) + (a); \ +} while (0) + +#define ROUND3(a, b, c, d, k, s, i) \ +do { \ + (a) = (a) + H((b), (c), (d)) + X[(k)] + T[(i)]; \ + (a) = SHIFT((a), (s)); \ + (a) = (b) + (a); \ +} while (0) + +#define ROUND4(a, b, c, d, k, s, i) \ +do { \ + (a) = (a) + I((b), (c), (d)) + X[(k)] + T[(i)]; \ + (a) = SHIFT((a), (s)); \ + (a) = (b) + (a); \ +} while (0) + +#define Sa 7 +#define Sb 12 +#define Sc 17 +#define Sd 22 + +#define Se 5 +#define Sf 9 +#define Sg 14 +#define Sh 20 + +#define Si 4 +#define Sj 11 +#define Sk 16 +#define Sl 23 + +#define Sm 6 +#define Sn 10 +#define So 15 +#define Sp 21 + +#define MD5_A0 0x67452301 +#define MD5_B0 0xefcdab89 +#define MD5_C0 0x98badcfe +#define MD5_D0 0x10325476 + +/* Integer part of 4294967296 times abs(sin(i)), where i is in radians. */ +static const uint32 T[65] = { + 0, + 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, + 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, + 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, + 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, + + 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, + 0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8, + 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, + 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, + + 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, + 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, + 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05, + 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, + + 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, + 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, + 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, + 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, +}; + +static const uint8 md5_paddat[MD5_BUFLEN] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; + +#ifdef WORDS_BIGENDIAN +static uint32 X[16]; +#endif + +static void +md5_calc(const uint8 *b64, pg_md5_ctx *ctx) +{ + uint32 A = ctx->md5_sta; + uint32 B = ctx->md5_stb; + uint32 C = ctx->md5_stc; + uint32 D = ctx->md5_std; + +#ifndef WORDS_BIGENDIAN + const uint32 *X = (const uint32 *) b64; +#else + /* 4 byte words */ + /* what a brute force but fast! */ + uint8 *y = (uint8 *) X; + + y[0] = b64[3]; + y[1] = b64[2]; + y[2] = b64[1]; + y[3] = b64[0]; + y[4] = b64[7]; + y[5] = b64[6]; + y[6] = b64[5]; + y[7] = b64[4]; + y[8] = b64[11]; + y[9] = b64[10]; + y[10] = b64[9]; + y[11] = b64[8]; + y[12] = b64[15]; + y[13] = b64[14]; + y[14] = b64[13]; + y[15] = b64[12]; + y[16] = b64[19]; + y[17] = b64[18]; + y[18] = b64[17]; + y[19] = b64[16]; + y[20] = b64[23]; + y[21] = b64[22]; + y[22] = b64[21]; + y[23] = b64[20]; + y[24] = b64[27]; + y[25] = b64[26]; + y[26] = b64[25]; + y[27] = b64[24]; + y[28] = b64[31]; + y[29] = b64[30]; + y[30] = b64[29]; + y[31] = b64[28]; + y[32] = b64[35]; + y[33] = b64[34]; + y[34] = b64[33]; + y[35] = b64[32]; + y[36] = b64[39]; + y[37] = b64[38]; + y[38] = b64[37]; + y[39] = b64[36]; + y[40] = b64[43]; + y[41] = b64[42]; + y[42] = b64[41]; + y[43] = b64[40]; + y[44] = b64[47]; + y[45] = b64[46]; + y[46] = b64[45]; + y[47] = b64[44]; + y[48] = b64[51]; + y[49] = b64[50]; + y[50] = b64[49]; + y[51] = b64[48]; + y[52] = b64[55]; + y[53] = b64[54]; + y[54] = b64[53]; + y[55] = b64[52]; + y[56] = b64[59]; + y[57] = b64[58]; + y[58] = b64[57]; + y[59] = b64[56]; + y[60] = b64[63]; + y[61] = b64[62]; + y[62] = b64[61]; + y[63] = b64[60]; +#endif + + ROUND1(A, B, C, D, 0, Sa, 1); + ROUND1(D, A, B, C, 1, Sb, 2); + ROUND1(C, D, A, B, 2, Sc, 3); + ROUND1(B, C, D, A, 3, Sd, 4); + ROUND1(A, B, C, D, 4, Sa, 5); + ROUND1(D, A, B, C, 5, Sb, 6); + ROUND1(C, D, A, B, 6, Sc, 7); + ROUND1(B, C, D, A, 7, Sd, 8); + ROUND1(A, B, C, D, 8, Sa, 9); + ROUND1(D, A, B, C, 9, Sb, 10); + ROUND1(C, D, A, B, 10, Sc, 11); + ROUND1(B, C, D, A, 11, Sd, 12); + ROUND1(A, B, C, D, 12, Sa, 13); + ROUND1(D, A, B, C, 13, Sb, 14); + ROUND1(C, D, A, B, 14, Sc, 15); + ROUND1(B, C, D, A, 15, Sd, 16); + + ROUND2(A, B, C, D, 1, Se, 17); + ROUND2(D, A, B, C, 6, Sf, 18); + ROUND2(C, D, A, B, 11, Sg, 19); + ROUND2(B, C, D, A, 0, Sh, 20); + ROUND2(A, B, C, D, 5, Se, 21); + ROUND2(D, A, B, C, 10, Sf, 22); + ROUND2(C, D, A, B, 15, Sg, 23); + ROUND2(B, C, D, A, 4, Sh, 24); + ROUND2(A, B, C, D, 9, Se, 25); + ROUND2(D, A, B, C, 14, Sf, 26); + ROUND2(C, D, A, B, 3, Sg, 27); + ROUND2(B, C, D, A, 8, Sh, 28); + ROUND2(A, B, C, D, 13, Se, 29); + ROUND2(D, A, B, C, 2, Sf, 30); + ROUND2(C, D, A, B, 7, Sg, 31); + ROUND2(B, C, D, A, 12, Sh, 32); + + ROUND3(A, B, C, D, 5, Si, 33); + ROUND3(D, A, B, C, 8, Sj, 34); + ROUND3(C, D, A, B, 11, Sk, 35); + ROUND3(B, C, D, A, 14, Sl, 36); + ROUND3(A, B, C, D, 1, Si, 37); + ROUND3(D, A, B, C, 4, Sj, 38); + ROUND3(C, D, A, B, 7, Sk, 39); + ROUND3(B, C, D, A, 10, Sl, 40); + ROUND3(A, B, C, D, 13, Si, 41); + ROUND3(D, A, B, C, 0, Sj, 42); + ROUND3(C, D, A, B, 3, Sk, 43); + ROUND3(B, C, D, A, 6, Sl, 44); + ROUND3(A, B, C, D, 9, Si, 45); + ROUND3(D, A, B, C, 12, Sj, 46); + ROUND3(C, D, A, B, 15, Sk, 47); + ROUND3(B, C, D, A, 2, Sl, 48); + + ROUND4(A, B, C, D, 0, Sm, 49); + ROUND4(D, A, B, C, 7, Sn, 50); + ROUND4(C, D, A, B, 14, So, 51); + ROUND4(B, C, D, A, 5, Sp, 52); + ROUND4(A, B, C, D, 12, Sm, 53); + ROUND4(D, A, B, C, 3, Sn, 54); + ROUND4(C, D, A, B, 10, So, 55); + ROUND4(B, C, D, A, 1, Sp, 56); + ROUND4(A, B, C, D, 8, Sm, 57); + ROUND4(D, A, B, C, 15, Sn, 58); + ROUND4(C, D, A, B, 6, So, 59); + ROUND4(B, C, D, A, 13, Sp, 60); + ROUND4(A, B, C, D, 4, Sm, 61); + ROUND4(D, A, B, C, 11, Sn, 62); + ROUND4(C, D, A, B, 2, So, 63); + ROUND4(B, C, D, A, 9, Sp, 64); + + ctx->md5_sta += A; + ctx->md5_stb += B; + ctx->md5_stc += C; + ctx->md5_std += D; +} + +static void +md5_pad(pg_md5_ctx *ctx) +{ + unsigned int gap; + + /* Don't count up padding. Keep md5_n. */ + gap = MD5_BUFLEN - ctx->md5_i; + if (gap > 8) + { + memmove(ctx->md5_buf + ctx->md5_i, md5_paddat, + gap - sizeof(ctx->md5_n)); + } + else + { + /* including gap == 8 */ + memmove(ctx->md5_buf + ctx->md5_i, md5_paddat, gap); + md5_calc(ctx->md5_buf, ctx); + memmove(ctx->md5_buf, md5_paddat + gap, + MD5_BUFLEN - sizeof(ctx->md5_n)); + } + + /* 8 byte word */ +#ifndef WORDS_BIGENDIAN + memmove(&ctx->md5_buf[56], &ctx->md5_n8[0], 8); +#else + ctx->md5_buf[56] = ctx->md5_n8[7]; + ctx->md5_buf[57] = ctx->md5_n8[6]; + ctx->md5_buf[58] = ctx->md5_n8[5]; + ctx->md5_buf[59] = ctx->md5_n8[4]; + ctx->md5_buf[60] = ctx->md5_n8[3]; + ctx->md5_buf[61] = ctx->md5_n8[2]; + ctx->md5_buf[62] = ctx->md5_n8[1]; + ctx->md5_buf[63] = ctx->md5_n8[0]; +#endif + + md5_calc(ctx->md5_buf, ctx); +} + +static void +md5_result(uint8 *digest, pg_md5_ctx *ctx) +{ + /* 4 byte words */ +#ifndef WORDS_BIGENDIAN + memmove(digest, &ctx->md5_st8[0], 16); +#else + digest[0] = ctx->md5_st8[3]; + digest[1] = ctx->md5_st8[2]; + digest[2] = ctx->md5_st8[1]; + digest[3] = ctx->md5_st8[0]; + digest[4] = ctx->md5_st8[7]; + digest[5] = ctx->md5_st8[6]; + digest[6] = ctx->md5_st8[5]; + digest[7] = ctx->md5_st8[4]; + digest[8] = ctx->md5_st8[11]; + digest[9] = ctx->md5_st8[10]; + digest[10] = ctx->md5_st8[9]; + digest[11] = ctx->md5_st8[8]; + digest[12] = ctx->md5_st8[15]; + digest[13] = ctx->md5_st8[14]; + digest[14] = ctx->md5_st8[13]; + digest[15] = ctx->md5_st8[12]; +#endif +} + + +/* External routines for this MD5 implementation */ + +/* + * pg_md5_init + * + * Initialize a MD5 context. + */ +void +pg_md5_init(pg_md5_ctx *ctx) +{ + ctx->md5_n = 0; + ctx->md5_i = 0; + ctx->md5_sta = MD5_A0; + ctx->md5_stb = MD5_B0; + ctx->md5_stc = MD5_C0; + ctx->md5_std = MD5_D0; + memset(ctx->md5_buf, 0, sizeof(ctx->md5_buf)); +} + + +/* + * pg_md5_update + * + * Update a MD5 context. + */ +void +pg_md5_update(pg_md5_ctx *ctx, const uint8 *data, size_t len) +{ + unsigned int gap, + i; + + ctx->md5_n += len * 8; /* byte to bit */ + gap = MD5_BUFLEN - ctx->md5_i; + + if (len >= gap) + { + memmove(ctx->md5_buf + ctx->md5_i, data, gap); + md5_calc(ctx->md5_buf, ctx); + + for (i = gap; i + MD5_BUFLEN <= len; i += MD5_BUFLEN) + md5_calc(data + i, ctx); + + ctx->md5_i = len - i; + memmove(ctx->md5_buf, data + i, ctx->md5_i); + } + else + { + memmove(ctx->md5_buf + ctx->md5_i, data, len); + ctx->md5_i += len; + } +} + +/* + * pg_md5_final + * + * Finalize a MD5 context. + */ +void +pg_md5_final(pg_md5_ctx *ctx, uint8 *dest) +{ + md5_pad(ctx); + md5_result(dest, ctx); +} diff --git a/src/common/md5_common.c b/src/common/md5_common.c new file mode 100644 index 0000000..82ce75d --- /dev/null +++ b/src/common/md5_common.c @@ -0,0 +1,172 @@ +/*------------------------------------------------------------------------- + * + * md5_common.c + * Routines shared between all MD5 implementations used for encrypted + * passwords. + * + * Sverre H. Huseby <sverrehu@online.no> + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/md5_common.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/cryptohash.h" +#include "common/md5.h" + +static void +bytesToHex(uint8 b[16], char *s) +{ + static const char *hex = "0123456789abcdef"; + int q, + w; + + for (q = 0, w = 0; q < 16; q++) + { + s[w++] = hex[(b[q] >> 4) & 0x0F]; + s[w++] = hex[b[q] & 0x0F]; + } + s[w] = '\0'; +} + +/* + * pg_md5_hash + * + * Calculates the MD5 sum of the bytes in a buffer. + * + * SYNOPSIS #include "md5.h" + * int pg_md5_hash(const void *buff, size_t len, char *hexsum) + * + * INPUT buff the buffer containing the bytes that you want + * the MD5 sum of. + * len number of bytes in the buffer. + * + * OUTPUT hexsum the MD5 sum as a '\0'-terminated string of + * hexadecimal digits. an MD5 sum is 16 bytes long. + * each byte is represented by two hexadecimal + * characters. you thus need to provide an array + * of 33 characters, including the trailing '\0'. + * + * errstr filled with a constant-string error message + * on failure return; NULL on success. + * + * RETURNS false on failure (out of memory for internal buffers + * or MD5 computation failure) or true on success. + * + * STANDARDS MD5 is described in RFC 1321. + * + * AUTHOR Sverre H. Huseby <sverrehu@online.no> + * + */ + +bool +pg_md5_hash(const void *buff, size_t len, char *hexsum, const char **errstr) +{ + uint8 sum[MD5_DIGEST_LENGTH]; + pg_cryptohash_ctx *ctx; + + *errstr = NULL; + ctx = pg_cryptohash_create(PG_MD5); + if (ctx == NULL) + { + *errstr = pg_cryptohash_error(NULL); /* returns OOM */ + return false; + } + + if (pg_cryptohash_init(ctx) < 0 || + pg_cryptohash_update(ctx, buff, len) < 0 || + pg_cryptohash_final(ctx, sum, sizeof(sum)) < 0) + { + *errstr = pg_cryptohash_error(ctx); + pg_cryptohash_free(ctx); + return false; + } + + bytesToHex(sum, hexsum); + pg_cryptohash_free(ctx); + return true; +} + +/* + * pg_md5_binary + * + * As above, except that the MD5 digest is returned as a binary string + * (of size MD5_DIGEST_LENGTH) rather than being converted to ASCII hex. + */ +bool +pg_md5_binary(const void *buff, size_t len, void *outbuf, const char **errstr) +{ + pg_cryptohash_ctx *ctx; + + *errstr = NULL; + ctx = pg_cryptohash_create(PG_MD5); + if (ctx == NULL) + { + *errstr = pg_cryptohash_error(NULL); /* returns OOM */ + return false; + } + + if (pg_cryptohash_init(ctx) < 0 || + pg_cryptohash_update(ctx, buff, len) < 0 || + pg_cryptohash_final(ctx, outbuf, MD5_DIGEST_LENGTH) < 0) + { + *errstr = pg_cryptohash_error(ctx); + pg_cryptohash_free(ctx); + return false; + } + + pg_cryptohash_free(ctx); + return true; +} + + +/* + * Computes MD5 checksum of "passwd" (a null-terminated string) followed + * by "salt" (which need not be null-terminated). + * + * Output format is "md5" followed by a 32-hex-digit MD5 checksum. + * Hence, the output buffer "buf" must be at least 36 bytes long. + * + * Returns true if okay, false on error with *errstr providing some + * error context. + */ +bool +pg_md5_encrypt(const char *passwd, const char *salt, size_t salt_len, + char *buf, const char **errstr) +{ + size_t passwd_len = strlen(passwd); + + /* +1 here is just to avoid risk of unportable malloc(0) */ + char *crypt_buf = malloc(passwd_len + salt_len + 1); + bool ret; + + if (!crypt_buf) + { + *errstr = _("out of memory"); + return false; + } + + /* + * Place salt at the end because it may be known by users trying to crack + * the MD5 output. + */ + memcpy(crypt_buf, passwd, passwd_len); + memcpy(crypt_buf + passwd_len, salt, salt_len); + + strcpy(buf, "md5"); + ret = pg_md5_hash(crypt_buf, passwd_len + salt_len, buf + 3, errstr); + + free(crypt_buf); + + return ret; +} diff --git a/src/common/md5_int.h b/src/common/md5_int.h new file mode 100644 index 0000000..63fd395 --- /dev/null +++ b/src/common/md5_int.h @@ -0,0 +1,85 @@ +/*------------------------------------------------------------------------- + * + * md5_int.h + * Internal headers for fallback implementation of MD5 + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/md5_int.h + * + *------------------------------------------------------------------------- + */ + +/* $KAME: md5.h,v 1.3 2000/02/22 14:01:18 itojun Exp $ */ + +/* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef PG_MD5_INT_H +#define PG_MD5_INT_H + +#include "common/md5.h" + +#define MD5_BUFLEN 64 + +/* Context data for MD5 */ +typedef struct +{ + union + { + uint32 md5_state32[4]; + uint8 md5_state8[16]; + } md5_st; + +#define md5_sta md5_st.md5_state32[0] +#define md5_stb md5_st.md5_state32[1] +#define md5_stc md5_st.md5_state32[2] +#define md5_std md5_st.md5_state32[3] +#define md5_st8 md5_st.md5_state8 + + union + { + uint64 md5_count64; + uint8 md5_count8[8]; + } md5_count; +#define md5_n md5_count.md5_count64 +#define md5_n8 md5_count.md5_count8 + + unsigned int md5_i; + uint8 md5_buf[MD5_BUFLEN]; +} pg_md5_ctx; + +/* Interface routines for MD5 */ +extern void pg_md5_init(pg_md5_ctx *ctx); +extern void pg_md5_update(pg_md5_ctx *ctx, const uint8 *data, size_t len); +extern void pg_md5_final(pg_md5_ctx *ctx, uint8 *dest); + +#endif /* PG_MD5_INT_H */ diff --git a/src/common/meson.build b/src/common/meson.build new file mode 100644 index 0000000..9efc80a --- /dev/null +++ b/src/common/meson.build @@ -0,0 +1,177 @@ +# Copyright (c) 2022-2023, PostgreSQL Global Development Group + +common_sources = files( + 'archive.c', + 'base64.c', + 'checksum_helper.c', + 'compression.c', + 'controldata_utils.c', + 'encnames.c', + 'exec.c', + 'file_perm.c', + 'file_utils.c', + 'hashfn.c', + 'ip.c', + 'jsonapi.c', + 'keywords.c', + 'kwlookup.c', + 'link-canary.c', + 'md5_common.c', + 'percentrepl.c', + 'pg_get_line.c', + 'pg_lzcompress.c', + 'pg_prng.c', + 'pgfnames.c', + 'psprintf.c', + 'relpath.c', + 'rmtree.c', + 'saslprep.c', + 'scram-common.c', + 'string.c', + 'stringinfo.c', + 'unicode_norm.c', + 'username.c', + 'wait_error.c', + 'wchar.c', +) + +if ssl.found() + common_sources += files( + 'cryptohash_openssl.c', + 'hmac_openssl.c', + 'protocol_openssl.c', + ) +else + common_sources += files( + 'cryptohash.c', + 'hmac.c', + 'md5.c', + 'sha1.c', + 'sha2.c', + ) +endif + +common_kwlist = custom_target('kwlist', + input: files('../include/parser/kwlist.h'), + output: 'kwlist_d.h', + depend_files: gen_kwlist_deps, + command: [gen_kwlist_cmd, '--extern']) +generated_sources += common_kwlist +common_sources += common_kwlist + +# The code imported from Ryu gets a pass on declaration-after-statement, +# in order to keep it more closely aligned with its upstream. +ryu_sources = files( + 'd2s.c', + 'f2s.c', +) +ryu_cflags = [] + +ryu_cflags += cflags_no_decl_after_statement + +config_info_sources = files('config_info.c',) +config_info_cflags = [ + '-DVAL_CC="@0@"'.format(var_cc), + '-DVAL_CPPFLAGS="@0@"'.format(var_cppflags), + '-DVAL_CFLAGS="@0@"'.format(var_cflags), + '-DVAL_CFLAGS_SL="@0@"'.format(var_cflags_sl), + '-DVAL_LDFLAGS="@0@"'.format(var_ldflags), + '-DVAL_LDFLAGS_EX="@0@"'.format(var_ldflags_ex), + '-DVAL_LDFLAGS_SL="@0@"'.format(var_ldflags_sl), + '-DVAL_LIBS="@0@"'.format(var_libs), +] + +# Some files need to be built with different cflags. The different sets are +# defined here. +common_cflags = { + 'ryu': ryu_cflags, + 'config_info': config_info_cflags, +} +common_sources_cflags = { + 'ryu': ryu_sources, + 'config_info': config_info_sources +} + + +# A few files are currently only built for frontend, not server +# (Mkvcbuild.pm has a copy of this list, too). logging.c is excluded +# from OBJS_FRONTEND_SHLIB (shared library) as a matter of policy, +# because it is not appropriate for general purpose libraries such +# as libpq to report errors directly. + +common_sources_frontend_shlib = common_sources +common_sources_frontend_shlib += files( + 'fe_memutils.c', + 'restricted_token.c', + 'sprompt.c', +) + +common_sources_frontend_static = common_sources_frontend_shlib +common_sources_frontend_static += files( + 'logging.c', +) + +# Build pgport once for backend, once for use in frontend binaries, and once +# for use in shared libraries +# +# XXX: in most environments we could probably link_whole pgcommon_shlib +# against pgcommon_static, instead of compiling twice. +# +# For the server build of pgcommon, depend on lwlocknames_h, because at least +# cryptohash_openssl.c, hmac_openssl.c depend on it. That's arguably a +# layering violation, but ... +pgcommon = {} +pgcommon_variants = { + '_srv': internal_lib_args + { + 'sources': common_sources + [lwlocknames_h], + 'dependencies': [backend_common_code], + }, + '': default_lib_args + { + 'sources': common_sources_frontend_static, + 'dependencies': [frontend_common_code], + }, + '_shlib': default_lib_args + { + 'pic': true, + 'sources': common_sources_frontend_shlib, + 'dependencies': [frontend_common_code], + }, +} + +foreach name, opts : pgcommon_variants + + # Build internal static libraries for sets of files that need to be built + # with different cflags + cflag_libs = [] + foreach cflagname, sources : common_sources_cflags + if sources.length() == 0 + continue + endif + c_args = opts.get('c_args', []) + common_cflags[cflagname] + cflag_libs += static_library('libpgcommon@0@_@1@'.format(name, cflagname), + c_pch: pch_c_h, + include_directories: include_directories('.'), + kwargs: opts + { + 'sources': sources, + 'c_args': c_args, + 'build_by_default': false, + 'install': false, + }, + ) + endforeach + + lib = static_library('libpgcommon@0@'.format(name), + link_with: cflag_libs, + c_pch: pch_c_h, + include_directories: include_directories('.'), + kwargs: opts + { + 'dependencies': opts['dependencies'] + [ssl], + } + ) + pgcommon += {name: lib} +endforeach + +common_srv = pgcommon['_srv'] +common_shlib = pgcommon['_shlib'] +common_static = pgcommon[''] + +subdir('unicode') diff --git a/src/common/percentrepl.c b/src/common/percentrepl.c new file mode 100644 index 0000000..7aa85fd --- /dev/null +++ b/src/common/percentrepl.c @@ -0,0 +1,137 @@ +/*------------------------------------------------------------------------- + * + * percentrepl.c + * Common routines to replace percent placeholders in strings + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/percentrepl.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#include "common/logging.h" +#endif + +#include "common/percentrepl.h" +#include "lib/stringinfo.h" + +/* + * replace_percent_placeholders + * + * Replace percent-letter placeholders in input string with the supplied + * values. For example, to replace %f with foo and %b with bar, call + * + * replace_percent_placeholders(instr, "param_name", "bf", bar, foo); + * + * The return value is palloc'd. + * + * "%%" is replaced by a single "%". + * + * This throws an error for an unsupported placeholder or a "%" at the end of + * the input string. + * + * A value may be NULL. If the corresponding placeholder is found in the + * input string, it will be treated as if an unsupported placeholder was used. + * This allows callers to share a "letters" specification but vary the + * actually supported placeholders at run time. + * + * This functions is meant for cases where all the values are readily + * available or cheap to compute and most invocations will use most values + * (for example for archive_command). Also, it requires that all values are + * strings. It won't be a good match for things like log prefixes or prompts + * that use a mix of data types and any invocation will only use a few of the + * possible values. + * + * param_name is the name of the underlying GUC parameter, for error + * reporting. At the moment, this function is only used for GUC parameters. + * If other kinds of uses were added, the error reporting would need to be + * revised. + */ +char * +replace_percent_placeholders(const char *instr, const char *param_name, const char *letters,...) +{ + StringInfoData result; + + initStringInfo(&result); + + for (const char *sp = instr; *sp; sp++) + { + if (*sp == '%') + { + if (sp[1] == '%') + { + /* Convert %% to a single % */ + sp++; + appendStringInfoChar(&result, *sp); + } + else if (sp[1] == '\0') + { + /* Incomplete escape sequence, expected a character afterward */ +#ifdef FRONTEND + pg_log_error("invalid value for parameter \"%s\": \"%s\"", param_name, instr); + pg_log_error_detail("String ends unexpectedly after escape character \"%%\"."); + exit(1); +#else + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for parameter \"%s\": \"%s\"", param_name, instr), + errdetail("String ends unexpectedly after escape character \"%%\".")); +#endif + } + else + { + /* Look up placeholder character */ + bool found = false; + va_list ap; + + sp++; + + va_start(ap, letters); + for (const char *lp = letters; *lp; lp++) + { + char *val = va_arg(ap, char *); + + if (*sp == *lp) + { + if (val) + { + appendStringInfoString(&result, val); + found = true; + } + /* If val is NULL, we will report an error. */ + break; + } + } + va_end(ap); + if (!found) + { + /* Unknown placeholder */ +#ifdef FRONTEND + pg_log_error("invalid value for parameter \"%s\": \"%s\"", param_name, instr); + pg_log_error_detail("String contains unexpected placeholder \"%%%c\".", *sp); + exit(1); +#else + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for parameter \"%s\": \"%s\"", param_name, instr), + errdetail("String contains unexpected placeholder \"%%%c\".", *sp)); +#endif + } + } + } + else + { + appendStringInfoChar(&result, *sp); + } + } + + return result.data; +} diff --git a/src/common/pg_get_line.c b/src/common/pg_get_line.c new file mode 100644 index 0000000..3cdf090 --- /dev/null +++ b/src/common/pg_get_line.c @@ -0,0 +1,180 @@ +/*------------------------------------------------------------------------- + * + * pg_get_line.c + * fgets() with an expansible result buffer + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/pg_get_line.c + * + *------------------------------------------------------------------------- + */ +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <setjmp.h> + +#include "common/string.h" +#include "lib/stringinfo.h" + + +/* + * pg_get_line() + * + * This is meant to be equivalent to fgets(), except that instead of + * reading into a caller-supplied, fixed-size buffer, it reads into + * a palloc'd (in frontend, really malloc'd) string, which is resized + * as needed to handle indefinitely long input lines. The caller is + * responsible for pfree'ing the result string when appropriate. + * + * As with fgets(), returns NULL if there is a read error or if no + * characters are available before EOF. The caller can distinguish + * these cases by checking ferror(stream). + * + * Since this is meant to be equivalent to fgets(), the trailing newline + * (if any) is not stripped. Callers may wish to apply pg_strip_crlf(). + * + * Note that while I/O errors are reflected back to the caller to be + * dealt with, an OOM condition for the palloc'd buffer will not be; + * there'll be an ereport(ERROR) or exit(1) inside stringinfo.c. + * + * Also note that the palloc'd buffer is usually a lot longer than + * strictly necessary, so it may be inadvisable to use this function + * to collect lots of long-lived data. A less memory-hungry option + * is to use pg_get_line_buf() or pg_get_line_append() in a loop, + * then pstrdup() each line. + * + * prompt_ctx can optionally be provided to allow this function to be + * canceled via an existing SIGINT signal handler that will longjmp to the + * specified place only when *(prompt_ctx->enabled) is true. If canceled, + * this function returns NULL, and prompt_ctx->canceled is set to true. + */ +char * +pg_get_line(FILE *stream, PromptInterruptContext *prompt_ctx) +{ + StringInfoData buf; + + initStringInfo(&buf); + + if (!pg_get_line_append(stream, &buf, prompt_ctx)) + { + /* ensure that free() doesn't mess up errno */ + int save_errno = errno; + + pfree(buf.data); + errno = save_errno; + return NULL; + } + + return buf.data; +} + +/* + * pg_get_line_buf() + * + * This has similar behavior to pg_get_line(), and thence to fgets(), + * except that the collected data is returned in a caller-supplied + * StringInfo buffer. This is a convenient API for code that just + * wants to read and process one line at a time, without any artificial + * limit on line length. + * + * Returns true if a line was successfully collected (including the + * case of a non-newline-terminated line at EOF). Returns false if + * there was an I/O error or no data was available before EOF. + * (Check ferror(stream) to distinguish these cases.) + * + * In the false-result case, buf is reset to empty. + */ +bool +pg_get_line_buf(FILE *stream, StringInfo buf) +{ + /* We just need to drop any data from the previous call */ + resetStringInfo(buf); + return pg_get_line_append(stream, buf, NULL); +} + +/* + * pg_get_line_append() + * + * This has similar behavior to pg_get_line(), and thence to fgets(), + * except that the collected data is appended to whatever is in *buf. + * This is useful in preference to pg_get_line_buf() if the caller wants + * to merge some lines together, e.g. to implement backslash continuation. + * + * Returns true if a line was successfully collected (including the + * case of a non-newline-terminated line at EOF). Returns false if + * there was an I/O error or no data was available before EOF. + * (Check ferror(stream) to distinguish these cases.) + * + * In the false-result case, the contents of *buf are logically unmodified, + * though it's possible that the buffer has been resized. + * + * prompt_ctx can optionally be provided to allow this function to be + * canceled via an existing SIGINT signal handler that will longjmp to the + * specified place only when *(prompt_ctx->enabled) is true. If canceled, + * this function returns false, and prompt_ctx->canceled is set to true. + */ +bool +pg_get_line_append(FILE *stream, StringInfo buf, + PromptInterruptContext *prompt_ctx) +{ + int orig_len = buf->len; + + if (prompt_ctx && sigsetjmp(*((sigjmp_buf *) prompt_ctx->jmpbuf), 1) != 0) + { + /* Got here with longjmp */ + prompt_ctx->canceled = true; + /* Discard any data we collected before detecting error */ + buf->len = orig_len; + buf->data[orig_len] = '\0'; + return false; + } + + /* Loop until newline or EOF/error */ + for (;;) + { + char *res; + + /* Enable longjmp while waiting for input */ + if (prompt_ctx) + *(prompt_ctx->enabled) = true; + + /* Read some data, appending it to whatever we already have */ + res = fgets(buf->data + buf->len, buf->maxlen - buf->len, stream); + + /* Disable longjmp again, then break if fgets failed */ + if (prompt_ctx) + *(prompt_ctx->enabled) = false; + + if (res == NULL) + break; + + /* Got data, so update buf->len */ + buf->len += strlen(buf->data + buf->len); + + /* Done if we have collected a newline */ + if (buf->len > orig_len && buf->data[buf->len - 1] == '\n') + return true; + + /* Make some more room in the buffer, and loop to read more data */ + enlargeStringInfo(buf, 128); + } + + /* Check for I/O errors and EOF */ + if (ferror(stream) || buf->len == orig_len) + { + /* Discard any data we collected before detecting error */ + buf->len = orig_len; + buf->data[orig_len] = '\0'; + return false; + } + + /* No newline at EOF, but we did collect some data */ + return true; +} diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c new file mode 100644 index 0000000..95ad338 --- /dev/null +++ b/src/common/pg_lzcompress.c @@ -0,0 +1,876 @@ +/* ---------- + * pg_lzcompress.c - + * + * This is an implementation of LZ compression for PostgreSQL. + * It uses a simple history table and generates 2-3 byte tags + * capable of backward copy information for 3-273 bytes with + * a max offset of 4095. + * + * Entry routines: + * + * int32 + * pglz_compress(const char *source, int32 slen, char *dest, + * const PGLZ_Strategy *strategy); + * + * source is the input data to be compressed. + * + * slen is the length of the input data. + * + * dest is the output area for the compressed result. + * It must be at least as big as PGLZ_MAX_OUTPUT(slen). + * + * strategy is a pointer to some information controlling + * the compression algorithm. If NULL, the compiled + * in default strategy is used. + * + * The return value is the number of bytes written in the + * buffer dest, or -1 if compression fails; in the latter + * case the contents of dest are undefined. + * + * int32 + * pglz_decompress(const char *source, int32 slen, char *dest, + * int32 rawsize, bool check_complete) + * + * source is the compressed input. + * + * slen is the length of the compressed input. + * + * dest is the area where the uncompressed data will be + * written to. It is the callers responsibility to + * provide enough space. + * + * The data is written to buff exactly as it was handed + * to pglz_compress(). No terminating zero byte is added. + * + * rawsize is the length of the uncompressed data. + * + * check_complete is a flag to let us know if -1 should be + * returned in cases where we don't reach the end of the + * source or dest buffers, or not. This should be false + * if the caller is asking for only a partial result and + * true otherwise. + * + * The return value is the number of bytes written in the + * buffer dest, or -1 if decompression fails. + * + * The decompression algorithm and internal data format: + * + * It is made with the compressed data itself. + * + * The data representation is easiest explained by describing + * the process of decompression. + * + * If compressed_size == rawsize, then the data + * is stored uncompressed as plain bytes. Thus, the decompressor + * simply copies rawsize bytes to the destination. + * + * Otherwise the first byte tells what to do the next 8 times. + * We call this the control byte. + * + * An unset bit in the control byte means, that one uncompressed + * byte follows, which is copied from input to output. + * + * A set bit in the control byte means, that a tag of 2-3 bytes + * follows. A tag contains information to copy some bytes, that + * are already in the output buffer, to the current location in + * the output. Let's call the three tag bytes T1, T2 and T3. The + * position of the data to copy is coded as an offset from the + * actual output position. + * + * The offset is in the upper nibble of T1 and in T2. + * The length is in the lower nibble of T1. + * + * So the 16 bits of a 2 byte tag are coded as + * + * 7---T1--0 7---T2--0 + * OOOO LLLL OOOO OOOO + * + * This limits the offset to 1-4095 (12 bits) and the length + * to 3-18 (4 bits) because 3 is always added to it. To emit + * a tag of 2 bytes with a length of 2 only saves one control + * bit. But we lose one byte in the possible length of a tag. + * + * In the actual implementation, the 2 byte tag's length is + * limited to 3-17, because the value 0xF in the length nibble + * has special meaning. It means, that the next following + * byte (T3) has to be added to the length value of 18. That + * makes total limits of 1-4095 for offset and 3-273 for length. + * + * Now that we have successfully decoded a tag. We simply copy + * the output that occurred <offset> bytes back to the current + * output location in the specified <length>. Thus, a + * sequence of 200 spaces (think about bpchar fields) could be + * coded in 4 bytes. One literal space and a three byte tag to + * copy 199 bytes with a -1 offset. Whow - that's a compression + * rate of 98%! Well, the implementation needs to save the + * original data size too, so we need another 4 bytes for it + * and end up with a total compression rate of 96%, what's still + * worth a Whow. + * + * The compression algorithm + * + * The following uses numbers used in the default strategy. + * + * The compressor works best for attributes of a size between + * 1K and 1M. For smaller items there's not that much chance of + * redundancy in the character sequence (except for large areas + * of identical bytes like trailing spaces) and for bigger ones + * our 4K maximum look-back distance is too small. + * + * The compressor creates a table for lists of positions. + * For each input position (except the last 3), a hash key is + * built from the 4 next input bytes and the position remembered + * in the appropriate list. Thus, the table points to linked + * lists of likely to be at least in the first 4 characters + * matching strings. This is done on the fly while the input + * is compressed into the output area. Table entries are only + * kept for the last 4096 input positions, since we cannot use + * back-pointers larger than that anyway. The size of the hash + * table is chosen based on the size of the input - a larger table + * has a larger startup cost, as it needs to be initialized to + * zero, but reduces the number of hash collisions on long inputs. + * + * For each byte in the input, its hash key (built from this + * byte and the next 3) is used to find the appropriate list + * in the table. The lists remember the positions of all bytes + * that had the same hash key in the past in increasing backward + * offset order. Now for all entries in the used lists, the + * match length is computed by comparing the characters from the + * entries position with the characters from the actual input + * position. + * + * The compressor starts with a so called "good_match" of 128. + * It is a "prefer speed against compression ratio" optimizer. + * So if the first entry looked at already has 128 or more + * matching characters, the lookup stops and that position is + * used for the next tag in the output. + * + * For each subsequent entry in the history list, the "good_match" + * is lowered by 10%. So the compressor will be more happy with + * short matches the further it has to go back in the history. + * Another "speed against ratio" preference characteristic of + * the algorithm. + * + * Thus there are 3 stop conditions for the lookup of matches: + * + * - a match >= good_match is found + * - there are no more history entries to look at + * - the next history entry is already too far back + * to be coded into a tag. + * + * Finally the match algorithm checks that at least a match + * of 3 or more bytes has been found, because that is the smallest + * amount of copy information to code into a tag. If so, a tag + * is omitted and all the input bytes covered by that are just + * scanned for the history add's, otherwise a literal character + * is omitted and only his history entry added. + * + * Acknowledgments: + * + * Many thanks to Adisak Pochanayon, who's article about SLZ + * inspired me to write the PostgreSQL compression this way. + * + * Jan Wieck + * + * Copyright (c) 1999-2023, PostgreSQL Global Development Group + * + * src/common/pg_lzcompress.c + * ---------- + */ +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <limits.h> + +#include "common/pg_lzcompress.h" + + +/* ---------- + * Local definitions + * ---------- + */ +#define PGLZ_MAX_HISTORY_LISTS 8192 /* must be power of 2 */ +#define PGLZ_HISTORY_SIZE 4096 +#define PGLZ_MAX_MATCH 273 + + +/* ---------- + * PGLZ_HistEntry - + * + * Linked list for the backward history lookup + * + * All the entries sharing a hash key are linked in a doubly linked list. + * This makes it easy to remove an entry when it's time to recycle it + * (because it's more than 4K positions old). + * ---------- + */ +typedef struct PGLZ_HistEntry +{ + struct PGLZ_HistEntry *next; /* links for my hash key's list */ + struct PGLZ_HistEntry *prev; + int hindex; /* my current hash key */ + const char *pos; /* my input position */ +} PGLZ_HistEntry; + + +/* ---------- + * The provided standard strategies + * ---------- + */ +static const PGLZ_Strategy strategy_default_data = { + 32, /* Data chunks less than 32 bytes are not + * compressed */ + INT_MAX, /* No upper limit on what we'll try to + * compress */ + 25, /* Require 25% compression rate, or not worth + * it */ + 1024, /* Give up if no compression in the first 1KB */ + 128, /* Stop history lookup if a match of 128 bytes + * is found */ + 10 /* Lower good match size by 10% at every loop + * iteration */ +}; +const PGLZ_Strategy *const PGLZ_strategy_default = &strategy_default_data; + + +static const PGLZ_Strategy strategy_always_data = { + 0, /* Chunks of any size are compressed */ + INT_MAX, + 0, /* It's enough to save one single byte */ + INT_MAX, /* Never give up early */ + 128, /* Stop history lookup if a match of 128 bytes + * is found */ + 6 /* Look harder for a good match */ +}; +const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data; + + +/* ---------- + * Statically allocated work arrays for history + * ---------- + */ +static int16 hist_start[PGLZ_MAX_HISTORY_LISTS]; +static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE + 1]; + +/* + * Element 0 in hist_entries is unused, and means 'invalid'. Likewise, + * INVALID_ENTRY_PTR in next/prev pointers mean 'invalid'. + */ +#define INVALID_ENTRY 0 +#define INVALID_ENTRY_PTR (&hist_entries[INVALID_ENTRY]) + +/* ---------- + * pglz_hist_idx - + * + * Computes the history table slot for the lookup by the next 4 + * characters in the input. + * + * NB: because we use the next 4 characters, we are not guaranteed to + * find 3-character matches; they very possibly will be in the wrong + * hash list. This seems an acceptable tradeoff for spreading out the + * hash keys more. + * ---------- + */ +#define pglz_hist_idx(_s,_e, _mask) ( \ + ((((_e) - (_s)) < 4) ? (int) (_s)[0] : \ + (((_s)[0] << 6) ^ ((_s)[1] << 4) ^ \ + ((_s)[2] << 2) ^ (_s)[3])) & (_mask) \ + ) + + +/* ---------- + * pglz_hist_add - + * + * Adds a new entry to the history table. + * + * If _recycle is true, then we are recycling a previously used entry, + * and must first delink it from its old hashcode's linked list. + * + * NOTE: beware of multiple evaluations of macro's arguments, and note that + * _hn and _recycle are modified in the macro. + * ---------- + */ +#define pglz_hist_add(_hs,_he,_hn,_recycle,_s,_e, _mask) \ +do { \ + int __hindex = pglz_hist_idx((_s),(_e), (_mask)); \ + int16 *__myhsp = &(_hs)[__hindex]; \ + PGLZ_HistEntry *__myhe = &(_he)[_hn]; \ + if (_recycle) { \ + if (__myhe->prev == NULL) \ + (_hs)[__myhe->hindex] = __myhe->next - (_he); \ + else \ + __myhe->prev->next = __myhe->next; \ + if (__myhe->next != NULL) \ + __myhe->next->prev = __myhe->prev; \ + } \ + __myhe->next = &(_he)[*__myhsp]; \ + __myhe->prev = NULL; \ + __myhe->hindex = __hindex; \ + __myhe->pos = (_s); \ + /* If there was an existing entry in this hash slot, link */ \ + /* this new entry to it. However, the 0th entry in the */ \ + /* entries table is unused, so we can freely scribble on it. */ \ + /* So don't bother checking if the slot was used - we'll */ \ + /* scribble on the unused entry if it was not, but that's */ \ + /* harmless. Avoiding the branch in this critical path */ \ + /* speeds this up a little bit. */ \ + /* if (*__myhsp != INVALID_ENTRY) */ \ + (_he)[(*__myhsp)].prev = __myhe; \ + *__myhsp = _hn; \ + if (++(_hn) >= PGLZ_HISTORY_SIZE + 1) { \ + (_hn) = 1; \ + (_recycle) = true; \ + } \ +} while (0) + + +/* ---------- + * pglz_out_ctrl - + * + * Outputs the last and allocates a new control byte if needed. + * ---------- + */ +#define pglz_out_ctrl(__ctrlp,__ctrlb,__ctrl,__buf) \ +do { \ + if ((__ctrl & 0xff) == 0) \ + { \ + *(__ctrlp) = __ctrlb; \ + __ctrlp = (__buf)++; \ + __ctrlb = 0; \ + __ctrl = 1; \ + } \ +} while (0) + + +/* ---------- + * pglz_out_literal - + * + * Outputs a literal byte to the destination buffer including the + * appropriate control bit. + * ---------- + */ +#define pglz_out_literal(_ctrlp,_ctrlb,_ctrl,_buf,_byte) \ +do { \ + pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf); \ + *(_buf)++ = (unsigned char)(_byte); \ + _ctrl <<= 1; \ +} while (0) + + +/* ---------- + * pglz_out_tag - + * + * Outputs a backward reference tag of 2-4 bytes (depending on + * offset and length) to the destination buffer including the + * appropriate control bit. + * ---------- + */ +#define pglz_out_tag(_ctrlp,_ctrlb,_ctrl,_buf,_len,_off) \ +do { \ + pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf); \ + _ctrlb |= _ctrl; \ + _ctrl <<= 1; \ + if (_len > 17) \ + { \ + (_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | 0x0f); \ + (_buf)[1] = (unsigned char)(((_off) & 0xff)); \ + (_buf)[2] = (unsigned char)((_len) - 18); \ + (_buf) += 3; \ + } else { \ + (_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | ((_len) - 3)); \ + (_buf)[1] = (unsigned char)((_off) & 0xff); \ + (_buf) += 2; \ + } \ +} while (0) + + +/* ---------- + * pglz_find_match - + * + * Lookup the history table if the actual input stream matches + * another sequence of characters, starting somewhere earlier + * in the input buffer. + * ---------- + */ +static inline int +pglz_find_match(int16 *hstart, const char *input, const char *end, + int *lenp, int *offp, int good_match, int good_drop, int mask) +{ + PGLZ_HistEntry *hent; + int16 hentno; + int32 len = 0; + int32 off = 0; + + /* + * Traverse the linked history list until a good enough match is found. + */ + hentno = hstart[pglz_hist_idx(input, end, mask)]; + hent = &hist_entries[hentno]; + while (hent != INVALID_ENTRY_PTR) + { + const char *ip = input; + const char *hp = hent->pos; + int32 thisoff; + int32 thislen; + + /* + * Stop if the offset does not fit into our tag anymore. + */ + thisoff = ip - hp; + if (thisoff >= 0x0fff) + break; + + /* + * Determine length of match. A better match must be larger than the + * best so far. And if we already have a match of 16 or more bytes, + * it's worth the call overhead to use memcmp() to check if this match + * is equal for the same size. After that we must fallback to + * character by character comparison to know the exact position where + * the diff occurred. + */ + thislen = 0; + if (len >= 16) + { + if (memcmp(ip, hp, len) == 0) + { + thislen = len; + ip += len; + hp += len; + while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH) + { + thislen++; + ip++; + hp++; + } + } + } + else + { + while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH) + { + thislen++; + ip++; + hp++; + } + } + + /* + * Remember this match as the best (if it is) + */ + if (thislen > len) + { + len = thislen; + off = thisoff; + } + + /* + * Advance to the next history entry + */ + hent = hent->next; + + /* + * Be happy with lesser good matches the more entries we visited. But + * no point in doing calculation if we're at end of list. + */ + if (hent != INVALID_ENTRY_PTR) + { + if (len >= good_match) + break; + good_match -= (good_match * good_drop) / 100; + } + } + + /* + * Return match information only if it results at least in one byte + * reduction. + */ + if (len > 2) + { + *lenp = len; + *offp = off; + return 1; + } + + return 0; +} + + +/* ---------- + * pglz_compress - + * + * Compresses source into dest using strategy. Returns the number of + * bytes written in buffer dest, or -1 if compression fails. + * ---------- + */ +int32 +pglz_compress(const char *source, int32 slen, char *dest, + const PGLZ_Strategy *strategy) +{ + unsigned char *bp = (unsigned char *) dest; + unsigned char *bstart = bp; + int hist_next = 1; + bool hist_recycle = false; + const char *dp = source; + const char *dend = source + slen; + unsigned char ctrl_dummy = 0; + unsigned char *ctrlp = &ctrl_dummy; + unsigned char ctrlb = 0; + unsigned char ctrl = 0; + bool found_match = false; + int32 match_len; + int32 match_off; + int32 good_match; + int32 good_drop; + int32 result_size; + int32 result_max; + int32 need_rate; + int hashsz; + int mask; + + /* + * Our fallback strategy is the default. + */ + if (strategy == NULL) + strategy = PGLZ_strategy_default; + + /* + * If the strategy forbids compression (at all or if source chunk size out + * of range), fail. + */ + if (strategy->match_size_good <= 0 || + slen < strategy->min_input_size || + slen > strategy->max_input_size) + return -1; + + /* + * Limit the match parameters to the supported range. + */ + good_match = strategy->match_size_good; + if (good_match > PGLZ_MAX_MATCH) + good_match = PGLZ_MAX_MATCH; + else if (good_match < 17) + good_match = 17; + + good_drop = strategy->match_size_drop; + if (good_drop < 0) + good_drop = 0; + else if (good_drop > 100) + good_drop = 100; + + need_rate = strategy->min_comp_rate; + if (need_rate < 0) + need_rate = 0; + else if (need_rate > 99) + need_rate = 99; + + /* + * Compute the maximum result size allowed by the strategy, namely the + * input size minus the minimum wanted compression rate. This had better + * be <= slen, else we might overrun the provided output buffer. + */ + if (slen > (INT_MAX / 100)) + { + /* Approximate to avoid overflow */ + result_max = (slen / 100) * (100 - need_rate); + } + else + result_max = (slen * (100 - need_rate)) / 100; + + /* + * Experiments suggest that these hash sizes work pretty well. A large + * hash table minimizes collision, but has a higher startup cost. For a + * small input, the startup cost dominates. The table size must be a power + * of two. + */ + if (slen < 128) + hashsz = 512; + else if (slen < 256) + hashsz = 1024; + else if (slen < 512) + hashsz = 2048; + else if (slen < 1024) + hashsz = 4096; + else + hashsz = 8192; + mask = hashsz - 1; + + /* + * Initialize the history lists to empty. We do not need to zero the + * hist_entries[] array; its entries are initialized as they are used. + */ + memset(hist_start, 0, hashsz * sizeof(int16)); + + /* + * Compress the source directly into the output buffer. + */ + while (dp < dend) + { + /* + * If we already exceeded the maximum result size, fail. + * + * We check once per loop; since the loop body could emit as many as 4 + * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better + * allow 4 slop bytes. + */ + if (bp - bstart >= result_max) + return -1; + + /* + * If we've emitted more than first_success_by bytes without finding + * anything compressible at all, fail. This lets us fall out + * reasonably quickly when looking at incompressible input (such as + * pre-compressed data). + */ + if (!found_match && bp - bstart >= strategy->first_success_by) + return -1; + + /* + * Try to find a match in the history + */ + if (pglz_find_match(hist_start, dp, dend, &match_len, + &match_off, good_match, good_drop, mask)) + { + /* + * Create the tag and add history entries for all matched + * characters. + */ + pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off); + while (match_len--) + { + pglz_hist_add(hist_start, hist_entries, + hist_next, hist_recycle, + dp, dend, mask); + dp++; /* Do not do this ++ in the line above! */ + /* The macro would do it four times - Jan. */ + } + found_match = true; + } + else + { + /* + * No match found. Copy one literal byte. + */ + pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp); + pglz_hist_add(hist_start, hist_entries, + hist_next, hist_recycle, + dp, dend, mask); + dp++; /* Do not do this ++ in the line above! */ + /* The macro would do it four times - Jan. */ + } + } + + /* + * Write out the last control byte and check that we haven't overrun the + * output size allowed by the strategy. + */ + *ctrlp = ctrlb; + result_size = bp - bstart; + if (result_size >= result_max) + return -1; + + /* success */ + return result_size; +} + + +/* ---------- + * pglz_decompress - + * + * Decompresses source into dest. Returns the number of bytes + * decompressed into the destination buffer, or -1 if the + * compressed data is corrupted. + * + * If check_complete is true, the data is considered corrupted + * if we don't exactly fill the destination buffer. Callers that + * are extracting a slice typically can't apply this check. + * ---------- + */ +int32 +pglz_decompress(const char *source, int32 slen, char *dest, + int32 rawsize, bool check_complete) +{ + const unsigned char *sp; + const unsigned char *srcend; + unsigned char *dp; + unsigned char *destend; + + sp = (const unsigned char *) source; + srcend = ((const unsigned char *) source) + slen; + dp = (unsigned char *) dest; + destend = dp + rawsize; + + while (sp < srcend && dp < destend) + { + /* + * Read one control byte and process the next 8 items (or as many as + * remain in the compressed input). + */ + unsigned char ctrl = *sp++; + int ctrlc; + + for (ctrlc = 0; ctrlc < 8 && sp < srcend && dp < destend; ctrlc++) + { + if (ctrl & 1) + { + /* + * Set control bit means we must read a match tag. The match + * is coded with two bytes. First byte uses lower nibble to + * code length - 3. Higher nibble contains upper 4 bits of the + * offset. The next following byte contains the lower 8 bits + * of the offset. If the length is coded as 18, another + * extension tag byte tells how much longer the match really + * was (0-255). + */ + int32 len; + int32 off; + + len = (sp[0] & 0x0f) + 3; + off = ((sp[0] & 0xf0) << 4) | sp[1]; + sp += 2; + if (len == 18) + len += *sp++; + + /* + * Check for corrupt data: if we fell off the end of the + * source, or if we obtained off = 0, or if off is more than + * the distance back to the buffer start, we have problems. + * (We must check for off = 0, else we risk an infinite loop + * below in the face of corrupt data. Likewise, the upper + * limit on off prevents accessing outside the buffer + * boundaries.) + */ + if (unlikely(sp > srcend || off == 0 || + off > (dp - (unsigned char *) dest))) + return -1; + + /* + * Don't emit more data than requested. + */ + len = Min(len, destend - dp); + + /* + * Now we copy the bytes specified by the tag from OUTPUT to + * OUTPUT (copy len bytes from dp - off to dp). The copied + * areas could overlap, so to avoid undefined behavior in + * memcpy(), be careful to copy only non-overlapping regions. + * + * Note that we cannot use memmove() instead, since while its + * behavior is well-defined, it's also not what we want. + */ + while (off < len) + { + /* + * We can safely copy "off" bytes since that clearly + * results in non-overlapping source and destination. + */ + memcpy(dp, dp - off, off); + len -= off; + dp += off; + + /*---------- + * This bit is less obvious: we can double "off" after + * each such step. Consider this raw input: + * 112341234123412341234 + * This will be encoded as 5 literal bytes "11234" and + * then a match tag with length 16 and offset 4. After + * memcpy'ing the first 4 bytes, we will have emitted + * 112341234 + * so we can double "off" to 8, then after the next step + * we have emitted + * 11234123412341234 + * Then we can double "off" again, after which it is more + * than the remaining "len" so we fall out of this loop + * and finish with a non-overlapping copy of the + * remainder. In general, a match tag with off < len + * implies that the decoded data has a repeat length of + * "off". We can handle 1, 2, 4, etc repetitions of the + * repeated string per memcpy until we get to a situation + * where the final copy step is non-overlapping. + * + * (Another way to understand this is that we are keeping + * the copy source point dp - off the same throughout.) + *---------- + */ + off += off; + } + memcpy(dp, dp - off, len); + dp += len; + } + else + { + /* + * An unset control bit means LITERAL BYTE. So we just copy + * one from INPUT to OUTPUT. + */ + *dp++ = *sp++; + } + + /* + * Advance the control bit + */ + ctrl >>= 1; + } + } + + /* + * If requested, check we decompressed the right amount. + */ + if (check_complete && (dp != destend || sp != srcend)) + return -1; + + /* + * That's it. + */ + return (char *) dp - dest; +} + + +/* ---------- + * pglz_maximum_compressed_size - + * + * Calculate the maximum compressed size for a given amount of raw data. + * Return the maximum size, or total compressed size if maximum size is + * larger than total compressed size. + * + * We can't use PGLZ_MAX_OUTPUT for this purpose, because that's used to size + * the compression buffer (and abort the compression). It does not really say + * what's the maximum compressed size for an input of a given length, and it + * may happen that while the whole value is compressible (and thus fits into + * PGLZ_MAX_OUTPUT nicely), the prefix is not compressible at all. + * ---------- + */ +int32 +pglz_maximum_compressed_size(int32 rawsize, int32 total_compressed_size) +{ + int64 compressed_size; + + /* + * pglz uses one control bit per byte, so if the entire desired prefix is + * represented as literal bytes, we'll need (rawsize * 9) bits. We care + * about bytes though, so be sure to round up not down. + * + * Use int64 here to prevent overflow during calculation. + */ + compressed_size = ((int64) rawsize * 9 + 7) / 8; + + /* + * The above fails to account for a corner case: we could have compressed + * data that starts with N-1 or N-2 literal bytes and then has a match tag + * of 2 or 3 bytes. It's therefore possible that we need to fetch 1 or 2 + * more bytes in order to have the whole match tag. (Match tags earlier + * in the compressed data don't cause a problem, since they should + * represent more decompressed bytes than they occupy themselves.) + */ + compressed_size += 2; + + /* + * Maximum compressed size can't be larger than total compressed size. + * (This also ensures that our result fits in int32.) + */ + compressed_size = Min(compressed_size, total_compressed_size); + + return (int32) compressed_size; +} diff --git a/src/common/pg_prng.c b/src/common/pg_prng.c new file mode 100644 index 0000000..c7bb92e --- /dev/null +++ b/src/common/pg_prng.c @@ -0,0 +1,282 @@ +/*------------------------------------------------------------------------- + * + * Pseudo-Random Number Generator + * + * We use Blackman and Vigna's xoroshiro128** 1.0 algorithm + * to have a small, fast PRNG suitable for generating reasonably + * good-quality 64-bit data. This should not be considered + * cryptographically strong, however. + * + * About these generators: https://prng.di.unimi.it/ + * See also https://en.wikipedia.org/wiki/List_of_random_number_generators + * + * Copyright (c) 2021-2023, PostgreSQL Global Development Group + * + * src/common/pg_prng.c + * + *------------------------------------------------------------------------- + */ + +#include "c.h" + +#include <math.h> + +#include "common/pg_prng.h" +#include "port/pg_bitutils.h" + +/* X/Open (XSI) requires <math.h> to provide M_PI, but core POSIX does not */ +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + + +/* process-wide state vector */ +pg_prng_state pg_global_prng_state; + + +/* + * 64-bit rotate left + */ +static inline uint64 +rotl(uint64 x, int bits) +{ + return (x << bits) | (x >> (64 - bits)); +} + +/* + * The basic xoroshiro128** algorithm. + * Generates and returns a 64-bit uniformly distributed number, + * updating the state vector for next time. + * + * Note: the state vector must not be all-zeroes, as that is a fixed point. + */ +static uint64 +xoroshiro128ss(pg_prng_state *state) +{ + uint64 s0 = state->s0, + sx = state->s1 ^ s0, + val = rotl(s0 * 5, 7) * 9; + + /* update state */ + state->s0 = rotl(s0, 24) ^ sx ^ (sx << 16); + state->s1 = rotl(sx, 37); + + return val; +} + +/* + * We use this generator just to fill the xoroshiro128** state vector + * from a 64-bit seed. + */ +static uint64 +splitmix64(uint64 *state) +{ + /* state update */ + uint64 val = (*state += UINT64CONST(0x9E3779B97f4A7C15)); + + /* value extraction */ + val = (val ^ (val >> 30)) * UINT64CONST(0xBF58476D1CE4E5B9); + val = (val ^ (val >> 27)) * UINT64CONST(0x94D049BB133111EB); + + return val ^ (val >> 31); +} + +/* + * Initialize the PRNG state from a 64-bit integer, + * taking care that we don't produce all-zeroes. + */ +void +pg_prng_seed(pg_prng_state *state, uint64 seed) +{ + state->s0 = splitmix64(&seed); + state->s1 = splitmix64(&seed); + /* Let's just make sure we didn't get all-zeroes */ + (void) pg_prng_seed_check(state); +} + +/* + * Initialize the PRNG state from a double in the range [-1.0, 1.0], + * taking care that we don't produce all-zeroes. + */ +void +pg_prng_fseed(pg_prng_state *state, double fseed) +{ + /* Assume there's about 52 mantissa bits; the sign contributes too. */ + int64 seed = ((double) ((UINT64CONST(1) << 52) - 1)) * fseed; + + pg_prng_seed(state, (uint64) seed); +} + +/* + * Validate a PRNG seed value. + */ +bool +pg_prng_seed_check(pg_prng_state *state) +{ + /* + * If the seeding mechanism chanced to produce all-zeroes, insert + * something nonzero. Anything would do; use Knuth's LCG parameters. + */ + if (unlikely(state->s0 == 0 && state->s1 == 0)) + { + state->s0 = UINT64CONST(0x5851F42D4C957F2D); + state->s1 = UINT64CONST(0x14057B7EF767814F); + } + + /* As a convenience for the pg_prng_strong_seed macro, return true */ + return true; +} + +/* + * Select a random uint64 uniformly from the range [0, PG_UINT64_MAX]. + */ +uint64 +pg_prng_uint64(pg_prng_state *state) +{ + return xoroshiro128ss(state); +} + +/* + * Select a random uint64 uniformly from the range [rmin, rmax]. + * If the range is empty, rmin is always produced. + */ +uint64 +pg_prng_uint64_range(pg_prng_state *state, uint64 rmin, uint64 rmax) +{ + uint64 val; + + if (likely(rmax > rmin)) + { + /* + * Use bitmask rejection method to generate an offset in 0..range. + * Each generated val is less than twice "range", so on average we + * should not have to iterate more than twice. + */ + uint64 range = rmax - rmin; + uint32 rshift = 63 - pg_leftmost_one_pos64(range); + + do + { + val = xoroshiro128ss(state) >> rshift; + } while (val > range); + } + else + val = 0; + + return rmin + val; +} + +/* + * Select a random int64 uniformly from the range [PG_INT64_MIN, PG_INT64_MAX]. + */ +int64 +pg_prng_int64(pg_prng_state *state) +{ + return (int64) xoroshiro128ss(state); +} + +/* + * Select a random int64 uniformly from the range [0, PG_INT64_MAX]. + */ +int64 +pg_prng_int64p(pg_prng_state *state) +{ + return (int64) (xoroshiro128ss(state) & UINT64CONST(0x7FFFFFFFFFFFFFFF)); +} + +/* + * Select a random uint32 uniformly from the range [0, PG_UINT32_MAX]. + */ +uint32 +pg_prng_uint32(pg_prng_state *state) +{ + /* + * Although xoroshiro128** is not known to have any weaknesses in + * randomness of low-order bits, we prefer to use the upper bits of its + * result here and below. + */ + uint64 v = xoroshiro128ss(state); + + return (uint32) (v >> 32); +} + +/* + * Select a random int32 uniformly from the range [PG_INT32_MIN, PG_INT32_MAX]. + */ +int32 +pg_prng_int32(pg_prng_state *state) +{ + uint64 v = xoroshiro128ss(state); + + return (int32) (v >> 32); +} + +/* + * Select a random int32 uniformly from the range [0, PG_INT32_MAX]. + */ +int32 +pg_prng_int32p(pg_prng_state *state) +{ + uint64 v = xoroshiro128ss(state); + + return (int32) (v >> 33); +} + +/* + * Select a random double uniformly from the range [0.0, 1.0). + * + * Note: if you want a result in the range (0.0, 1.0], the standard way + * to get that is "1.0 - pg_prng_double(state)". + */ +double +pg_prng_double(pg_prng_state *state) +{ + uint64 v = xoroshiro128ss(state); + + /* + * As above, assume there's 52 mantissa bits in a double. This result + * could round to 1.0 if double's precision is less than that; but we + * assume IEEE float arithmetic elsewhere in Postgres, so this seems OK. + */ + return ldexp((double) (v >> (64 - 52)), -52); +} + +/* + * Select a random double from the normal distribution with + * mean = 0.0 and stddev = 1.0. + * + * To get a result from a different normal distribution use + * STDDEV * pg_prng_double_normal() + MEAN + * + * Uses https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform + */ +double +pg_prng_double_normal(pg_prng_state *state) +{ + double u1, + u2, + z0; + + /* + * pg_prng_double generates [0, 1), but for the basic version of the + * Box-Muller transform the two uniformly distributed random numbers are + * expected to be in (0, 1]; in particular we'd better not compute log(0). + */ + u1 = 1.0 - pg_prng_double(state); + u2 = 1.0 - pg_prng_double(state); + + /* Apply Box-Muller transform to get one normal-valued output */ + z0 = sqrt(-2.0 * log(u1)) * sin(2.0 * M_PI * u2); + return z0; +} + +/* + * Select a random boolean value. + */ +bool +pg_prng_bool(pg_prng_state *state) +{ + uint64 v = xoroshiro128ss(state); + + return (bool) (v >> 63); +} diff --git a/src/common/pgfnames.c b/src/common/pgfnames.c new file mode 100644 index 0000000..9d2fe9d --- /dev/null +++ b/src/common/pgfnames.c @@ -0,0 +1,94 @@ +/*------------------------------------------------------------------------- + * + * pgfnames.c + * directory handling functions + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/pgfnames.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <dirent.h> + +#ifndef FRONTEND +#define pg_log_warning(...) elog(WARNING, __VA_ARGS__) +#else +#include "common/logging.h" +#endif + +/* + * pgfnames + * + * return a list of the names of objects in the argument directory. Caller + * must call pgfnames_cleanup later to free the memory allocated by this + * function. + */ +char ** +pgfnames(const char *path) +{ + DIR *dir; + struct dirent *file; + char **filenames; + int numnames = 0; + int fnsize = 200; /* enough for many small dbs */ + + dir = opendir(path); + if (dir == NULL) + { + pg_log_warning("could not open directory \"%s\": %m", path); + return NULL; + } + + filenames = (char **) palloc(fnsize * sizeof(char *)); + + while (errno = 0, (file = readdir(dir)) != NULL) + { + if (strcmp(file->d_name, ".") != 0 && strcmp(file->d_name, "..") != 0) + { + if (numnames + 1 >= fnsize) + { + fnsize *= 2; + filenames = (char **) repalloc(filenames, + fnsize * sizeof(char *)); + } + filenames[numnames++] = pstrdup(file->d_name); + } + } + + if (errno) + pg_log_warning("could not read directory \"%s\": %m", path); + + filenames[numnames] = NULL; + + if (closedir(dir)) + pg_log_warning("could not close directory \"%s\": %m", path); + + return filenames; +} + + +/* + * pgfnames_cleanup + * + * deallocate memory used for filenames + */ +void +pgfnames_cleanup(char **filenames) +{ + char **fn; + + for (fn = filenames; *fn; fn++) + pfree(*fn); + + pfree(filenames); +} diff --git a/src/common/protocol_openssl.c b/src/common/protocol_openssl.c new file mode 100644 index 0000000..089cbd3 --- /dev/null +++ b/src/common/protocol_openssl.c @@ -0,0 +1,117 @@ +/*------------------------------------------------------------------------- + * + * protocol_openssl.c + * OpenSSL functionality shared between frontend and backend + * + * This should only be used if code is compiled with OpenSSL support. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/protocol_openssl.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/openssl.h" + +/* + * Replacements for APIs introduced in OpenSSL 1.1.0. + */ +#ifndef SSL_CTX_set_min_proto_version + +/* + * OpenSSL versions that support TLS 1.3 shouldn't get here because they + * already have these functions. So we don't have to keep updating the below + * code for every new TLS version, and eventually it can go away. But let's + * just check this to make sure ... + */ +#ifdef TLS1_3_VERSION +#error OpenSSL version mismatch +#endif + +int +SSL_CTX_set_min_proto_version(SSL_CTX *ctx, int version) +{ + int ssl_options = SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3; + + if (version > TLS1_VERSION) + ssl_options |= SSL_OP_NO_TLSv1; + + /* + * Some OpenSSL versions define TLS*_VERSION macros but not the + * corresponding SSL_OP_NO_* macro, so in those cases we have to return + * unsuccessfully here. + */ +#ifdef TLS1_1_VERSION + if (version > TLS1_1_VERSION) + { +#ifdef SSL_OP_NO_TLSv1_1 + ssl_options |= SSL_OP_NO_TLSv1_1; +#else + return 0; +#endif + } +#endif +#ifdef TLS1_2_VERSION + if (version > TLS1_2_VERSION) + { +#ifdef SSL_OP_NO_TLSv1_2 + ssl_options |= SSL_OP_NO_TLSv1_2; +#else + return 0; +#endif + } +#endif + + SSL_CTX_set_options(ctx, ssl_options); + + return 1; /* success */ +} + +int +SSL_CTX_set_max_proto_version(SSL_CTX *ctx, int version) +{ + int ssl_options = 0; + + Assert(version != 0); + + /* + * Some OpenSSL versions define TLS*_VERSION macros but not the + * corresponding SSL_OP_NO_* macro, so in those cases we have to return + * unsuccessfully here. + */ +#ifdef TLS1_1_VERSION + if (version < TLS1_1_VERSION) + { +#ifdef SSL_OP_NO_TLSv1_1 + ssl_options |= SSL_OP_NO_TLSv1_1; +#else + return 0; +#endif + } +#endif +#ifdef TLS1_2_VERSION + if (version < TLS1_2_VERSION) + { +#ifdef SSL_OP_NO_TLSv1_2 + ssl_options |= SSL_OP_NO_TLSv1_2; +#else + return 0; +#endif + } +#endif + + SSL_CTX_set_options(ctx, ssl_options); + + return 1; /* success */ +} + +#endif /* !SSL_CTX_set_min_proto_version */ diff --git a/src/common/psprintf.c b/src/common/psprintf.c new file mode 100644 index 0000000..c1d2807 --- /dev/null +++ b/src/common/psprintf.c @@ -0,0 +1,151 @@ +/*------------------------------------------------------------------------- + * + * psprintf.c + * sprintf into an allocated-on-demand buffer + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/psprintf.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND + +#include "postgres.h" + +#include "utils/memutils.h" + +#else + +#include "postgres_fe.h" + +/* It's possible we could use a different value for this in frontend code */ +#define MaxAllocSize ((Size) 0x3fffffff) /* 1 gigabyte - 1 */ + +#endif + + +/* + * psprintf + * + * Format text data under the control of fmt (an sprintf-style format string) + * and return it in an allocated-on-demand buffer. The buffer is allocated + * with palloc in the backend, or malloc in frontend builds. Caller is + * responsible to free the buffer when no longer needed, if appropriate. + * + * Errors are not returned to the caller, but are reported via elog(ERROR) + * in the backend, or printf-to-stderr-and-exit() in frontend builds. + * One should therefore think twice about using this in libpq. + */ +char * +psprintf(const char *fmt,...) +{ + int save_errno = errno; + size_t len = 128; /* initial assumption about buffer size */ + + for (;;) + { + char *result; + va_list args; + size_t newlen; + + /* + * Allocate result buffer. Note that in frontend this maps to malloc + * with exit-on-error. + */ + result = (char *) palloc(len); + + /* Try to format the data. */ + errno = save_errno; + va_start(args, fmt); + newlen = pvsnprintf(result, len, fmt, args); + va_end(args); + + if (newlen < len) + return result; /* success */ + + /* Release buffer and loop around to try again with larger len. */ + pfree(result); + len = newlen; + } +} + +/* + * pvsnprintf + * + * Attempt to format text data under the control of fmt (an sprintf-style + * format string) and insert it into buf (which has length len). + * + * If successful, return the number of bytes emitted, not counting the + * trailing zero byte. This will always be strictly less than len. + * + * If there's not enough space in buf, return an estimate of the buffer size + * needed to succeed (this *must* be more than the given len, else callers + * might loop infinitely). + * + * Other error cases do not return, but exit via elog(ERROR) or exit(). + * Hence, this shouldn't be used inside libpq. + * + * Caution: callers must be sure to preserve their entry-time errno + * when looping, in case the fmt contains "%m". + * + * Note that the semantics of the return value are not exactly C99's. + * First, we don't promise that the estimated buffer size is exactly right; + * callers must be prepared to loop multiple times to get the right size. + * (Given a C99-compliant vsnprintf, that won't happen, but it is rumored + * that some implementations don't always return the same value ...) + * Second, we return the recommended buffer size, not one less than that; + * this lets overflow concerns be handled here rather than in the callers. + */ +size_t +pvsnprintf(char *buf, size_t len, const char *fmt, va_list args) +{ + int nprinted; + + nprinted = vsnprintf(buf, len, fmt, args); + + /* We assume failure means the fmt is bogus, hence hard failure is OK */ + if (unlikely(nprinted < 0)) + { +#ifndef FRONTEND + elog(ERROR, "vsnprintf failed: %m with format string \"%s\"", fmt); +#else + fprintf(stderr, "vsnprintf failed: %s with format string \"%s\"\n", + strerror(errno), fmt); + exit(EXIT_FAILURE); +#endif + } + + if ((size_t) nprinted < len) + { + /* Success. Note nprinted does not include trailing null. */ + return (size_t) nprinted; + } + + /* + * We assume a C99-compliant vsnprintf, so believe its estimate of the + * required space, and add one for the trailing null. (If it's wrong, the + * logic will still work, but we may loop multiple times.) + * + * Choke if the required space would exceed MaxAllocSize. Note we use + * this palloc-oriented overflow limit even when in frontend. + */ + if (unlikely((size_t) nprinted > MaxAllocSize - 1)) + { +#ifndef FRONTEND + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("out of memory"))); +#else + fprintf(stderr, _("out of memory\n")); + exit(EXIT_FAILURE); +#endif + } + + return nprinted + 1; +} diff --git a/src/common/relpath.c b/src/common/relpath.c new file mode 100644 index 0000000..87de5f6 --- /dev/null +++ b/src/common/relpath.c @@ -0,0 +1,210 @@ +/*------------------------------------------------------------------------- + * relpath.c + * Shared frontend/backend code to compute pathnames of relation files + * + * This module also contains some logic associated with fork names. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/relpath.c + * + *------------------------------------------------------------------------- + */ +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "catalog/pg_tablespace_d.h" +#include "common/relpath.h" +#include "storage/backendid.h" + + +/* + * Lookup table of fork name by fork number. + * + * If you add a new entry, remember to update the errhint in + * forkname_to_number() below, and update the SGML documentation for + * pg_relation_size(). + */ +const char *const forkNames[] = { + "main", /* MAIN_FORKNUM */ + "fsm", /* FSM_FORKNUM */ + "vm", /* VISIBILITYMAP_FORKNUM */ + "init" /* INIT_FORKNUM */ +}; + +StaticAssertDecl(lengthof(forkNames) == (MAX_FORKNUM + 1), + "array length mismatch"); + +/* + * forkname_to_number - look up fork number by name + * + * In backend, we throw an error for no match; in frontend, we just + * return InvalidForkNumber. + */ +ForkNumber +forkname_to_number(const char *forkName) +{ + ForkNumber forkNum; + + for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) + if (strcmp(forkNames[forkNum], forkName) == 0) + return forkNum; + +#ifndef FRONTEND + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid fork name"), + errhint("Valid fork names are \"main\", \"fsm\", " + "\"vm\", and \"init\"."))); +#endif + + return InvalidForkNumber; +} + +/* + * forkname_chars + * We use this to figure out whether a filename could be a relation + * fork (as opposed to an oddly named stray file that somehow ended + * up in the database directory). If the passed string begins with + * a fork name (other than the main fork name), we return its length, + * and set *fork (if not NULL) to the fork number. If not, we return 0. + * + * Note that the present coding assumes that there are no fork names which + * are prefixes of other fork names. + */ +int +forkname_chars(const char *str, ForkNumber *fork) +{ + ForkNumber forkNum; + + for (forkNum = 1; forkNum <= MAX_FORKNUM; forkNum++) + { + int len = strlen(forkNames[forkNum]); + + if (strncmp(forkNames[forkNum], str, len) == 0) + { + if (fork) + *fork = forkNum; + return len; + } + } + if (fork) + *fork = InvalidForkNumber; + return 0; +} + + +/* + * GetDatabasePath - construct path to a database directory + * + * Result is a palloc'd string. + * + * XXX this must agree with GetRelationPath()! + */ +char * +GetDatabasePath(Oid dbOid, Oid spcOid) +{ + if (spcOid == GLOBALTABLESPACE_OID) + { + /* Shared system relations live in {datadir}/global */ + Assert(dbOid == 0); + return pstrdup("global"); + } + else if (spcOid == DEFAULTTABLESPACE_OID) + { + /* The default tablespace is {datadir}/base */ + return psprintf("base/%u", dbOid); + } + else + { + /* All other tablespaces are accessed via symlinks */ + return psprintf("pg_tblspc/%u/%s/%u", + spcOid, TABLESPACE_VERSION_DIRECTORY, dbOid); + } +} + +/* + * GetRelationPath - construct path to a relation's file + * + * Result is a palloc'd string. + * + * Note: ideally, backendId would be declared as type BackendId, but relpath.h + * would have to include a backend-only header to do that; doesn't seem worth + * the trouble considering BackendId is just int anyway. + */ +char * +GetRelationPath(Oid dbOid, Oid spcOid, RelFileNumber relNumber, + int backendId, ForkNumber forkNumber) +{ + char *path; + + if (spcOid == GLOBALTABLESPACE_OID) + { + /* Shared system relations live in {datadir}/global */ + Assert(dbOid == 0); + Assert(backendId == InvalidBackendId); + if (forkNumber != MAIN_FORKNUM) + path = psprintf("global/%u_%s", + relNumber, forkNames[forkNumber]); + else + path = psprintf("global/%u", relNumber); + } + else if (spcOid == DEFAULTTABLESPACE_OID) + { + /* The default tablespace is {datadir}/base */ + if (backendId == InvalidBackendId) + { + if (forkNumber != MAIN_FORKNUM) + path = psprintf("base/%u/%u_%s", + dbOid, relNumber, + forkNames[forkNumber]); + else + path = psprintf("base/%u/%u", + dbOid, relNumber); + } + else + { + if (forkNumber != MAIN_FORKNUM) + path = psprintf("base/%u/t%d_%u_%s", + dbOid, backendId, relNumber, + forkNames[forkNumber]); + else + path = psprintf("base/%u/t%d_%u", + dbOid, backendId, relNumber); + } + } + else + { + /* All other tablespaces are accessed via symlinks */ + if (backendId == InvalidBackendId) + { + if (forkNumber != MAIN_FORKNUM) + path = psprintf("pg_tblspc/%u/%s/%u/%u_%s", + spcOid, TABLESPACE_VERSION_DIRECTORY, + dbOid, relNumber, + forkNames[forkNumber]); + else + path = psprintf("pg_tblspc/%u/%s/%u/%u", + spcOid, TABLESPACE_VERSION_DIRECTORY, + dbOid, relNumber); + } + else + { + if (forkNumber != MAIN_FORKNUM) + path = psprintf("pg_tblspc/%u/%s/%u/t%d_%u_%s", + spcOid, TABLESPACE_VERSION_DIRECTORY, + dbOid, backendId, relNumber, + forkNames[forkNumber]); + else + path = psprintf("pg_tblspc/%u/%s/%u/t%d_%u", + spcOid, TABLESPACE_VERSION_DIRECTORY, + dbOid, backendId, relNumber); + } + } + return path; +} diff --git a/src/common/restricted_token.c b/src/common/restricted_token.c new file mode 100644 index 0000000..4ae1ed1 --- /dev/null +++ b/src/common/restricted_token.c @@ -0,0 +1,174 @@ +/*------------------------------------------------------------------------- + * + * restricted_token.c + * helper routine to ensure restricted token on Windows + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/restricted_token.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#error "This file is not expected to be compiled for backend code" +#endif + +#include "postgres_fe.h" + +#include "common/logging.h" +#include "common/restricted_token.h" + +#ifdef WIN32 + +/* internal vars */ +char *restrict_env; + +/* Windows API define missing from some versions of MingW headers */ +#ifndef DISABLE_MAX_PRIVILEGE +#define DISABLE_MAX_PRIVILEGE 0x1 +#endif + +/* + * Create a restricted token and execute the specified process with it. + * + * Returns restricted token on success and 0 on failure. + * + * On any system not containing the required functions, do nothing + * but still report an error. + */ +HANDLE +CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION *processInfo) +{ + BOOL b; + STARTUPINFO si; + HANDLE origToken; + HANDLE restrictedToken; + SID_IDENTIFIER_AUTHORITY NtAuthority = {SECURITY_NT_AUTHORITY}; + SID_AND_ATTRIBUTES dropSids[2]; + + ZeroMemory(&si, sizeof(si)); + si.cb = sizeof(si); + + /* Open the current token to use as a base for the restricted one */ + if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ALL_ACCESS, &origToken)) + { + pg_log_error("could not open process token: error code %lu", + GetLastError()); + return 0; + } + + /* Allocate list of SIDs to remove */ + ZeroMemory(&dropSids, sizeof(dropSids)); + if (!AllocateAndInitializeSid(&NtAuthority, 2, + SECURITY_BUILTIN_DOMAIN_RID, DOMAIN_ALIAS_RID_ADMINS, 0, 0, 0, 0, 0, + 0, &dropSids[0].Sid) || + !AllocateAndInitializeSid(&NtAuthority, 2, + SECURITY_BUILTIN_DOMAIN_RID, DOMAIN_ALIAS_RID_POWER_USERS, 0, 0, 0, 0, 0, + 0, &dropSids[1].Sid)) + { + pg_log_error("could not allocate SIDs: error code %lu", + GetLastError()); + CloseHandle(origToken); + return 0; + } + + b = CreateRestrictedToken(origToken, + DISABLE_MAX_PRIVILEGE, + sizeof(dropSids) / sizeof(dropSids[0]), + dropSids, + 0, NULL, + 0, NULL, + &restrictedToken); + + FreeSid(dropSids[1].Sid); + FreeSid(dropSids[0].Sid); + CloseHandle(origToken); + + if (!b) + { + pg_log_error("could not create restricted token: error code %lu", GetLastError()); + return 0; + } + +#ifndef __CYGWIN__ + AddUserToTokenDacl(restrictedToken); +#endif + + if (!CreateProcessAsUser(restrictedToken, + NULL, + cmd, + NULL, + NULL, + TRUE, + CREATE_SUSPENDED, + NULL, + NULL, + &si, + processInfo)) + + { + pg_log_error("could not start process for command \"%s\": error code %lu", cmd, GetLastError()); + return 0; + } + + ResumeThread(processInfo->hThread); + return restrictedToken; +} +#endif + +/* + * On Windows make sure that we are running with a restricted token, + * On other platforms do nothing. + */ +void +get_restricted_token(void) +{ +#ifdef WIN32 + HANDLE restrictedToken; + + /* + * Before we execute another program, make sure that we are running with a + * restricted token. If not, re-execute ourselves with one. + */ + + if ((restrict_env = getenv("PG_RESTRICT_EXEC")) == NULL + || strcmp(restrict_env, "1") != 0) + { + PROCESS_INFORMATION pi; + char *cmdline; + + ZeroMemory(&pi, sizeof(pi)); + + cmdline = pg_strdup(GetCommandLine()); + + setenv("PG_RESTRICT_EXEC", "1", 1); + + if ((restrictedToken = CreateRestrictedProcess(cmdline, &pi)) == 0) + { + pg_log_error("could not re-execute with restricted token: error code %lu", GetLastError()); + } + else + { + /* + * Successfully re-executed. Now wait for child process to capture + * the exit code. + */ + DWORD x; + + CloseHandle(restrictedToken); + CloseHandle(pi.hThread); + WaitForSingleObject(pi.hProcess, INFINITE); + + if (!GetExitCodeProcess(pi.hProcess, &x)) + pg_fatal("could not get exit code from subprocess: error code %lu", GetLastError()); + exit(x); + } + pg_free(cmdline); + } +#endif +} diff --git a/src/common/rmtree.c b/src/common/rmtree.c new file mode 100644 index 0000000..cd99d3f --- /dev/null +++ b/src/common/rmtree.c @@ -0,0 +1,130 @@ +/*------------------------------------------------------------------------- + * + * rmtree.c + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/rmtree.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <unistd.h> +#include <sys/stat.h> + +#include "common/file_utils.h" + +#ifndef FRONTEND +#include "storage/fd.h" +#define pg_log_warning(...) elog(WARNING, __VA_ARGS__) +#define LOG_LEVEL WARNING +#define OPENDIR(x) AllocateDir(x) +#define CLOSEDIR(x) FreeDir(x) +#else +#include "common/logging.h" +#define LOG_LEVEL PG_LOG_WARNING +#define OPENDIR(x) opendir(x) +#define CLOSEDIR(x) closedir(x) +#endif + +/* + * rmtree + * + * Delete a directory tree recursively. + * Assumes path points to a valid directory. + * Deletes everything under path. + * If rmtopdir is true deletes the directory too. + * Returns true if successful, false if there was any problem. + * (The details of the problem are reported already, so caller + * doesn't really have to say anything more, but most do.) + */ +bool +rmtree(const char *path, bool rmtopdir) +{ + char pathbuf[MAXPGPATH]; + DIR *dir; + struct dirent *de; + bool result = true; + size_t dirnames_size = 0; + size_t dirnames_capacity = 8; + char **dirnames = palloc(sizeof(char *) * dirnames_capacity); + + dir = OPENDIR(path); + if (dir == NULL) + { + pg_log_warning("could not open directory \"%s\": %m", path); + return false; + } + + while (errno = 0, (de = readdir(dir))) + { + if (strcmp(de->d_name, ".") == 0 || + strcmp(de->d_name, "..") == 0) + continue; + snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name); + switch (get_dirent_type(pathbuf, de, false, LOG_LEVEL)) + { + case PGFILETYPE_ERROR: + /* already logged, press on */ + break; + case PGFILETYPE_DIR: + + /* + * Defer recursion until after we've closed this directory, to + * avoid using more than one file descriptor at a time. + */ + if (dirnames_size == dirnames_capacity) + { + dirnames = repalloc(dirnames, + sizeof(char *) * dirnames_capacity * 2); + dirnames_capacity *= 2; + } + dirnames[dirnames_size++] = pstrdup(pathbuf); + break; + default: + if (unlink(pathbuf) != 0 && errno != ENOENT) + { + pg_log_warning("could not remove file \"%s\": %m", pathbuf); + result = false; + } + break; + } + } + + if (errno != 0) + { + pg_log_warning("could not read directory \"%s\": %m", path); + result = false; + } + + CLOSEDIR(dir); + + /* Now recurse into the subdirectories we found. */ + for (size_t i = 0; i < dirnames_size; ++i) + { + if (!rmtree(dirnames[i], true)) + result = false; + pfree(dirnames[i]); + } + + if (rmtopdir) + { + if (rmdir(path) != 0) + { + pg_log_warning("could not remove directory \"%s\": %m", path); + result = false; + } + } + + pfree(dirnames); + + return result; +} diff --git a/src/common/ryu_common.h b/src/common/ryu_common.h new file mode 100644 index 0000000..ad850ac --- /dev/null +++ b/src/common/ryu_common.h @@ -0,0 +1,133 @@ +/*--------------------------------------------------------------------------- + * + * Common routines for Ryu floating-point output. + * + * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/ryu_common.h + * + * This is a modification of code taken from github.com/ulfjack/ryu under the + * terms of the Boost license (not the Apache license). The original copyright + * notice follows: + * + * Copyright 2018 Ulf Adams + * + * The contents of this file may be used under the terms of the Apache + * License, Version 2.0. + * + * (See accompanying file LICENSE-Apache or copy at + * http://www.apache.org/licenses/LICENSE-2.0) + * + * Alternatively, the contents of this file may be used under the terms of the + * Boost Software License, Version 1.0. + * + * (See accompanying file LICENSE-Boost or copy at + * https://www.boost.org/LICENSE_1_0.txt) + * + * Unless required by applicable law or agreed to in writing, this software is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. + * + *--------------------------------------------------------------------------- + */ +#ifndef RYU_COMMON_H +#define RYU_COMMON_H + +/* + * Upstream Ryu's output is always the shortest possible. But we adjust that + * slightly to improve portability: we avoid outputting the exact midpoint + * value between two representable floats, since that relies on the reader + * getting the round-to-even rule correct, which seems to be the common + * failure mode. + * + * Defining this to 1 would restore the upstream behavior. + */ +#define STRICTLY_SHORTEST 0 + +#if SIZEOF_SIZE_T < 8 +#define RYU_32_BIT_PLATFORM +#endif + +/* Returns e == 0 ? 1 : ceil(log_2(5^e)). */ +static inline uint32 +pow5bits(const int32 e) +{ + /* + * This approximation works up to the point that the multiplication + * overflows at e = 3529. + * + * If the multiplication were done in 64 bits, it would fail at 5^4004 + * which is just greater than 2^9297. + */ + Assert(e >= 0); + Assert(e <= 3528); + return ((((uint32) e) * 1217359) >> 19) + 1; +} + +/* Returns floor(log_10(2^e)). */ +static inline int32 +log10Pow2(const int32 e) +{ + /* + * The first value this approximation fails for is 2^1651 which is just + * greater than 10^297. + */ + Assert(e >= 0); + Assert(e <= 1650); + return (int32) ((((uint32) e) * 78913) >> 18); +} + +/* Returns floor(log_10(5^e)). */ +static inline int32 +log10Pow5(const int32 e) +{ + /* + * The first value this approximation fails for is 5^2621 which is just + * greater than 10^1832. + */ + Assert(e >= 0); + Assert(e <= 2620); + return (int32) ((((uint32) e) * 732923) >> 20); +} + +static inline int +copy_special_str(char *const result, const bool sign, const bool exponent, const bool mantissa) +{ + if (mantissa) + { + memcpy(result, "NaN", 3); + return 3; + } + if (sign) + { + result[0] = '-'; + } + if (exponent) + { + memcpy(result + sign, "Infinity", 8); + return sign + 8; + } + result[sign] = '0'; + return sign + 1; +} + +static inline uint32 +float_to_bits(const float f) +{ + uint32 bits = 0; + + memcpy(&bits, &f, sizeof(float)); + return bits; +} + +static inline uint64 +double_to_bits(const double d) +{ + uint64 bits = 0; + + memcpy(&bits, &d, sizeof(double)); + return bits; +} + +#endif /* RYU_COMMON_H */ diff --git a/src/common/saslprep.c b/src/common/saslprep.c new file mode 100644 index 0000000..3cf4988 --- /dev/null +++ b/src/common/saslprep.c @@ -0,0 +1,1245 @@ +/*------------------------------------------------------------------------- + * saslprep.c + * SASLprep normalization, for SCRAM authentication + * + * The SASLprep algorithm is used to process a user-supplied password into + * canonical form. For more details, see: + * + * [RFC3454] Preparation of Internationalized Strings ("stringprep"), + * http://www.ietf.org/rfc/rfc3454.txt + * + * [RFC4013] SASLprep: Stringprep Profile for User Names and Passwords + * http://www.ietf.org/rfc/rfc4013.txt + * + * + * Portions Copyright (c) 2017-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/saslprep.c + * + *------------------------------------------------------------------------- + */ +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/saslprep.h" +#include "common/string.h" +#include "common/unicode_norm.h" +#include "mb/pg_wchar.h" + +/* + * In backend, we will use palloc/pfree. In frontend, use malloc, and + * return SASLPREP_OOM on out-of-memory. + */ +#ifndef FRONTEND +#define STRDUP(s) pstrdup(s) +#define ALLOC(size) palloc(size) +#define FREE(size) pfree(size) +#else +#define STRDUP(s) strdup(s) +#define ALLOC(size) malloc(size) +#define FREE(size) free(size) +#endif + +/* Prototypes for local functions */ +static int codepoint_range_cmp(const void *a, const void *b); +static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize); +static int pg_utf8_string_len(const char *source); + +/* + * Stringprep Mapping Tables. + * + * The stringprep specification includes a number of tables of Unicode + * codepoints, used in different parts of the algorithm. They are below, + * as arrays of codepoint ranges. Each range is a pair of codepoints, + * for the first and last codepoint included the range (inclusive!). + */ + +/* + * C.1.2 Non-ASCII space characters + * + * These are all mapped to the ASCII space character (U+00A0). + */ +static const pg_wchar non_ascii_space_ranges[] = +{ + 0x00A0, 0x00A0, + 0x1680, 0x1680, + 0x2000, 0x200B, + 0x202F, 0x202F, + 0x205F, 0x205F, + 0x3000, 0x3000 +}; + +/* + * B.1 Commonly mapped to nothing + * + * If any of these appear in the input, they are removed. + */ +static const pg_wchar commonly_mapped_to_nothing_ranges[] = +{ + 0x00AD, 0x00AD, + 0x034F, 0x034F, + 0x1806, 0x1806, + 0x180B, 0x180D, + 0x200B, 0x200D, + 0x2060, 0x2060, + 0xFE00, 0xFE0F, + 0xFEFF, 0xFEFF +}; + +/* + * prohibited_output_ranges is a union of all the characters from + * the following tables: + * + * C.1.2 Non-ASCII space characters + * C.2.1 ASCII control characters + * C.2.2 Non-ASCII control characters + * C.3 Private Use characters + * C.4 Non-character code points + * C.5 Surrogate code points + * C.6 Inappropriate for plain text characters + * C.7 Inappropriate for canonical representation characters + * C.7 Change display properties or deprecated characters + * C.8 Tagging characters + * + * These are the tables that are listed as "prohibited output" + * characters in the SASLprep profile. + * + * The comment after each code range indicates which source table + * the code came from. Note that there is some overlap in the source + * tables, so one code might originate from multiple source tables. + * Adjacent ranges have also been merged together, to save space. + */ +static const pg_wchar prohibited_output_ranges[] = +{ + 0x0000, 0x001F, /* C.2.1 */ + 0x007F, 0x00A0, /* C.1.2, C.2.1, C.2.2 */ + 0x0340, 0x0341, /* C.8 */ + 0x06DD, 0x06DD, /* C.2.2 */ + 0x070F, 0x070F, /* C.2.2 */ + 0x1680, 0x1680, /* C.1.2 */ + 0x180E, 0x180E, /* C.2.2 */ + 0x2000, 0x200F, /* C.1.2, C.2.2, C.8 */ + 0x2028, 0x202F, /* C.1.2, C.2.2, C.8 */ + 0x205F, 0x2063, /* C.1.2, C.2.2 */ + 0x206A, 0x206F, /* C.2.2, C.8 */ + 0x2FF0, 0x2FFB, /* C.7 */ + 0x3000, 0x3000, /* C.1.2 */ + 0xD800, 0xF8FF, /* C.3, C.5 */ + 0xFDD0, 0xFDEF, /* C.4 */ + 0xFEFF, 0xFEFF, /* C.2.2 */ + 0xFFF9, 0xFFFF, /* C.2.2, C.4, C.6 */ + 0x1D173, 0x1D17A, /* C.2.2 */ + 0x1FFFE, 0x1FFFF, /* C.4 */ + 0x2FFFE, 0x2FFFF, /* C.4 */ + 0x3FFFE, 0x3FFFF, /* C.4 */ + 0x4FFFE, 0x4FFFF, /* C.4 */ + 0x5FFFE, 0x5FFFF, /* C.4 */ + 0x6FFFE, 0x6FFFF, /* C.4 */ + 0x7FFFE, 0x7FFFF, /* C.4 */ + 0x8FFFE, 0x8FFFF, /* C.4 */ + 0x9FFFE, 0x9FFFF, /* C.4 */ + 0xAFFFE, 0xAFFFF, /* C.4 */ + 0xBFFFE, 0xBFFFF, /* C.4 */ + 0xCFFFE, 0xCFFFF, /* C.4 */ + 0xDFFFE, 0xDFFFF, /* C.4 */ + 0xE0001, 0xE0001, /* C.9 */ + 0xE0020, 0xE007F, /* C.9 */ + 0xEFFFE, 0xEFFFF, /* C.4 */ + 0xF0000, 0xFFFFF, /* C.3, C.4 */ + 0x100000, 0x10FFFF /* C.3, C.4 */ +}; + +/* A.1 Unassigned code points in Unicode 3.2 */ +static const pg_wchar unassigned_codepoint_ranges[] = +{ + 0x0221, 0x0221, + 0x0234, 0x024F, + 0x02AE, 0x02AF, + 0x02EF, 0x02FF, + 0x0350, 0x035F, + 0x0370, 0x0373, + 0x0376, 0x0379, + 0x037B, 0x037D, + 0x037F, 0x0383, + 0x038B, 0x038B, + 0x038D, 0x038D, + 0x03A2, 0x03A2, + 0x03CF, 0x03CF, + 0x03F7, 0x03FF, + 0x0487, 0x0487, + 0x04CF, 0x04CF, + 0x04F6, 0x04F7, + 0x04FA, 0x04FF, + 0x0510, 0x0530, + 0x0557, 0x0558, + 0x0560, 0x0560, + 0x0588, 0x0588, + 0x058B, 0x0590, + 0x05A2, 0x05A2, + 0x05BA, 0x05BA, + 0x05C5, 0x05CF, + 0x05EB, 0x05EF, + 0x05F5, 0x060B, + 0x060D, 0x061A, + 0x061C, 0x061E, + 0x0620, 0x0620, + 0x063B, 0x063F, + 0x0656, 0x065F, + 0x06EE, 0x06EF, + 0x06FF, 0x06FF, + 0x070E, 0x070E, + 0x072D, 0x072F, + 0x074B, 0x077F, + 0x07B2, 0x0900, + 0x0904, 0x0904, + 0x093A, 0x093B, + 0x094E, 0x094F, + 0x0955, 0x0957, + 0x0971, 0x0980, + 0x0984, 0x0984, + 0x098D, 0x098E, + 0x0991, 0x0992, + 0x09A9, 0x09A9, + 0x09B1, 0x09B1, + 0x09B3, 0x09B5, + 0x09BA, 0x09BB, + 0x09BD, 0x09BD, + 0x09C5, 0x09C6, + 0x09C9, 0x09CA, + 0x09CE, 0x09D6, + 0x09D8, 0x09DB, + 0x09DE, 0x09DE, + 0x09E4, 0x09E5, + 0x09FB, 0x0A01, + 0x0A03, 0x0A04, + 0x0A0B, 0x0A0E, + 0x0A11, 0x0A12, + 0x0A29, 0x0A29, + 0x0A31, 0x0A31, + 0x0A34, 0x0A34, + 0x0A37, 0x0A37, + 0x0A3A, 0x0A3B, + 0x0A3D, 0x0A3D, + 0x0A43, 0x0A46, + 0x0A49, 0x0A4A, + 0x0A4E, 0x0A58, + 0x0A5D, 0x0A5D, + 0x0A5F, 0x0A65, + 0x0A75, 0x0A80, + 0x0A84, 0x0A84, + 0x0A8C, 0x0A8C, + 0x0A8E, 0x0A8E, + 0x0A92, 0x0A92, + 0x0AA9, 0x0AA9, + 0x0AB1, 0x0AB1, + 0x0AB4, 0x0AB4, + 0x0ABA, 0x0ABB, + 0x0AC6, 0x0AC6, + 0x0ACA, 0x0ACA, + 0x0ACE, 0x0ACF, + 0x0AD1, 0x0ADF, + 0x0AE1, 0x0AE5, + 0x0AF0, 0x0B00, + 0x0B04, 0x0B04, + 0x0B0D, 0x0B0E, + 0x0B11, 0x0B12, + 0x0B29, 0x0B29, + 0x0B31, 0x0B31, + 0x0B34, 0x0B35, + 0x0B3A, 0x0B3B, + 0x0B44, 0x0B46, + 0x0B49, 0x0B4A, + 0x0B4E, 0x0B55, + 0x0B58, 0x0B5B, + 0x0B5E, 0x0B5E, + 0x0B62, 0x0B65, + 0x0B71, 0x0B81, + 0x0B84, 0x0B84, + 0x0B8B, 0x0B8D, + 0x0B91, 0x0B91, + 0x0B96, 0x0B98, + 0x0B9B, 0x0B9B, + 0x0B9D, 0x0B9D, + 0x0BA0, 0x0BA2, + 0x0BA5, 0x0BA7, + 0x0BAB, 0x0BAD, + 0x0BB6, 0x0BB6, + 0x0BBA, 0x0BBD, + 0x0BC3, 0x0BC5, + 0x0BC9, 0x0BC9, + 0x0BCE, 0x0BD6, + 0x0BD8, 0x0BE6, + 0x0BF3, 0x0C00, + 0x0C04, 0x0C04, + 0x0C0D, 0x0C0D, + 0x0C11, 0x0C11, + 0x0C29, 0x0C29, + 0x0C34, 0x0C34, + 0x0C3A, 0x0C3D, + 0x0C45, 0x0C45, + 0x0C49, 0x0C49, + 0x0C4E, 0x0C54, + 0x0C57, 0x0C5F, + 0x0C62, 0x0C65, + 0x0C70, 0x0C81, + 0x0C84, 0x0C84, + 0x0C8D, 0x0C8D, + 0x0C91, 0x0C91, + 0x0CA9, 0x0CA9, + 0x0CB4, 0x0CB4, + 0x0CBA, 0x0CBD, + 0x0CC5, 0x0CC5, + 0x0CC9, 0x0CC9, + 0x0CCE, 0x0CD4, + 0x0CD7, 0x0CDD, + 0x0CDF, 0x0CDF, + 0x0CE2, 0x0CE5, + 0x0CF0, 0x0D01, + 0x0D04, 0x0D04, + 0x0D0D, 0x0D0D, + 0x0D11, 0x0D11, + 0x0D29, 0x0D29, + 0x0D3A, 0x0D3D, + 0x0D44, 0x0D45, + 0x0D49, 0x0D49, + 0x0D4E, 0x0D56, + 0x0D58, 0x0D5F, + 0x0D62, 0x0D65, + 0x0D70, 0x0D81, + 0x0D84, 0x0D84, + 0x0D97, 0x0D99, + 0x0DB2, 0x0DB2, + 0x0DBC, 0x0DBC, + 0x0DBE, 0x0DBF, + 0x0DC7, 0x0DC9, + 0x0DCB, 0x0DCE, + 0x0DD5, 0x0DD5, + 0x0DD7, 0x0DD7, + 0x0DE0, 0x0DF1, + 0x0DF5, 0x0E00, + 0x0E3B, 0x0E3E, + 0x0E5C, 0x0E80, + 0x0E83, 0x0E83, + 0x0E85, 0x0E86, + 0x0E89, 0x0E89, + 0x0E8B, 0x0E8C, + 0x0E8E, 0x0E93, + 0x0E98, 0x0E98, + 0x0EA0, 0x0EA0, + 0x0EA4, 0x0EA4, + 0x0EA6, 0x0EA6, + 0x0EA8, 0x0EA9, + 0x0EAC, 0x0EAC, + 0x0EBA, 0x0EBA, + 0x0EBE, 0x0EBF, + 0x0EC5, 0x0EC5, + 0x0EC7, 0x0EC7, + 0x0ECE, 0x0ECF, + 0x0EDA, 0x0EDB, + 0x0EDE, 0x0EFF, + 0x0F48, 0x0F48, + 0x0F6B, 0x0F70, + 0x0F8C, 0x0F8F, + 0x0F98, 0x0F98, + 0x0FBD, 0x0FBD, + 0x0FCD, 0x0FCE, + 0x0FD0, 0x0FFF, + 0x1022, 0x1022, + 0x1028, 0x1028, + 0x102B, 0x102B, + 0x1033, 0x1035, + 0x103A, 0x103F, + 0x105A, 0x109F, + 0x10C6, 0x10CF, + 0x10F9, 0x10FA, + 0x10FC, 0x10FF, + 0x115A, 0x115E, + 0x11A3, 0x11A7, + 0x11FA, 0x11FF, + 0x1207, 0x1207, + 0x1247, 0x1247, + 0x1249, 0x1249, + 0x124E, 0x124F, + 0x1257, 0x1257, + 0x1259, 0x1259, + 0x125E, 0x125F, + 0x1287, 0x1287, + 0x1289, 0x1289, + 0x128E, 0x128F, + 0x12AF, 0x12AF, + 0x12B1, 0x12B1, + 0x12B6, 0x12B7, + 0x12BF, 0x12BF, + 0x12C1, 0x12C1, + 0x12C6, 0x12C7, + 0x12CF, 0x12CF, + 0x12D7, 0x12D7, + 0x12EF, 0x12EF, + 0x130F, 0x130F, + 0x1311, 0x1311, + 0x1316, 0x1317, + 0x131F, 0x131F, + 0x1347, 0x1347, + 0x135B, 0x1360, + 0x137D, 0x139F, + 0x13F5, 0x1400, + 0x1677, 0x167F, + 0x169D, 0x169F, + 0x16F1, 0x16FF, + 0x170D, 0x170D, + 0x1715, 0x171F, + 0x1737, 0x173F, + 0x1754, 0x175F, + 0x176D, 0x176D, + 0x1771, 0x1771, + 0x1774, 0x177F, + 0x17DD, 0x17DF, + 0x17EA, 0x17FF, + 0x180F, 0x180F, + 0x181A, 0x181F, + 0x1878, 0x187F, + 0x18AA, 0x1DFF, + 0x1E9C, 0x1E9F, + 0x1EFA, 0x1EFF, + 0x1F16, 0x1F17, + 0x1F1E, 0x1F1F, + 0x1F46, 0x1F47, + 0x1F4E, 0x1F4F, + 0x1F58, 0x1F58, + 0x1F5A, 0x1F5A, + 0x1F5C, 0x1F5C, + 0x1F5E, 0x1F5E, + 0x1F7E, 0x1F7F, + 0x1FB5, 0x1FB5, + 0x1FC5, 0x1FC5, + 0x1FD4, 0x1FD5, + 0x1FDC, 0x1FDC, + 0x1FF0, 0x1FF1, + 0x1FF5, 0x1FF5, + 0x1FFF, 0x1FFF, + 0x2053, 0x2056, + 0x2058, 0x205E, + 0x2064, 0x2069, + 0x2072, 0x2073, + 0x208F, 0x209F, + 0x20B2, 0x20CF, + 0x20EB, 0x20FF, + 0x213B, 0x213C, + 0x214C, 0x2152, + 0x2184, 0x218F, + 0x23CF, 0x23FF, + 0x2427, 0x243F, + 0x244B, 0x245F, + 0x24FF, 0x24FF, + 0x2614, 0x2615, + 0x2618, 0x2618, + 0x267E, 0x267F, + 0x268A, 0x2700, + 0x2705, 0x2705, + 0x270A, 0x270B, + 0x2728, 0x2728, + 0x274C, 0x274C, + 0x274E, 0x274E, + 0x2753, 0x2755, + 0x2757, 0x2757, + 0x275F, 0x2760, + 0x2795, 0x2797, + 0x27B0, 0x27B0, + 0x27BF, 0x27CF, + 0x27EC, 0x27EF, + 0x2B00, 0x2E7F, + 0x2E9A, 0x2E9A, + 0x2EF4, 0x2EFF, + 0x2FD6, 0x2FEF, + 0x2FFC, 0x2FFF, + 0x3040, 0x3040, + 0x3097, 0x3098, + 0x3100, 0x3104, + 0x312D, 0x3130, + 0x318F, 0x318F, + 0x31B8, 0x31EF, + 0x321D, 0x321F, + 0x3244, 0x3250, + 0x327C, 0x327E, + 0x32CC, 0x32CF, + 0x32FF, 0x32FF, + 0x3377, 0x337A, + 0x33DE, 0x33DF, + 0x33FF, 0x33FF, + 0x4DB6, 0x4DFF, + 0x9FA6, 0x9FFF, + 0xA48D, 0xA48F, + 0xA4C7, 0xABFF, + 0xD7A4, 0xD7FF, + 0xFA2E, 0xFA2F, + 0xFA6B, 0xFAFF, + 0xFB07, 0xFB12, + 0xFB18, 0xFB1C, + 0xFB37, 0xFB37, + 0xFB3D, 0xFB3D, + 0xFB3F, 0xFB3F, + 0xFB42, 0xFB42, + 0xFB45, 0xFB45, + 0xFBB2, 0xFBD2, + 0xFD40, 0xFD4F, + 0xFD90, 0xFD91, + 0xFDC8, 0xFDCF, + 0xFDFD, 0xFDFF, + 0xFE10, 0xFE1F, + 0xFE24, 0xFE2F, + 0xFE47, 0xFE48, + 0xFE53, 0xFE53, + 0xFE67, 0xFE67, + 0xFE6C, 0xFE6F, + 0xFE75, 0xFE75, + 0xFEFD, 0xFEFE, + 0xFF00, 0xFF00, + 0xFFBF, 0xFFC1, + 0xFFC8, 0xFFC9, + 0xFFD0, 0xFFD1, + 0xFFD8, 0xFFD9, + 0xFFDD, 0xFFDF, + 0xFFE7, 0xFFE7, + 0xFFEF, 0xFFF8, + 0x10000, 0x102FF, + 0x1031F, 0x1031F, + 0x10324, 0x1032F, + 0x1034B, 0x103FF, + 0x10426, 0x10427, + 0x1044E, 0x1CFFF, + 0x1D0F6, 0x1D0FF, + 0x1D127, 0x1D129, + 0x1D1DE, 0x1D3FF, + 0x1D455, 0x1D455, + 0x1D49D, 0x1D49D, + 0x1D4A0, 0x1D4A1, + 0x1D4A3, 0x1D4A4, + 0x1D4A7, 0x1D4A8, + 0x1D4AD, 0x1D4AD, + 0x1D4BA, 0x1D4BA, + 0x1D4BC, 0x1D4BC, + 0x1D4C1, 0x1D4C1, + 0x1D4C4, 0x1D4C4, + 0x1D506, 0x1D506, + 0x1D50B, 0x1D50C, + 0x1D515, 0x1D515, + 0x1D51D, 0x1D51D, + 0x1D53A, 0x1D53A, + 0x1D53F, 0x1D53F, + 0x1D545, 0x1D545, + 0x1D547, 0x1D549, + 0x1D551, 0x1D551, + 0x1D6A4, 0x1D6A7, + 0x1D7CA, 0x1D7CD, + 0x1D800, 0x1FFFD, + 0x2A6D7, 0x2F7FF, + 0x2FA1E, 0x2FFFD, + 0x30000, 0x3FFFD, + 0x40000, 0x4FFFD, + 0x50000, 0x5FFFD, + 0x60000, 0x6FFFD, + 0x70000, 0x7FFFD, + 0x80000, 0x8FFFD, + 0x90000, 0x9FFFD, + 0xA0000, 0xAFFFD, + 0xB0000, 0xBFFFD, + 0xC0000, 0xCFFFD, + 0xD0000, 0xDFFFD, + 0xE0000, 0xE0000, + 0xE0002, 0xE001F, + 0xE0080, 0xEFFFD +}; + +/* D.1 Characters with bidirectional property "R" or "AL" */ +static const pg_wchar RandALCat_codepoint_ranges[] = +{ + 0x05BE, 0x05BE, + 0x05C0, 0x05C0, + 0x05C3, 0x05C3, + 0x05D0, 0x05EA, + 0x05F0, 0x05F4, + 0x061B, 0x061B, + 0x061F, 0x061F, + 0x0621, 0x063A, + 0x0640, 0x064A, + 0x066D, 0x066F, + 0x0671, 0x06D5, + 0x06DD, 0x06DD, + 0x06E5, 0x06E6, + 0x06FA, 0x06FE, + 0x0700, 0x070D, + 0x0710, 0x0710, + 0x0712, 0x072C, + 0x0780, 0x07A5, + 0x07B1, 0x07B1, + 0x200F, 0x200F, + 0xFB1D, 0xFB1D, + 0xFB1F, 0xFB28, + 0xFB2A, 0xFB36, + 0xFB38, 0xFB3C, + 0xFB3E, 0xFB3E, + 0xFB40, 0xFB41, + 0xFB43, 0xFB44, + 0xFB46, 0xFBB1, + 0xFBD3, 0xFD3D, + 0xFD50, 0xFD8F, + 0xFD92, 0xFDC7, + 0xFDF0, 0xFDFC, + 0xFE70, 0xFE74, + 0xFE76, 0xFEFC +}; + +/* D.2 Characters with bidirectional property "L" */ +static const pg_wchar LCat_codepoint_ranges[] = +{ + 0x0041, 0x005A, + 0x0061, 0x007A, + 0x00AA, 0x00AA, + 0x00B5, 0x00B5, + 0x00BA, 0x00BA, + 0x00C0, 0x00D6, + 0x00D8, 0x00F6, + 0x00F8, 0x0220, + 0x0222, 0x0233, + 0x0250, 0x02AD, + 0x02B0, 0x02B8, + 0x02BB, 0x02C1, + 0x02D0, 0x02D1, + 0x02E0, 0x02E4, + 0x02EE, 0x02EE, + 0x037A, 0x037A, + 0x0386, 0x0386, + 0x0388, 0x038A, + 0x038C, 0x038C, + 0x038E, 0x03A1, + 0x03A3, 0x03CE, + 0x03D0, 0x03F5, + 0x0400, 0x0482, + 0x048A, 0x04CE, + 0x04D0, 0x04F5, + 0x04F8, 0x04F9, + 0x0500, 0x050F, + 0x0531, 0x0556, + 0x0559, 0x055F, + 0x0561, 0x0587, + 0x0589, 0x0589, + 0x0903, 0x0903, + 0x0905, 0x0939, + 0x093D, 0x0940, + 0x0949, 0x094C, + 0x0950, 0x0950, + 0x0958, 0x0961, + 0x0964, 0x0970, + 0x0982, 0x0983, + 0x0985, 0x098C, + 0x098F, 0x0990, + 0x0993, 0x09A8, + 0x09AA, 0x09B0, + 0x09B2, 0x09B2, + 0x09B6, 0x09B9, + 0x09BE, 0x09C0, + 0x09C7, 0x09C8, + 0x09CB, 0x09CC, + 0x09D7, 0x09D7, + 0x09DC, 0x09DD, + 0x09DF, 0x09E1, + 0x09E6, 0x09F1, + 0x09F4, 0x09FA, + 0x0A05, 0x0A0A, + 0x0A0F, 0x0A10, + 0x0A13, 0x0A28, + 0x0A2A, 0x0A30, + 0x0A32, 0x0A33, + 0x0A35, 0x0A36, + 0x0A38, 0x0A39, + 0x0A3E, 0x0A40, + 0x0A59, 0x0A5C, + 0x0A5E, 0x0A5E, + 0x0A66, 0x0A6F, + 0x0A72, 0x0A74, + 0x0A83, 0x0A83, + 0x0A85, 0x0A8B, + 0x0A8D, 0x0A8D, + 0x0A8F, 0x0A91, + 0x0A93, 0x0AA8, + 0x0AAA, 0x0AB0, + 0x0AB2, 0x0AB3, + 0x0AB5, 0x0AB9, + 0x0ABD, 0x0AC0, + 0x0AC9, 0x0AC9, + 0x0ACB, 0x0ACC, + 0x0AD0, 0x0AD0, + 0x0AE0, 0x0AE0, + 0x0AE6, 0x0AEF, + 0x0B02, 0x0B03, + 0x0B05, 0x0B0C, + 0x0B0F, 0x0B10, + 0x0B13, 0x0B28, + 0x0B2A, 0x0B30, + 0x0B32, 0x0B33, + 0x0B36, 0x0B39, + 0x0B3D, 0x0B3E, + 0x0B40, 0x0B40, + 0x0B47, 0x0B48, + 0x0B4B, 0x0B4C, + 0x0B57, 0x0B57, + 0x0B5C, 0x0B5D, + 0x0B5F, 0x0B61, + 0x0B66, 0x0B70, + 0x0B83, 0x0B83, + 0x0B85, 0x0B8A, + 0x0B8E, 0x0B90, + 0x0B92, 0x0B95, + 0x0B99, 0x0B9A, + 0x0B9C, 0x0B9C, + 0x0B9E, 0x0B9F, + 0x0BA3, 0x0BA4, + 0x0BA8, 0x0BAA, + 0x0BAE, 0x0BB5, + 0x0BB7, 0x0BB9, + 0x0BBE, 0x0BBF, + 0x0BC1, 0x0BC2, + 0x0BC6, 0x0BC8, + 0x0BCA, 0x0BCC, + 0x0BD7, 0x0BD7, + 0x0BE7, 0x0BF2, + 0x0C01, 0x0C03, + 0x0C05, 0x0C0C, + 0x0C0E, 0x0C10, + 0x0C12, 0x0C28, + 0x0C2A, 0x0C33, + 0x0C35, 0x0C39, + 0x0C41, 0x0C44, + 0x0C60, 0x0C61, + 0x0C66, 0x0C6F, + 0x0C82, 0x0C83, + 0x0C85, 0x0C8C, + 0x0C8E, 0x0C90, + 0x0C92, 0x0CA8, + 0x0CAA, 0x0CB3, + 0x0CB5, 0x0CB9, + 0x0CBE, 0x0CBE, + 0x0CC0, 0x0CC4, + 0x0CC7, 0x0CC8, + 0x0CCA, 0x0CCB, + 0x0CD5, 0x0CD6, + 0x0CDE, 0x0CDE, + 0x0CE0, 0x0CE1, + 0x0CE6, 0x0CEF, + 0x0D02, 0x0D03, + 0x0D05, 0x0D0C, + 0x0D0E, 0x0D10, + 0x0D12, 0x0D28, + 0x0D2A, 0x0D39, + 0x0D3E, 0x0D40, + 0x0D46, 0x0D48, + 0x0D4A, 0x0D4C, + 0x0D57, 0x0D57, + 0x0D60, 0x0D61, + 0x0D66, 0x0D6F, + 0x0D82, 0x0D83, + 0x0D85, 0x0D96, + 0x0D9A, 0x0DB1, + 0x0DB3, 0x0DBB, + 0x0DBD, 0x0DBD, + 0x0DC0, 0x0DC6, + 0x0DCF, 0x0DD1, + 0x0DD8, 0x0DDF, + 0x0DF2, 0x0DF4, + 0x0E01, 0x0E30, + 0x0E32, 0x0E33, + 0x0E40, 0x0E46, + 0x0E4F, 0x0E5B, + 0x0E81, 0x0E82, + 0x0E84, 0x0E84, + 0x0E87, 0x0E88, + 0x0E8A, 0x0E8A, + 0x0E8D, 0x0E8D, + 0x0E94, 0x0E97, + 0x0E99, 0x0E9F, + 0x0EA1, 0x0EA3, + 0x0EA5, 0x0EA5, + 0x0EA7, 0x0EA7, + 0x0EAA, 0x0EAB, + 0x0EAD, 0x0EB0, + 0x0EB2, 0x0EB3, + 0x0EBD, 0x0EBD, + 0x0EC0, 0x0EC4, + 0x0EC6, 0x0EC6, + 0x0ED0, 0x0ED9, + 0x0EDC, 0x0EDD, + 0x0F00, 0x0F17, + 0x0F1A, 0x0F34, + 0x0F36, 0x0F36, + 0x0F38, 0x0F38, + 0x0F3E, 0x0F47, + 0x0F49, 0x0F6A, + 0x0F7F, 0x0F7F, + 0x0F85, 0x0F85, + 0x0F88, 0x0F8B, + 0x0FBE, 0x0FC5, + 0x0FC7, 0x0FCC, + 0x0FCF, 0x0FCF, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102A, + 0x102C, 0x102C, + 0x1031, 0x1031, + 0x1038, 0x1038, + 0x1040, 0x1057, + 0x10A0, 0x10C5, + 0x10D0, 0x10F8, + 0x10FB, 0x10FB, + 0x1100, 0x1159, + 0x115F, 0x11A2, + 0x11A8, 0x11F9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124A, 0x124D, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125A, 0x125D, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128A, 0x128D, + 0x1290, 0x12AE, + 0x12B0, 0x12B0, + 0x12B2, 0x12B5, + 0x12B8, 0x12BE, + 0x12C0, 0x12C0, + 0x12C2, 0x12C5, + 0x12C8, 0x12CE, + 0x12D0, 0x12D6, + 0x12D8, 0x12EE, + 0x12F0, 0x130E, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131E, + 0x1320, 0x1346, + 0x1348, 0x135A, + 0x1361, 0x137C, + 0x13A0, 0x13F4, + 0x1401, 0x1676, + 0x1681, 0x169A, + 0x16A0, 0x16F0, + 0x1700, 0x170C, + 0x170E, 0x1711, + 0x1720, 0x1731, + 0x1735, 0x1736, + 0x1740, 0x1751, + 0x1760, 0x176C, + 0x176E, 0x1770, + 0x1780, 0x17B6, + 0x17BE, 0x17C5, + 0x17C7, 0x17C8, + 0x17D4, 0x17DA, + 0x17DC, 0x17DC, + 0x17E0, 0x17E9, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18A8, + 0x1E00, 0x1E9B, + 0x1EA0, 0x1EF9, + 0x1F00, 0x1F15, + 0x1F18, 0x1F1D, + 0x1F20, 0x1F45, + 0x1F48, 0x1F4D, + 0x1F50, 0x1F57, + 0x1F59, 0x1F59, + 0x1F5B, 0x1F5B, + 0x1F5D, 0x1F5D, + 0x1F5F, 0x1F7D, + 0x1F80, 0x1FB4, + 0x1FB6, 0x1FBC, + 0x1FBE, 0x1FBE, + 0x1FC2, 0x1FC4, + 0x1FC6, 0x1FCC, + 0x1FD0, 0x1FD3, + 0x1FD6, 0x1FDB, + 0x1FE0, 0x1FEC, + 0x1FF2, 0x1FF4, + 0x1FF6, 0x1FFC, + 0x200E, 0x200E, + 0x2071, 0x2071, + 0x207F, 0x207F, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210A, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211D, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212A, 0x212D, + 0x212F, 0x2131, + 0x2133, 0x2139, + 0x213D, 0x213F, + 0x2145, 0x2149, + 0x2160, 0x2183, + 0x2336, 0x237A, + 0x2395, 0x2395, + 0x249C, 0x24E9, + 0x3005, 0x3007, + 0x3021, 0x3029, + 0x3031, 0x3035, + 0x3038, 0x303C, + 0x3041, 0x3096, + 0x309D, 0x309F, + 0x30A1, 0x30FA, + 0x30FC, 0x30FF, + 0x3105, 0x312C, + 0x3131, 0x318E, + 0x3190, 0x31B7, + 0x31F0, 0x321C, + 0x3220, 0x3243, + 0x3260, 0x327B, + 0x327F, 0x32B0, + 0x32C0, 0x32CB, + 0x32D0, 0x32FE, + 0x3300, 0x3376, + 0x337B, 0x33DD, + 0x33E0, 0x33FE, + 0x3400, 0x4DB5, + 0x4E00, 0x9FA5, + 0xA000, 0xA48C, + 0xAC00, 0xD7A3, + 0xD800, 0xFA2D, + 0xFA30, 0xFA6A, + 0xFB00, 0xFB06, + 0xFB13, 0xFB17, + 0xFF21, 0xFF3A, + 0xFF41, 0xFF5A, + 0xFF66, 0xFFBE, + 0xFFC2, 0xFFC7, + 0xFFCA, 0xFFCF, + 0xFFD2, 0xFFD7, + 0xFFDA, 0xFFDC, + 0x10300, 0x1031E, + 0x10320, 0x10323, + 0x10330, 0x1034A, + 0x10400, 0x10425, + 0x10428, 0x1044D, + 0x1D000, 0x1D0F5, + 0x1D100, 0x1D126, + 0x1D12A, 0x1D166, + 0x1D16A, 0x1D172, + 0x1D183, 0x1D184, + 0x1D18C, 0x1D1A9, + 0x1D1AE, 0x1D1DD, + 0x1D400, 0x1D454, + 0x1D456, 0x1D49C, + 0x1D49E, 0x1D49F, + 0x1D4A2, 0x1D4A2, + 0x1D4A5, 0x1D4A6, + 0x1D4A9, 0x1D4AC, + 0x1D4AE, 0x1D4B9, + 0x1D4BB, 0x1D4BB, + 0x1D4BD, 0x1D4C0, + 0x1D4C2, 0x1D4C3, + 0x1D4C5, 0x1D505, + 0x1D507, 0x1D50A, + 0x1D50D, 0x1D514, + 0x1D516, 0x1D51C, + 0x1D51E, 0x1D539, + 0x1D53B, 0x1D53E, + 0x1D540, 0x1D544, + 0x1D546, 0x1D546, + 0x1D54A, 0x1D550, + 0x1D552, 0x1D6A3, + 0x1D6A8, 0x1D7C9, + 0x20000, 0x2A6D6, + 0x2F800, 0x2FA1D, + 0xF0000, 0xFFFFD, + 0x100000, 0x10FFFD +}; + +/* End of stringprep tables */ + + +/* Is the given Unicode codepoint in the given table of ranges? */ +#define IS_CODE_IN_TABLE(code, map) is_code_in_table(code, map, lengthof(map)) + +static int +codepoint_range_cmp(const void *a, const void *b) +{ + const pg_wchar *key = (const pg_wchar *) a; + const pg_wchar *range = (const pg_wchar *) b; + + if (*key < range[0]) + return -1; /* less than lower bound */ + if (*key > range[1]) + return 1; /* greater than upper bound */ + + return 0; /* within range */ +} + +static bool +is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize) +{ + Assert(mapsize % 2 == 0); + + if (code < map[0] || code > map[mapsize - 1]) + return false; + + if (bsearch(&code, map, mapsize / 2, sizeof(pg_wchar) * 2, + codepoint_range_cmp)) + return true; + else + return false; +} + +/* + * Calculate the length in characters of a null-terminated UTF-8 string. + * + * Returns -1 if the input is not valid UTF-8. + */ +static int +pg_utf8_string_len(const char *source) +{ + const unsigned char *p = (const unsigned char *) source; + int l; + int num_chars = 0; + + while (*p) + { + l = pg_utf_mblen(p); + + if (!pg_utf8_islegal(p, l)) + return -1; + + p += l; + num_chars++; + } + + return num_chars; +} + + +/* + * pg_saslprep - Normalize a password with SASLprep. + * + * SASLprep requires the input to be in UTF-8 encoding, but PostgreSQL + * supports many encodings, so we don't blindly assume that. pg_saslprep + * will check if the input looks like valid UTF-8, and returns + * SASLPREP_INVALID_UTF8 if not. + * + * If the string contains prohibited characters (or more precisely, if the + * output string would contain prohibited characters after normalization), + * returns SASLPREP_PROHIBITED. + * + * On success, returns SASLPREP_SUCCESS, and the normalized string in + * *output. + * + * In frontend, the normalized string is malloc'd, and the caller is + * responsible for freeing it. If an allocation fails, returns + * SASLPREP_OOM. In backend, the normalized string is palloc'd instead, + * and a failed allocation leads to ereport(ERROR). + */ +pg_saslprep_rc +pg_saslprep(const char *input, char **output) +{ + pg_wchar *input_chars = NULL; + pg_wchar *output_chars = NULL; + int input_size; + char *result; + int result_size; + int count; + int i; + bool contains_RandALCat; + unsigned char *p; + pg_wchar *wp; + + /* Ensure we return *output as NULL on failure */ + *output = NULL; + + /* + * Quick check if the input is pure ASCII. An ASCII string requires no + * further processing. + */ + if (pg_is_ascii(input)) + { + *output = STRDUP(input); + if (!(*output)) + goto oom; + return SASLPREP_SUCCESS; + } + + /* + * Convert the input from UTF-8 to an array of Unicode codepoints. + * + * This also checks that the input is a legal UTF-8 string. + */ + input_size = pg_utf8_string_len(input); + if (input_size < 0) + return SASLPREP_INVALID_UTF8; + + input_chars = ALLOC((input_size + 1) * sizeof(pg_wchar)); + if (!input_chars) + goto oom; + + p = (unsigned char *) input; + for (i = 0; i < input_size; i++) + { + input_chars[i] = utf8_to_unicode(p); + p += pg_utf_mblen(p); + } + input_chars[i] = (pg_wchar) '\0'; + + /* + * The steps below correspond to the steps listed in [RFC3454], Section + * "2. Preparation Overview" + */ + + /* + * 1) Map -- For each character in the input, check if it has a mapping + * and, if so, replace it with its mapping. + */ + count = 0; + for (i = 0; i < input_size; i++) + { + pg_wchar code = input_chars[i]; + + if (IS_CODE_IN_TABLE(code, non_ascii_space_ranges)) + input_chars[count++] = 0x0020; + else if (IS_CODE_IN_TABLE(code, commonly_mapped_to_nothing_ranges)) + { + /* map to nothing */ + } + else + input_chars[count++] = code; + } + input_chars[count] = (pg_wchar) '\0'; + input_size = count; + + if (input_size == 0) + goto prohibited; /* don't allow empty password */ + + /* + * 2) Normalize -- Normalize the result of step 1 using Unicode + * normalization. + */ + output_chars = unicode_normalize(UNICODE_NFKC, input_chars); + if (!output_chars) + goto oom; + + /* + * 3) Prohibit -- Check for any characters that are not allowed in the + * output. If any are found, return an error. + */ + for (i = 0; i < input_size; i++) + { + pg_wchar code = input_chars[i]; + + if (IS_CODE_IN_TABLE(code, prohibited_output_ranges)) + goto prohibited; + if (IS_CODE_IN_TABLE(code, unassigned_codepoint_ranges)) + goto prohibited; + } + + /* + * 4) Check bidi -- Possibly check for right-to-left characters, and if + * any are found, make sure that the whole string satisfies the + * requirements for bidirectional strings. If the string does not satisfy + * the requirements for bidirectional strings, return an error. + * + * [RFC3454], Section "6. Bidirectional Characters" explains in more + * detail what that means: + * + * "In any profile that specifies bidirectional character handling, all + * three of the following requirements MUST be met: + * + * 1) The characters in section 5.8 MUST be prohibited. + * + * 2) If a string contains any RandALCat character, the string MUST NOT + * contain any LCat character. + * + * 3) If a string contains any RandALCat character, a RandALCat character + * MUST be the first character of the string, and a RandALCat character + * MUST be the last character of the string." + */ + contains_RandALCat = false; + for (i = 0; i < input_size; i++) + { + pg_wchar code = input_chars[i]; + + if (IS_CODE_IN_TABLE(code, RandALCat_codepoint_ranges)) + { + contains_RandALCat = true; + break; + } + } + + if (contains_RandALCat) + { + pg_wchar first = input_chars[0]; + pg_wchar last = input_chars[input_size - 1]; + + for (i = 0; i < input_size; i++) + { + pg_wchar code = input_chars[i]; + + if (IS_CODE_IN_TABLE(code, LCat_codepoint_ranges)) + goto prohibited; + } + + if (!IS_CODE_IN_TABLE(first, RandALCat_codepoint_ranges) || + !IS_CODE_IN_TABLE(last, RandALCat_codepoint_ranges)) + goto prohibited; + } + + /* + * Finally, convert the result back to UTF-8. + */ + result_size = 0; + for (wp = output_chars; *wp; wp++) + { + unsigned char buf[4]; + + unicode_to_utf8(*wp, buf); + result_size += pg_utf_mblen(buf); + } + + result = ALLOC(result_size + 1); + if (!result) + goto oom; + + /* + * There are no error exits below here, so the error exit paths don't need + * to worry about possibly freeing "result". + */ + p = (unsigned char *) result; + for (wp = output_chars; *wp; wp++) + { + unicode_to_utf8(*wp, p); + p += pg_utf_mblen(p); + } + Assert((char *) p == result + result_size); + *p = '\0'; + + FREE(input_chars); + FREE(output_chars); + + *output = result; + return SASLPREP_SUCCESS; + +prohibited: + if (input_chars) + FREE(input_chars); + if (output_chars) + FREE(output_chars); + + return SASLPREP_PROHIBITED; + +oom: + if (input_chars) + FREE(input_chars); + if (output_chars) + FREE(output_chars); + + return SASLPREP_OOM; +} diff --git a/src/common/scram-common.c b/src/common/scram-common.c new file mode 100644 index 0000000..6448564 --- /dev/null +++ b/src/common/scram-common.c @@ -0,0 +1,330 @@ +/*------------------------------------------------------------------------- + * scram-common.c + * Shared frontend/backend code for SCRAM authentication + * + * This contains the common low-level functions needed in both frontend and + * backend, for implement the Salted Challenge Response Authentication + * Mechanism (SCRAM), per IETF's RFC 5802. + * + * Portions Copyright (c) 2017-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/scram-common.c + * + *------------------------------------------------------------------------- + */ +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/base64.h" +#include "common/hmac.h" +#include "common/scram-common.h" +#ifndef FRONTEND +#include "miscadmin.h" +#endif +#include "port/pg_bswap.h" + +/* + * Calculate SaltedPassword. + * + * The password should already be normalized by SASLprep. Returns 0 on + * success, -1 on failure with *errstr pointing to a message about the + * error details. + */ +int +scram_SaltedPassword(const char *password, + pg_cryptohash_type hash_type, int key_length, + const char *salt, int saltlen, int iterations, + uint8 *result, const char **errstr) +{ + int password_len = strlen(password); + uint32 one = pg_hton32(1); + int i, + j; + uint8 Ui[SCRAM_MAX_KEY_LEN]; + uint8 Ui_prev[SCRAM_MAX_KEY_LEN]; + pg_hmac_ctx *hmac_ctx = pg_hmac_create(hash_type); + + if (hmac_ctx == NULL) + { + *errstr = pg_hmac_error(NULL); /* returns OOM */ + return -1; + } + + /* + * Iterate hash calculation of HMAC entry using given salt. This is + * essentially PBKDF2 (see RFC2898) with HMAC() as the pseudorandom + * function. + */ + + /* First iteration */ + if (pg_hmac_init(hmac_ctx, (uint8 *) password, password_len) < 0 || + pg_hmac_update(hmac_ctx, (uint8 *) salt, saltlen) < 0 || + pg_hmac_update(hmac_ctx, (uint8 *) &one, sizeof(uint32)) < 0 || + pg_hmac_final(hmac_ctx, Ui_prev, key_length) < 0) + { + *errstr = pg_hmac_error(hmac_ctx); + pg_hmac_free(hmac_ctx); + return -1; + } + + memcpy(result, Ui_prev, key_length); + + /* Subsequent iterations */ + for (i = 2; i <= iterations; i++) + { +#ifndef FRONTEND + /* + * Make sure that this is interruptible as scram_iterations could be + * set to a large value. + */ + CHECK_FOR_INTERRUPTS(); +#endif + + if (pg_hmac_init(hmac_ctx, (uint8 *) password, password_len) < 0 || + pg_hmac_update(hmac_ctx, (uint8 *) Ui_prev, key_length) < 0 || + pg_hmac_final(hmac_ctx, Ui, key_length) < 0) + { + *errstr = pg_hmac_error(hmac_ctx); + pg_hmac_free(hmac_ctx); + return -1; + } + + for (j = 0; j < key_length; j++) + result[j] ^= Ui[j]; + memcpy(Ui_prev, Ui, key_length); + } + + pg_hmac_free(hmac_ctx); + return 0; +} + + +/* + * Calculate hash for a NULL-terminated string. (The NULL terminator is + * not included in the hash). Returns 0 on success, -1 on failure with *errstr + * pointing to a message about the error details. + */ +int +scram_H(const uint8 *input, pg_cryptohash_type hash_type, int key_length, + uint8 *result, const char **errstr) +{ + pg_cryptohash_ctx *ctx; + + ctx = pg_cryptohash_create(hash_type); + if (ctx == NULL) + { + *errstr = pg_cryptohash_error(NULL); /* returns OOM */ + return -1; + } + + if (pg_cryptohash_init(ctx) < 0 || + pg_cryptohash_update(ctx, input, key_length) < 0 || + pg_cryptohash_final(ctx, result, key_length) < 0) + { + *errstr = pg_cryptohash_error(ctx); + pg_cryptohash_free(ctx); + return -1; + } + + pg_cryptohash_free(ctx); + return 0; +} + +/* + * Calculate ClientKey. Returns 0 on success, -1 on failure with *errstr + * pointing to a message about the error details. + */ +int +scram_ClientKey(const uint8 *salted_password, + pg_cryptohash_type hash_type, int key_length, + uint8 *result, const char **errstr) +{ + pg_hmac_ctx *ctx = pg_hmac_create(hash_type); + + if (ctx == NULL) + { + *errstr = pg_hmac_error(NULL); /* returns OOM */ + return -1; + } + + if (pg_hmac_init(ctx, salted_password, key_length) < 0 || + pg_hmac_update(ctx, (uint8 *) "Client Key", strlen("Client Key")) < 0 || + pg_hmac_final(ctx, result, key_length) < 0) + { + *errstr = pg_hmac_error(ctx); + pg_hmac_free(ctx); + return -1; + } + + pg_hmac_free(ctx); + return 0; +} + +/* + * Calculate ServerKey. Returns 0 on success, -1 on failure with *errstr + * pointing to a message about the error details. + */ +int +scram_ServerKey(const uint8 *salted_password, + pg_cryptohash_type hash_type, int key_length, + uint8 *result, const char **errstr) +{ + pg_hmac_ctx *ctx = pg_hmac_create(hash_type); + + if (ctx == NULL) + { + *errstr = pg_hmac_error(NULL); /* returns OOM */ + return -1; + } + + if (pg_hmac_init(ctx, salted_password, key_length) < 0 || + pg_hmac_update(ctx, (uint8 *) "Server Key", strlen("Server Key")) < 0 || + pg_hmac_final(ctx, result, key_length) < 0) + { + *errstr = pg_hmac_error(ctx); + pg_hmac_free(ctx); + return -1; + } + + pg_hmac_free(ctx); + return 0; +} + + +/* + * Construct a SCRAM secret, for storing in pg_authid.rolpassword. + * + * The password should already have been processed with SASLprep, if necessary! + * + * If iterations is 0, default number of iterations is used. The result is + * palloc'd or malloc'd, so caller is responsible for freeing it. + * + * On error, returns NULL and sets *errstr to point to a message about the + * error details. + */ +char * +scram_build_secret(pg_cryptohash_type hash_type, int key_length, + const char *salt, int saltlen, int iterations, + const char *password, const char **errstr) +{ + uint8 salted_password[SCRAM_MAX_KEY_LEN]; + uint8 stored_key[SCRAM_MAX_KEY_LEN]; + uint8 server_key[SCRAM_MAX_KEY_LEN]; + char *result; + char *p; + int maxlen; + int encoded_salt_len; + int encoded_stored_len; + int encoded_server_len; + int encoded_result; + + /* Only this hash method is supported currently */ + Assert(hash_type == PG_SHA256); + + Assert(iterations > 0); + + /* Calculate StoredKey and ServerKey */ + if (scram_SaltedPassword(password, hash_type, key_length, + salt, saltlen, iterations, + salted_password, errstr) < 0 || + scram_ClientKey(salted_password, hash_type, key_length, + stored_key, errstr) < 0 || + scram_H(stored_key, hash_type, key_length, + stored_key, errstr) < 0 || + scram_ServerKey(salted_password, hash_type, key_length, + server_key, errstr) < 0) + { + /* errstr is filled already here */ +#ifdef FRONTEND + return NULL; +#else + elog(ERROR, "could not calculate stored key and server key: %s", + *errstr); +#endif + } + + /*---------- + * The format is: + * SCRAM-SHA-256$<iteration count>:<salt>$<StoredKey>:<ServerKey> + *---------- + */ + encoded_salt_len = pg_b64_enc_len(saltlen); + encoded_stored_len = pg_b64_enc_len(key_length); + encoded_server_len = pg_b64_enc_len(key_length); + + maxlen = strlen("SCRAM-SHA-256") + 1 + + 10 + 1 /* iteration count */ + + encoded_salt_len + 1 /* Base64-encoded salt */ + + encoded_stored_len + 1 /* Base64-encoded StoredKey */ + + encoded_server_len + 1; /* Base64-encoded ServerKey */ + +#ifdef FRONTEND + result = malloc(maxlen); + if (!result) + { + *errstr = _("out of memory"); + return NULL; + } +#else + result = palloc(maxlen); +#endif + + p = result + sprintf(result, "SCRAM-SHA-256$%d:", iterations); + + /* salt */ + encoded_result = pg_b64_encode(salt, saltlen, p, encoded_salt_len); + if (encoded_result < 0) + { + *errstr = _("could not encode salt"); +#ifdef FRONTEND + free(result); + return NULL; +#else + elog(ERROR, "%s", *errstr); +#endif + } + p += encoded_result; + *(p++) = '$'; + + /* stored key */ + encoded_result = pg_b64_encode((char *) stored_key, key_length, p, + encoded_stored_len); + if (encoded_result < 0) + { + *errstr = _("could not encode stored key"); +#ifdef FRONTEND + free(result); + return NULL; +#else + elog(ERROR, "%s", *errstr); +#endif + } + + p += encoded_result; + *(p++) = ':'; + + /* server key */ + encoded_result = pg_b64_encode((char *) server_key, key_length, p, + encoded_server_len); + if (encoded_result < 0) + { + *errstr = _("could not encode server key"); +#ifdef FRONTEND + free(result); + return NULL; +#else + elog(ERROR, "%s", *errstr); +#endif + } + + p += encoded_result; + *(p++) = '\0'; + + Assert(p - result <= maxlen); + + return result; +} diff --git a/src/common/sha1.c b/src/common/sha1.c new file mode 100644 index 0000000..29dc55e --- /dev/null +++ b/src/common/sha1.c @@ -0,0 +1,369 @@ +/*------------------------------------------------------------------------- + * + * sha1.c + * Implements the SHA1 Secure Hash Algorithm + * + * Fallback implementation of SHA1, as specified in RFC 3174. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/sha1.c + * + *------------------------------------------------------------------------- + */ + +/* $KAME: sha1.c,v 1.3 2000/02/22 14:01:18 itojun Exp $ */ + +/* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * FIPS pub 180-1: Secure Hash Algorithm (SHA-1) + * based on: http://www.itl.nist.gov/fipspubs/fip180-1.htm + * implemented by Jun-ichiro itojun Itoh <itojun@itojun.org> + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <sys/param.h> + +#include "sha1_int.h" + +/* constant table */ +static uint32 _K[] = {0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6}; + +#define K(t) _K[(t) / 20] + +#define F0(b, c, d) (((b) & (c)) | ((~(b)) & (d))) +#define F1(b, c, d) (((b) ^ (c)) ^ (d)) +#define F2(b, c, d) (((b) & (c)) | ((b) & (d)) | ((c) & (d))) +#define F3(b, c, d) (((b) ^ (c)) ^ (d)) + +#define S(n, x) (((x) << (n)) | ((x) >> (32 - (n)))) + +#define H(n) (ctx->h.b32[(n)]) +#define COUNT (ctx->count) +#define BCOUNT (ctx->c.b64[0] / 8) +#define W(n) (ctx->m.b32[(n)]) + +#define PUTPAD(x) \ +do { \ + ctx->m.b8[(COUNT % 64)] = (x); \ + COUNT++; \ + COUNT %= 64; \ + if (COUNT % 64 == 0) \ + sha1_step(ctx); \ +} while (0) + +static void +sha1_step(pg_sha1_ctx *ctx) +{ + uint32 a, + b, + c, + d, + e; + size_t t, + s; + uint32 tmp; + +#ifndef WORDS_BIGENDIAN + pg_sha1_ctx tctx; + + memmove(&tctx.m.b8[0], &ctx->m.b8[0], 64); + ctx->m.b8[0] = tctx.m.b8[3]; + ctx->m.b8[1] = tctx.m.b8[2]; + ctx->m.b8[2] = tctx.m.b8[1]; + ctx->m.b8[3] = tctx.m.b8[0]; + ctx->m.b8[4] = tctx.m.b8[7]; + ctx->m.b8[5] = tctx.m.b8[6]; + ctx->m.b8[6] = tctx.m.b8[5]; + ctx->m.b8[7] = tctx.m.b8[4]; + ctx->m.b8[8] = tctx.m.b8[11]; + ctx->m.b8[9] = tctx.m.b8[10]; + ctx->m.b8[10] = tctx.m.b8[9]; + ctx->m.b8[11] = tctx.m.b8[8]; + ctx->m.b8[12] = tctx.m.b8[15]; + ctx->m.b8[13] = tctx.m.b8[14]; + ctx->m.b8[14] = tctx.m.b8[13]; + ctx->m.b8[15] = tctx.m.b8[12]; + ctx->m.b8[16] = tctx.m.b8[19]; + ctx->m.b8[17] = tctx.m.b8[18]; + ctx->m.b8[18] = tctx.m.b8[17]; + ctx->m.b8[19] = tctx.m.b8[16]; + ctx->m.b8[20] = tctx.m.b8[23]; + ctx->m.b8[21] = tctx.m.b8[22]; + ctx->m.b8[22] = tctx.m.b8[21]; + ctx->m.b8[23] = tctx.m.b8[20]; + ctx->m.b8[24] = tctx.m.b8[27]; + ctx->m.b8[25] = tctx.m.b8[26]; + ctx->m.b8[26] = tctx.m.b8[25]; + ctx->m.b8[27] = tctx.m.b8[24]; + ctx->m.b8[28] = tctx.m.b8[31]; + ctx->m.b8[29] = tctx.m.b8[30]; + ctx->m.b8[30] = tctx.m.b8[29]; + ctx->m.b8[31] = tctx.m.b8[28]; + ctx->m.b8[32] = tctx.m.b8[35]; + ctx->m.b8[33] = tctx.m.b8[34]; + ctx->m.b8[34] = tctx.m.b8[33]; + ctx->m.b8[35] = tctx.m.b8[32]; + ctx->m.b8[36] = tctx.m.b8[39]; + ctx->m.b8[37] = tctx.m.b8[38]; + ctx->m.b8[38] = tctx.m.b8[37]; + ctx->m.b8[39] = tctx.m.b8[36]; + ctx->m.b8[40] = tctx.m.b8[43]; + ctx->m.b8[41] = tctx.m.b8[42]; + ctx->m.b8[42] = tctx.m.b8[41]; + ctx->m.b8[43] = tctx.m.b8[40]; + ctx->m.b8[44] = tctx.m.b8[47]; + ctx->m.b8[45] = tctx.m.b8[46]; + ctx->m.b8[46] = tctx.m.b8[45]; + ctx->m.b8[47] = tctx.m.b8[44]; + ctx->m.b8[48] = tctx.m.b8[51]; + ctx->m.b8[49] = tctx.m.b8[50]; + ctx->m.b8[50] = tctx.m.b8[49]; + ctx->m.b8[51] = tctx.m.b8[48]; + ctx->m.b8[52] = tctx.m.b8[55]; + ctx->m.b8[53] = tctx.m.b8[54]; + ctx->m.b8[54] = tctx.m.b8[53]; + ctx->m.b8[55] = tctx.m.b8[52]; + ctx->m.b8[56] = tctx.m.b8[59]; + ctx->m.b8[57] = tctx.m.b8[58]; + ctx->m.b8[58] = tctx.m.b8[57]; + ctx->m.b8[59] = tctx.m.b8[56]; + ctx->m.b8[60] = tctx.m.b8[63]; + ctx->m.b8[61] = tctx.m.b8[62]; + ctx->m.b8[62] = tctx.m.b8[61]; + ctx->m.b8[63] = tctx.m.b8[60]; +#endif + + a = H(0); + b = H(1); + c = H(2); + d = H(3); + e = H(4); + + for (t = 0; t < 20; t++) + { + s = t & 0x0f; + if (t >= 16) + W(s) = S(1, W((s + 13) & 0x0f) ^ W((s + 8) & 0x0f) ^ W((s + 2) & 0x0f) ^ W(s)); + tmp = S(5, a) + F0(b, c, d) + e + W(s) + K(t); + e = d; + d = c; + c = S(30, b); + b = a; + a = tmp; + } + for (t = 20; t < 40; t++) + { + s = t & 0x0f; + W(s) = S(1, W((s + 13) & 0x0f) ^ W((s + 8) & 0x0f) ^ W((s + 2) & 0x0f) ^ W(s)); + tmp = S(5, a) + F1(b, c, d) + e + W(s) + K(t); + e = d; + d = c; + c = S(30, b); + b = a; + a = tmp; + } + for (t = 40; t < 60; t++) + { + s = t & 0x0f; + W(s) = S(1, W((s + 13) & 0x0f) ^ W((s + 8) & 0x0f) ^ W((s + 2) & 0x0f) ^ W(s)); + tmp = S(5, a) + F2(b, c, d) + e + W(s) + K(t); + e = d; + d = c; + c = S(30, b); + b = a; + a = tmp; + } + for (t = 60; t < 80; t++) + { + s = t & 0x0f; + W(s) = S(1, W((s + 13) & 0x0f) ^ W((s + 8) & 0x0f) ^ W((s + 2) & 0x0f) ^ W(s)); + tmp = S(5, a) + F3(b, c, d) + e + W(s) + K(t); + e = d; + d = c; + c = S(30, b); + b = a; + a = tmp; + } + + H(0) = H(0) + a; + H(1) = H(1) + b; + H(2) = H(2) + c; + H(3) = H(3) + d; + H(4) = H(4) + e; + + memset(&ctx->m.b8[0], 0, 64); +} + +static void +sha1_pad(pg_sha1_ctx *ctx) +{ + size_t padlen; /* pad length in bytes */ + size_t padstart; + + PUTPAD(0x80); + + padstart = COUNT % 64; + padlen = 64 - padstart; + if (padlen < 8) + { + memset(&ctx->m.b8[padstart], 0, padlen); + COUNT += padlen; + COUNT %= 64; + sha1_step(ctx); + padstart = COUNT % 64; /* should be 0 */ + padlen = 64 - padstart; /* should be 64 */ + } + memset(&ctx->m.b8[padstart], 0, padlen - 8); + COUNT += (padlen - 8); + COUNT %= 64; +#ifdef WORDS_BIGENDIAN + PUTPAD(ctx->c.b8[0]); + PUTPAD(ctx->c.b8[1]); + PUTPAD(ctx->c.b8[2]); + PUTPAD(ctx->c.b8[3]); + PUTPAD(ctx->c.b8[4]); + PUTPAD(ctx->c.b8[5]); + PUTPAD(ctx->c.b8[6]); + PUTPAD(ctx->c.b8[7]); +#else + PUTPAD(ctx->c.b8[7]); + PUTPAD(ctx->c.b8[6]); + PUTPAD(ctx->c.b8[5]); + PUTPAD(ctx->c.b8[4]); + PUTPAD(ctx->c.b8[3]); + PUTPAD(ctx->c.b8[2]); + PUTPAD(ctx->c.b8[1]); + PUTPAD(ctx->c.b8[0]); +#endif +} + +static void +sha1_result(uint8 *digest0, pg_sha1_ctx *ctx) +{ + uint8 *digest; + + digest = (uint8 *) digest0; + +#ifdef WORDS_BIGENDIAN + memmove(digest, &ctx->h.b8[0], 20); +#else + digest[0] = ctx->h.b8[3]; + digest[1] = ctx->h.b8[2]; + digest[2] = ctx->h.b8[1]; + digest[3] = ctx->h.b8[0]; + digest[4] = ctx->h.b8[7]; + digest[5] = ctx->h.b8[6]; + digest[6] = ctx->h.b8[5]; + digest[7] = ctx->h.b8[4]; + digest[8] = ctx->h.b8[11]; + digest[9] = ctx->h.b8[10]; + digest[10] = ctx->h.b8[9]; + digest[11] = ctx->h.b8[8]; + digest[12] = ctx->h.b8[15]; + digest[13] = ctx->h.b8[14]; + digest[14] = ctx->h.b8[13]; + digest[15] = ctx->h.b8[12]; + digest[16] = ctx->h.b8[19]; + digest[17] = ctx->h.b8[18]; + digest[18] = ctx->h.b8[17]; + digest[19] = ctx->h.b8[16]; +#endif +} + +/* External routines for this SHA1 implementation */ + +/* + * pg_sha1_init + * + * Initialize a SHA1 context. + */ +void +pg_sha1_init(pg_sha1_ctx *ctx) +{ + memset(ctx, 0, sizeof(pg_sha1_ctx)); + H(0) = 0x67452301; + H(1) = 0xefcdab89; + H(2) = 0x98badcfe; + H(3) = 0x10325476; + H(4) = 0xc3d2e1f0; +} + +/* + * pg_sha1_update + * + * Update a SHA1 context. + */ +void +pg_sha1_update(pg_sha1_ctx *ctx, const uint8 *data, size_t len) +{ + const uint8 *input; + size_t gaplen; + size_t gapstart; + size_t off; + size_t copysiz; + + input = (const uint8 *) data; + off = 0; + + while (off < len) + { + gapstart = COUNT % 64; + gaplen = 64 - gapstart; + + copysiz = (gaplen < len - off) ? gaplen : len - off; + memmove(&ctx->m.b8[gapstart], &input[off], copysiz); + COUNT += copysiz; + COUNT %= 64; + ctx->c.b64[0] += copysiz * 8; + if (COUNT % 64 == 0) + sha1_step(ctx); + off += copysiz; + } +} + +/* + * pg_sha1_final + * + * Finalize a SHA1 context. + */ +void +pg_sha1_final(pg_sha1_ctx *ctx, uint8 *dest) +{ + sha1_pad(ctx); + sha1_result(dest, ctx); +} diff --git a/src/common/sha1_int.h b/src/common/sha1_int.h new file mode 100644 index 0000000..0ec2c69 --- /dev/null +++ b/src/common/sha1_int.h @@ -0,0 +1,81 @@ +/*------------------------------------------------------------------------- + * + * sha1_int.h + * Internal headers for fallback implementation of SHA1 + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/sha1_int.h + * + *------------------------------------------------------------------------- + */ + +/* $KAME: sha1.h,v 1.4 2000/02/22 14:01:18 itojun Exp $ */ + +/* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * FIPS pub 180-1: Secure Hash Algorithm (SHA-1) + * based on: http://www.itl.nist.gov/fipspubs/fip180-1.htm + * implemented by Jun-ichiro itojun Itoh <itojun@itojun.org> + */ + +#ifndef PG_SHA1_INT_H +#define PG_SHA1_INT_H + +#include "common/sha1.h" + +typedef struct +{ + union + { + uint8 b8[20]; + uint32 b32[5]; + } h; + union + { + uint8 b8[8]; + uint64 b64[1]; + } c; + union + { + uint8 b8[64]; + uint32 b32[16]; + } m; + uint8 count; +} pg_sha1_ctx; + +/* Interface routines for SHA1 */ +extern void pg_sha1_init(pg_sha1_ctx *ctx); +extern void pg_sha1_update(pg_sha1_ctx *ctx, const uint8 *data, size_t len); +extern void pg_sha1_final(pg_sha1_ctx *ctx, uint8 *dest); + +#endif /* PG_SHA1_INT_H */ diff --git a/src/common/sha2.c b/src/common/sha2.c new file mode 100644 index 0000000..89b28b8 --- /dev/null +++ b/src/common/sha2.c @@ -0,0 +1,1017 @@ +/*------------------------------------------------------------------------- + * + * sha2.c + * SHA functions for SHA-224, SHA-256, SHA-384 and SHA-512. + * + * This includes the fallback implementation for SHA2 cryptographic + * hashes. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/sha2.c + * + *------------------------------------------------------------------------- + */ + +/* $OpenBSD: sha2.c,v 1.6 2004/05/03 02:57:36 millert Exp $ */ +/* + * FILE: sha2.c + * AUTHOR: Aaron D. Gifford <me@aarongifford.com> + * + * Copyright (c) 2000-2001, Aaron D. Gifford + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTOR(S) ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTOR(S) BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $From: sha2.c,v 1.1 2001/11/08 00:01:51 adg Exp adg $ + */ + + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "sha2_int.h" + +/* + * In backend, use palloc/pfree to ease the error handling. In frontend, + * use malloc to be able to return a failure status back to the caller. + */ +#ifndef FRONTEND +#define ALLOC(size) palloc(size) +#define FREE(ptr) pfree(ptr) +#else +#define ALLOC(size) malloc(size) +#define FREE(ptr) free(ptr) +#endif + +/* + * UNROLLED TRANSFORM LOOP NOTE: + * You can define SHA2_UNROLL_TRANSFORM to use the unrolled transform + * loop version for the hash transform rounds (defined using macros + * later in this file). Either define on the command line, for example: + * + * cc -DSHA2_UNROLL_TRANSFORM -o sha2 sha2.c sha2prog.c + * + * or define below: + * + * #define SHA2_UNROLL_TRANSFORM + * + */ + +/*** SHA-256/384/512 Various Length Definitions ***********************/ +#define PG_SHA256_SHORT_BLOCK_LENGTH (PG_SHA256_BLOCK_LENGTH - 8) +#define PG_SHA384_SHORT_BLOCK_LENGTH (PG_SHA384_BLOCK_LENGTH - 16) +#define PG_SHA512_SHORT_BLOCK_LENGTH (PG_SHA512_BLOCK_LENGTH - 16) + +/*** ENDIAN REVERSAL MACROS *******************************************/ +#ifndef WORDS_BIGENDIAN +#define REVERSE32(w,x) { \ + uint32 tmp = (w); \ + tmp = (tmp >> 16) | (tmp << 16); \ + (x) = ((tmp & 0xff00ff00UL) >> 8) | ((tmp & 0x00ff00ffUL) << 8); \ +} +#define REVERSE64(w,x) { \ + uint64 tmp = (w); \ + tmp = (tmp >> 32) | (tmp << 32); \ + tmp = ((tmp & 0xff00ff00ff00ff00ULL) >> 8) | \ + ((tmp & 0x00ff00ff00ff00ffULL) << 8); \ + (x) = ((tmp & 0xffff0000ffff0000ULL) >> 16) | \ + ((tmp & 0x0000ffff0000ffffULL) << 16); \ +} +#endif /* not bigendian */ + +/* + * Macro for incrementally adding the unsigned 64-bit integer n to the + * unsigned 128-bit integer (represented using a two-element array of + * 64-bit words): + */ +#define ADDINC128(w,n) { \ + (w)[0] += (uint64)(n); \ + if ((w)[0] < (n)) { \ + (w)[1]++; \ + } \ +} + +/*** THE SIX LOGICAL FUNCTIONS ****************************************/ +/* + * Bit shifting and rotation (used by the six SHA-XYZ logical functions: + * + * NOTE: The naming of R and S appears backwards here (R is a SHIFT and + * S is a ROTATION) because the SHA-256/384/512 description document + * (see http://www.iwar.org.uk/comsec/resources/cipher/sha256-384-512.pdf) + * uses this same "backwards" definition. + */ +/* Shift-right (used in SHA-256, SHA-384, and SHA-512): */ +#define R(b,x) ((x) >> (b)) +/* 32-bit Rotate-right (used in SHA-256): */ +#define S32(b,x) (((x) >> (b)) | ((x) << (32 - (b)))) +/* 64-bit Rotate-right (used in SHA-384 and SHA-512): */ +#define S64(b,x) (((x) >> (b)) | ((x) << (64 - (b)))) + +/* Two of six logical functions used in SHA-256, SHA-384, and SHA-512: */ +#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) +#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + +/* Four of six logical functions used in SHA-256: */ +#define Sigma0_256(x) (S32(2, (x)) ^ S32(13, (x)) ^ S32(22, (x))) +#define Sigma1_256(x) (S32(6, (x)) ^ S32(11, (x)) ^ S32(25, (x))) +#define sigma0_256(x) (S32(7, (x)) ^ S32(18, (x)) ^ R(3 , (x))) +#define sigma1_256(x) (S32(17, (x)) ^ S32(19, (x)) ^ R(10, (x))) + +/* Four of six logical functions used in SHA-384 and SHA-512: */ +#define Sigma0_512(x) (S64(28, (x)) ^ S64(34, (x)) ^ S64(39, (x))) +#define Sigma1_512(x) (S64(14, (x)) ^ S64(18, (x)) ^ S64(41, (x))) +#define sigma0_512(x) (S64( 1, (x)) ^ S64( 8, (x)) ^ R( 7, (x))) +#define sigma1_512(x) (S64(19, (x)) ^ S64(61, (x)) ^ R( 6, (x))) + +/*** INTERNAL FUNCTION PROTOTYPES *************************************/ +/* NOTE: These should not be accessed directly from outside this + * library -- they are intended for private internal visibility/use + * only. + */ +static void SHA512_Last(pg_sha512_ctx *context); +static void SHA256_Transform(pg_sha256_ctx *context, const uint8 *data); +static void SHA512_Transform(pg_sha512_ctx *context, const uint8 *data); + +/*** SHA-XYZ INITIAL HASH VALUES AND CONSTANTS ************************/ +/* Hash constant words K for SHA-256: */ +static const uint32 K256[64] = { + 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, + 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, + 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, + 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL, + 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, + 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, + 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, + 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL, + 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, + 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, + 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, + 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, + 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL, + 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL, + 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, + 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL +}; + +/* Initial hash value H for SHA-224: */ +static const uint32 sha224_initial_hash_value[8] = { + 0xc1059ed8UL, + 0x367cd507UL, + 0x3070dd17UL, + 0xf70e5939UL, + 0xffc00b31UL, + 0x68581511UL, + 0x64f98fa7UL, + 0xbefa4fa4UL +}; + +/* Initial hash value H for SHA-256: */ +static const uint32 sha256_initial_hash_value[8] = { + 0x6a09e667UL, + 0xbb67ae85UL, + 0x3c6ef372UL, + 0xa54ff53aUL, + 0x510e527fUL, + 0x9b05688cUL, + 0x1f83d9abUL, + 0x5be0cd19UL +}; + +/* Hash constant words K for SHA-384 and SHA-512: */ +static const uint64 K512[80] = { + 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, + 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, + 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, + 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, + 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, + 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, + 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, + 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, + 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, + 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, + 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, + 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, + 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, + 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, + 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, + 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, + 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, + 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, + 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, + 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, + 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, + 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, + 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, + 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, + 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, + 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, + 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, + 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, + 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, + 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, + 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, + 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, + 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, + 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, + 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, + 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, + 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, + 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, + 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, + 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL +}; + +/* Initial hash value H for SHA-384 */ +static const uint64 sha384_initial_hash_value[8] = { + 0xcbbb9d5dc1059ed8ULL, + 0x629a292a367cd507ULL, + 0x9159015a3070dd17ULL, + 0x152fecd8f70e5939ULL, + 0x67332667ffc00b31ULL, + 0x8eb44a8768581511ULL, + 0xdb0c2e0d64f98fa7ULL, + 0x47b5481dbefa4fa4ULL +}; + +/* Initial hash value H for SHA-512 */ +static const uint64 sha512_initial_hash_value[8] = { + 0x6a09e667f3bcc908ULL, + 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, + 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, + 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, + 0x5be0cd19137e2179ULL +}; + + +/*** SHA-256: *********************************************************/ +void +pg_sha256_init(pg_sha256_ctx *context) +{ + if (context == NULL) + return; + memcpy(context->state, sha256_initial_hash_value, PG_SHA256_DIGEST_LENGTH); + memset(context->buffer, 0, PG_SHA256_BLOCK_LENGTH); + context->bitcount = 0; +} + +#ifdef SHA2_UNROLL_TRANSFORM + +/* Unrolled SHA-256 round macros: */ + +#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h) do { \ + W256[j] = (uint32)data[3] | ((uint32)data[2] << 8) | \ + ((uint32)data[1] << 16) | ((uint32)data[0] << 24); \ + data += 4; \ + T1 = (h) + Sigma1_256((e)) + Ch((e), (f), (g)) + K256[j] + W256[j]; \ + (d) += T1; \ + (h) = T1 + Sigma0_256((a)) + Maj((a), (b), (c)); \ + j++; \ +} while(0) + +#define ROUND256(a,b,c,d,e,f,g,h) do { \ + s0 = W256[(j+1)&0x0f]; \ + s0 = sigma0_256(s0); \ + s1 = W256[(j+14)&0x0f]; \ + s1 = sigma1_256(s1); \ + T1 = (h) + Sigma1_256((e)) + Ch((e), (f), (g)) + K256[j] + \ + (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); \ + (d) += T1; \ + (h) = T1 + Sigma0_256((a)) + Maj((a), (b), (c)); \ + j++; \ +} while(0) + +static void +SHA256_Transform(pg_sha256_ctx *context, const uint8 *data) +{ + uint32 a, + b, + c, + d, + e, + f, + g, + h, + s0, + s1; + uint32 T1, + *W256; + int j; + + W256 = (uint32 *) context->buffer; + + /* Initialize registers with the prev. intermediate value */ + a = context->state[0]; + b = context->state[1]; + c = context->state[2]; + d = context->state[3]; + e = context->state[4]; + f = context->state[5]; + g = context->state[6]; + h = context->state[7]; + + j = 0; + do + { + /* Rounds 0 to 15 (unrolled): */ + ROUND256_0_TO_15(a, b, c, d, e, f, g, h); + ROUND256_0_TO_15(h, a, b, c, d, e, f, g); + ROUND256_0_TO_15(g, h, a, b, c, d, e, f); + ROUND256_0_TO_15(f, g, h, a, b, c, d, e); + ROUND256_0_TO_15(e, f, g, h, a, b, c, d); + ROUND256_0_TO_15(d, e, f, g, h, a, b, c); + ROUND256_0_TO_15(c, d, e, f, g, h, a, b); + ROUND256_0_TO_15(b, c, d, e, f, g, h, a); + } while (j < 16); + + /* Now for the remaining rounds to 64: */ + do + { + ROUND256(a, b, c, d, e, f, g, h); + ROUND256(h, a, b, c, d, e, f, g); + ROUND256(g, h, a, b, c, d, e, f); + ROUND256(f, g, h, a, b, c, d, e); + ROUND256(e, f, g, h, a, b, c, d); + ROUND256(d, e, f, g, h, a, b, c); + ROUND256(c, d, e, f, g, h, a, b); + ROUND256(b, c, d, e, f, g, h, a); + } while (j < 64); + + /* Compute the current intermediate hash value */ + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + context->state[5] += f; + context->state[6] += g; + context->state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = 0; +} +#else /* SHA2_UNROLL_TRANSFORM */ + +static void +SHA256_Transform(pg_sha256_ctx *context, const uint8 *data) +{ + uint32 a, + b, + c, + d, + e, + f, + g, + h, + s0, + s1; + uint32 T1, + T2, + *W256; + int j; + + W256 = (uint32 *) context->buffer; + + /* Initialize registers with the prev. intermediate value */ + a = context->state[0]; + b = context->state[1]; + c = context->state[2]; + d = context->state[3]; + e = context->state[4]; + f = context->state[5]; + g = context->state[6]; + h = context->state[7]; + + j = 0; + do + { + W256[j] = (uint32) data[3] | ((uint32) data[2] << 8) | + ((uint32) data[1] << 16) | ((uint32) data[0] << 24); + data += 4; + /* Apply the SHA-256 compression function to update a..h */ + T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + W256[j]; + T2 = Sigma0_256(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 16); + + do + { + /* Part of the message block expansion: */ + s0 = W256[(j + 1) & 0x0f]; + s0 = sigma0_256(s0); + s1 = W256[(j + 14) & 0x0f]; + s1 = sigma1_256(s1); + + /* Apply the SHA-256 compression function to update a..h */ + T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + + (W256[j & 0x0f] += s1 + W256[(j + 9) & 0x0f] + s0); + T2 = Sigma0_256(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 64); + + /* Compute the current intermediate hash value */ + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + context->state[5] += f; + context->state[6] += g; + context->state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = T2 = 0; +} +#endif /* SHA2_UNROLL_TRANSFORM */ + +void +pg_sha256_update(pg_sha256_ctx *context, const uint8 *data, size_t len) +{ + size_t freespace, + usedspace; + + /* Calling with no data is valid (we do nothing) */ + if (len == 0) + return; + + usedspace = (context->bitcount >> 3) % PG_SHA256_BLOCK_LENGTH; + if (usedspace > 0) + { + /* Calculate how much free space is available in the buffer */ + freespace = PG_SHA256_BLOCK_LENGTH - usedspace; + + if (len >= freespace) + { + /* Fill the buffer completely and process it */ + memcpy(&context->buffer[usedspace], data, freespace); + context->bitcount += freespace << 3; + len -= freespace; + data += freespace; + SHA256_Transform(context, context->buffer); + } + else + { + /* The buffer is not yet full */ + memcpy(&context->buffer[usedspace], data, len); + context->bitcount += len << 3; + /* Clean up: */ + usedspace = freespace = 0; + return; + } + } + while (len >= PG_SHA256_BLOCK_LENGTH) + { + /* Process as many complete blocks as we can */ + SHA256_Transform(context, data); + context->bitcount += PG_SHA256_BLOCK_LENGTH << 3; + len -= PG_SHA256_BLOCK_LENGTH; + data += PG_SHA256_BLOCK_LENGTH; + } + if (len > 0) + { + /* There's left-overs, so save 'em */ + memcpy(context->buffer, data, len); + context->bitcount += len << 3; + } + /* Clean up: */ + usedspace = freespace = 0; +} + +static void +SHA256_Last(pg_sha256_ctx *context) +{ + unsigned int usedspace; + + usedspace = (context->bitcount >> 3) % PG_SHA256_BLOCK_LENGTH; +#ifndef WORDS_BIGENDIAN + /* Convert FROM host byte order */ + REVERSE64(context->bitcount, context->bitcount); +#endif + if (usedspace > 0) + { + /* Begin padding with a 1 bit: */ + context->buffer[usedspace++] = 0x80; + + if (usedspace <= PG_SHA256_SHORT_BLOCK_LENGTH) + { + /* Set-up for the last transform: */ + memset(&context->buffer[usedspace], 0, PG_SHA256_SHORT_BLOCK_LENGTH - usedspace); + } + else + { + if (usedspace < PG_SHA256_BLOCK_LENGTH) + { + memset(&context->buffer[usedspace], 0, PG_SHA256_BLOCK_LENGTH - usedspace); + } + /* Do second-to-last transform: */ + SHA256_Transform(context, context->buffer); + + /* And set-up for the last transform: */ + memset(context->buffer, 0, PG_SHA256_SHORT_BLOCK_LENGTH); + } + } + else + { + /* Set-up for the last transform: */ + memset(context->buffer, 0, PG_SHA256_SHORT_BLOCK_LENGTH); + + /* Begin padding with a 1 bit: */ + *context->buffer = 0x80; + } + /* Set the bit count: */ + *(uint64 *) &context->buffer[PG_SHA256_SHORT_BLOCK_LENGTH] = context->bitcount; + + /* Final transform: */ + SHA256_Transform(context, context->buffer); +} + +void +pg_sha256_final(pg_sha256_ctx *context, uint8 *digest) +{ + /* If no digest buffer is passed, we don't bother doing this: */ + if (digest != NULL) + { + SHA256_Last(context); + +#ifndef WORDS_BIGENDIAN + { + /* Convert TO host byte order */ + int j; + + for (j = 0; j < 8; j++) + { + REVERSE32(context->state[j], context->state[j]); + } + } +#endif + memcpy(digest, context->state, PG_SHA256_DIGEST_LENGTH); + } + + /* Clean up state data: */ + memset(context, 0, sizeof(pg_sha256_ctx)); +} + + +/*** SHA-512: *********************************************************/ +void +pg_sha512_init(pg_sha512_ctx *context) +{ + if (context == NULL) + return; + memcpy(context->state, sha512_initial_hash_value, PG_SHA512_DIGEST_LENGTH); + memset(context->buffer, 0, PG_SHA512_BLOCK_LENGTH); + context->bitcount[0] = context->bitcount[1] = 0; +} + +#ifdef SHA2_UNROLL_TRANSFORM + +/* Unrolled SHA-512 round macros: */ + +#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h) do { \ + W512[j] = (uint64)data[7] | ((uint64)data[6] << 8) | \ + ((uint64)data[5] << 16) | ((uint64)data[4] << 24) | \ + ((uint64)data[3] << 32) | ((uint64)data[2] << 40) | \ + ((uint64)data[1] << 48) | ((uint64)data[0] << 56); \ + data += 8; \ + T1 = (h) + Sigma1_512((e)) + Ch((e), (f), (g)) + K512[j] + W512[j]; \ + (d) += T1; \ + (h) = T1 + Sigma0_512((a)) + Maj((a), (b), (c)); \ + j++; \ +} while(0) + + +#define ROUND512(a,b,c,d,e,f,g,h) do { \ + s0 = W512[(j+1)&0x0f]; \ + s0 = sigma0_512(s0); \ + s1 = W512[(j+14)&0x0f]; \ + s1 = sigma1_512(s1); \ + T1 = (h) + Sigma1_512((e)) + Ch((e), (f), (g)) + K512[j] + \ + (W512[j&0x0f] += s1 + W512[(j+9)&0x0f] + s0); \ + (d) += T1; \ + (h) = T1 + Sigma0_512((a)) + Maj((a), (b), (c)); \ + j++; \ +} while(0) + +static void +SHA512_Transform(pg_sha512_ctx *context, const uint8 *data) +{ + uint64 a, + b, + c, + d, + e, + f, + g, + h, + s0, + s1; + uint64 T1, + *W512 = (uint64 *) context->buffer; + int j; + + /* Initialize registers with the prev. intermediate value */ + a = context->state[0]; + b = context->state[1]; + c = context->state[2]; + d = context->state[3]; + e = context->state[4]; + f = context->state[5]; + g = context->state[6]; + h = context->state[7]; + + j = 0; + do + { + ROUND512_0_TO_15(a, b, c, d, e, f, g, h); + ROUND512_0_TO_15(h, a, b, c, d, e, f, g); + ROUND512_0_TO_15(g, h, a, b, c, d, e, f); + ROUND512_0_TO_15(f, g, h, a, b, c, d, e); + ROUND512_0_TO_15(e, f, g, h, a, b, c, d); + ROUND512_0_TO_15(d, e, f, g, h, a, b, c); + ROUND512_0_TO_15(c, d, e, f, g, h, a, b); + ROUND512_0_TO_15(b, c, d, e, f, g, h, a); + } while (j < 16); + + /* Now for the remaining rounds up to 79: */ + do + { + ROUND512(a, b, c, d, e, f, g, h); + ROUND512(h, a, b, c, d, e, f, g); + ROUND512(g, h, a, b, c, d, e, f); + ROUND512(f, g, h, a, b, c, d, e); + ROUND512(e, f, g, h, a, b, c, d); + ROUND512(d, e, f, g, h, a, b, c); + ROUND512(c, d, e, f, g, h, a, b); + ROUND512(b, c, d, e, f, g, h, a); + } while (j < 80); + + /* Compute the current intermediate hash value */ + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + context->state[5] += f; + context->state[6] += g; + context->state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = 0; +} +#else /* SHA2_UNROLL_TRANSFORM */ + +static void +SHA512_Transform(pg_sha512_ctx *context, const uint8 *data) +{ + uint64 a, + b, + c, + d, + e, + f, + g, + h, + s0, + s1; + uint64 T1, + T2, + *W512 = (uint64 *) context->buffer; + int j; + + /* Initialize registers with the prev. intermediate value */ + a = context->state[0]; + b = context->state[1]; + c = context->state[2]; + d = context->state[3]; + e = context->state[4]; + f = context->state[5]; + g = context->state[6]; + h = context->state[7]; + + j = 0; + do + { + W512[j] = (uint64) data[7] | ((uint64) data[6] << 8) | + ((uint64) data[5] << 16) | ((uint64) data[4] << 24) | + ((uint64) data[3] << 32) | ((uint64) data[2] << 40) | + ((uint64) data[1] << 48) | ((uint64) data[0] << 56); + data += 8; + /* Apply the SHA-512 compression function to update a..h */ + T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + W512[j]; + T2 = Sigma0_512(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 16); + + do + { + /* Part of the message block expansion: */ + s0 = W512[(j + 1) & 0x0f]; + s0 = sigma0_512(s0); + s1 = W512[(j + 14) & 0x0f]; + s1 = sigma1_512(s1); + + /* Apply the SHA-512 compression function to update a..h */ + T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + + (W512[j & 0x0f] += s1 + W512[(j + 9) & 0x0f] + s0); + T2 = Sigma0_512(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 80); + + /* Compute the current intermediate hash value */ + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + context->state[5] += f; + context->state[6] += g; + context->state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = T2 = 0; +} +#endif /* SHA2_UNROLL_TRANSFORM */ + +void +pg_sha512_update(pg_sha512_ctx *context, const uint8 *data, size_t len) +{ + size_t freespace, + usedspace; + + /* Calling with no data is valid (we do nothing) */ + if (len == 0) + return; + + usedspace = (context->bitcount[0] >> 3) % PG_SHA512_BLOCK_LENGTH; + if (usedspace > 0) + { + /* Calculate how much free space is available in the buffer */ + freespace = PG_SHA512_BLOCK_LENGTH - usedspace; + + if (len >= freespace) + { + /* Fill the buffer completely and process it */ + memcpy(&context->buffer[usedspace], data, freespace); + ADDINC128(context->bitcount, freespace << 3); + len -= freespace; + data += freespace; + SHA512_Transform(context, context->buffer); + } + else + { + /* The buffer is not yet full */ + memcpy(&context->buffer[usedspace], data, len); + ADDINC128(context->bitcount, len << 3); + /* Clean up: */ + usedspace = freespace = 0; + return; + } + } + while (len >= PG_SHA512_BLOCK_LENGTH) + { + /* Process as many complete blocks as we can */ + SHA512_Transform(context, data); + ADDINC128(context->bitcount, PG_SHA512_BLOCK_LENGTH << 3); + len -= PG_SHA512_BLOCK_LENGTH; + data += PG_SHA512_BLOCK_LENGTH; + } + if (len > 0) + { + /* There's left-overs, so save 'em */ + memcpy(context->buffer, data, len); + ADDINC128(context->bitcount, len << 3); + } + /* Clean up: */ + usedspace = freespace = 0; +} + +static void +SHA512_Last(pg_sha512_ctx *context) +{ + unsigned int usedspace; + + usedspace = (context->bitcount[0] >> 3) % PG_SHA512_BLOCK_LENGTH; +#ifndef WORDS_BIGENDIAN + /* Convert FROM host byte order */ + REVERSE64(context->bitcount[0], context->bitcount[0]); + REVERSE64(context->bitcount[1], context->bitcount[1]); +#endif + if (usedspace > 0) + { + /* Begin padding with a 1 bit: */ + context->buffer[usedspace++] = 0x80; + + if (usedspace <= PG_SHA512_SHORT_BLOCK_LENGTH) + { + /* Set-up for the last transform: */ + memset(&context->buffer[usedspace], 0, PG_SHA512_SHORT_BLOCK_LENGTH - usedspace); + } + else + { + if (usedspace < PG_SHA512_BLOCK_LENGTH) + { + memset(&context->buffer[usedspace], 0, PG_SHA512_BLOCK_LENGTH - usedspace); + } + /* Do second-to-last transform: */ + SHA512_Transform(context, context->buffer); + + /* And set-up for the last transform: */ + memset(context->buffer, 0, PG_SHA512_BLOCK_LENGTH - 2); + } + } + else + { + /* Prepare for final transform: */ + memset(context->buffer, 0, PG_SHA512_SHORT_BLOCK_LENGTH); + + /* Begin padding with a 1 bit: */ + *context->buffer = 0x80; + } + /* Store the length of input data (in bits): */ + *(uint64 *) &context->buffer[PG_SHA512_SHORT_BLOCK_LENGTH] = context->bitcount[1]; + *(uint64 *) &context->buffer[PG_SHA512_SHORT_BLOCK_LENGTH + 8] = context->bitcount[0]; + + /* Final transform: */ + SHA512_Transform(context, context->buffer); +} + +void +pg_sha512_final(pg_sha512_ctx *context, uint8 *digest) +{ + /* If no digest buffer is passed, we don't bother doing this: */ + if (digest != NULL) + { + SHA512_Last(context); + + /* Save the hash data for output: */ +#ifndef WORDS_BIGENDIAN + { + /* Convert TO host byte order */ + int j; + + for (j = 0; j < 8; j++) + { + REVERSE64(context->state[j], context->state[j]); + } + } +#endif + memcpy(digest, context->state, PG_SHA512_DIGEST_LENGTH); + } + + /* Zero out state data */ + memset(context, 0, sizeof(pg_sha512_ctx)); +} + + +/*** SHA-384: *********************************************************/ +void +pg_sha384_init(pg_sha384_ctx *context) +{ + if (context == NULL) + return; + memcpy(context->state, sha384_initial_hash_value, PG_SHA512_DIGEST_LENGTH); + memset(context->buffer, 0, PG_SHA384_BLOCK_LENGTH); + context->bitcount[0] = context->bitcount[1] = 0; +} + +void +pg_sha384_update(pg_sha384_ctx *context, const uint8 *data, size_t len) +{ + pg_sha512_update((pg_sha512_ctx *) context, data, len); +} + +void +pg_sha384_final(pg_sha384_ctx *context, uint8 *digest) +{ + /* If no digest buffer is passed, we don't bother doing this: */ + if (digest != NULL) + { + SHA512_Last((pg_sha512_ctx *) context); + + /* Save the hash data for output: */ +#ifndef WORDS_BIGENDIAN + { + /* Convert TO host byte order */ + int j; + + for (j = 0; j < 6; j++) + { + REVERSE64(context->state[j], context->state[j]); + } + } +#endif + memcpy(digest, context->state, PG_SHA384_DIGEST_LENGTH); + } + + /* Zero out state data */ + memset(context, 0, sizeof(pg_sha384_ctx)); +} + +/*** SHA-224: *********************************************************/ +void +pg_sha224_init(pg_sha224_ctx *context) +{ + if (context == NULL) + return; + memcpy(context->state, sha224_initial_hash_value, PG_SHA256_DIGEST_LENGTH); + memset(context->buffer, 0, PG_SHA256_BLOCK_LENGTH); + context->bitcount = 0; +} + +void +pg_sha224_update(pg_sha224_ctx *context, const uint8 *data, size_t len) +{ + pg_sha256_update((pg_sha256_ctx *) context, data, len); +} + +void +pg_sha224_final(pg_sha224_ctx *context, uint8 *digest) +{ + /* If no digest buffer is passed, we don't bother doing this: */ + if (digest != NULL) + { + SHA256_Last(context); + +#ifndef WORDS_BIGENDIAN + { + /* Convert TO host byte order */ + int j; + + for (j = 0; j < 8; j++) + { + REVERSE32(context->state[j], context->state[j]); + } + } +#endif + memcpy(digest, context->state, PG_SHA224_DIGEST_LENGTH); + } + + /* Clean up state data: */ + memset(context, 0, sizeof(pg_sha224_ctx)); +} diff --git a/src/common/sha2_int.h b/src/common/sha2_int.h new file mode 100644 index 0000000..954e5d7 --- /dev/null +++ b/src/common/sha2_int.h @@ -0,0 +1,91 @@ +/*------------------------------------------------------------------------- + * + * sha2_int.h + * Internal headers for fallback implementation of SHA{224,256,384,512} + * + * Portions Copyright (c) 2016-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/sha2_int.h + * + *------------------------------------------------------------------------- + */ + +/* $OpenBSD: sha2.h,v 1.2 2004/04/28 23:11:57 millert Exp $ */ + +/* + * FILE: sha2.h + * AUTHOR: Aaron D. Gifford <me@aarongifford.com> + * + * Copyright (c) 2000-2001, Aaron D. Gifford + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTOR(S) ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTOR(S) BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $From: sha2.h,v 1.1 2001/11/08 00:02:01 adg Exp adg $ + */ + +#ifndef PG_SHA2_INT_H +#define PG_SHA2_INT_H + +#include "common/sha2.h" + +typedef struct pg_sha256_ctx +{ + uint32 state[8]; + uint64 bitcount; + uint8 buffer[PG_SHA256_BLOCK_LENGTH]; +} pg_sha256_ctx; +typedef struct pg_sha512_ctx +{ + uint64 state[8]; + uint64 bitcount[2]; + uint8 buffer[PG_SHA512_BLOCK_LENGTH]; +} pg_sha512_ctx; +typedef struct pg_sha256_ctx pg_sha224_ctx; +typedef struct pg_sha512_ctx pg_sha384_ctx; + +/* Interface routines for SHA224/256/384/512 */ +extern void pg_sha224_init(pg_sha224_ctx *ctx); +extern void pg_sha224_update(pg_sha224_ctx *ctx, const uint8 *input0, + size_t len); +extern void pg_sha224_final(pg_sha224_ctx *ctx, uint8 *dest); + +extern void pg_sha256_init(pg_sha256_ctx *ctx); +extern void pg_sha256_update(pg_sha256_ctx *ctx, const uint8 *input0, + size_t len); +extern void pg_sha256_final(pg_sha256_ctx *ctx, uint8 *dest); + +extern void pg_sha384_init(pg_sha384_ctx *ctx); +extern void pg_sha384_update(pg_sha384_ctx *ctx, + const uint8 *, size_t len); +extern void pg_sha384_final(pg_sha384_ctx *ctx, uint8 *dest); + +extern void pg_sha512_init(pg_sha512_ctx *ctx); +extern void pg_sha512_update(pg_sha512_ctx *ctx, const uint8 *input0, + size_t len); +extern void pg_sha512_final(pg_sha512_ctx *ctx, uint8 *dest); + +#endif /* PG_SHA2_INT_H */ diff --git a/src/common/sprompt.c b/src/common/sprompt.c new file mode 100644 index 0000000..201c831 --- /dev/null +++ b/src/common/sprompt.c @@ -0,0 +1,181 @@ +/*------------------------------------------------------------------------- + * + * sprompt.c + * simple_prompt() routine + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/sprompt.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "common/fe_memutils.h" +#include "common/string.h" + +#ifdef HAVE_TERMIOS_H +#include <termios.h> +#endif + + +/* + * simple_prompt + * + * Generalized function especially intended for reading in usernames and + * passwords interactively. Reads from /dev/tty or stdin/stderr. + * + * prompt: The prompt to print, or NULL if none (automatically localized) + * echo: Set to false if you want to hide what is entered (for passwords) + * + * The input (without trailing newline) is returned as a malloc'd string. + * Caller is responsible for freeing it when done. + */ +char * +simple_prompt(const char *prompt, bool echo) +{ + return simple_prompt_extended(prompt, echo, NULL); +} + +/* + * simple_prompt_extended + * + * This is the same as simple_prompt(), except that prompt_ctx can + * optionally be provided to allow this function to be canceled via an + * existing SIGINT signal handler that will longjmp to the specified place + * only when *(prompt_ctx->enabled) is true. If canceled, this function + * returns an empty string, and prompt_ctx->canceled is set to true. + */ +char * +simple_prompt_extended(const char *prompt, bool echo, + PromptInterruptContext *prompt_ctx) +{ + char *result; + FILE *termin, + *termout; +#if defined(HAVE_TERMIOS_H) + struct termios t_orig, + t; +#elif defined(WIN32) + HANDLE t = NULL; + DWORD t_orig = 0; +#endif + +#ifdef WIN32 + + /* + * A Windows console has an "input code page" and an "output code page"; + * these usually match each other, but they rarely match the "Windows ANSI + * code page" defined at system boot and expected of "char *" arguments to + * Windows API functions. The Microsoft CRT write() implementation + * automatically converts text between these code pages when writing to a + * console. To identify such file descriptors, it calls GetConsoleMode() + * on the underlying HANDLE, which in turn requires GENERIC_READ access on + * the HANDLE. Opening termout in mode "w+" allows that detection to + * succeed. Otherwise, write() would not recognize the descriptor as a + * console, and non-ASCII characters would display incorrectly. + * + * XXX fgets() still receives text in the console's input code page. This + * makes non-ASCII credentials unportable. + * + * Unintuitively, we also open termin in mode "w+", even though we only + * read it; that's needed for SetConsoleMode() to succeed. + */ + termin = fopen("CONIN$", "w+"); + termout = fopen("CONOUT$", "w+"); +#else + + /* + * Do not try to collapse these into one "w+" mode file. Doesn't work on + * some platforms (eg, HPUX 10.20). + */ + termin = fopen("/dev/tty", "r"); + termout = fopen("/dev/tty", "w"); +#endif + if (!termin || !termout +#ifdef WIN32 + + /* + * Direct console I/O does not work from the MSYS 1.0.10 console. Writes + * reach nowhere user-visible; reads block indefinitely. XXX This affects + * most Windows terminal environments, including rxvt, mintty, Cygwin + * xterm, Cygwin sshd, and PowerShell ISE. Switch to a more-generic test. + */ + || (getenv("OSTYPE") && strcmp(getenv("OSTYPE"), "msys") == 0) +#endif + ) + { + if (termin) + fclose(termin); + if (termout) + fclose(termout); + termin = stdin; + termout = stderr; + } + + if (!echo) + { +#if defined(HAVE_TERMIOS_H) + /* disable echo via tcgetattr/tcsetattr */ + tcgetattr(fileno(termin), &t); + t_orig = t; + t.c_lflag &= ~ECHO; + tcsetattr(fileno(termin), TCSAFLUSH, &t); +#elif defined(WIN32) + /* need the file's HANDLE to turn echo off */ + t = (HANDLE) _get_osfhandle(_fileno(termin)); + + /* save the old configuration first */ + GetConsoleMode(t, &t_orig); + + /* set to the new mode */ + SetConsoleMode(t, ENABLE_LINE_INPUT | ENABLE_PROCESSED_INPUT); +#endif + } + + if (prompt) + { + fputs(_(prompt), termout); + fflush(termout); + } + + result = pg_get_line(termin, prompt_ctx); + + /* If we failed to read anything, just return an empty string */ + if (result == NULL) + result = pg_strdup(""); + + /* strip trailing newline, including \r in case we're on Windows */ + (void) pg_strip_crlf(result); + + if (!echo) + { + /* restore previous echo behavior, then echo \n */ +#if defined(HAVE_TERMIOS_H) + tcsetattr(fileno(termin), TCSAFLUSH, &t_orig); + fputs("\n", termout); + fflush(termout); +#elif defined(WIN32) + SetConsoleMode(t, t_orig); + fputs("\n", termout); + fflush(termout); +#endif + } + else if (prompt_ctx && prompt_ctx->canceled) + { + /* also echo \n if prompt was canceled */ + fputs("\n", termout); + fflush(termout); + } + + if (termin != stdin) + { + fclose(termin); + fclose(termout); + } + + return result; +} diff --git a/src/common/string.c b/src/common/string.c new file mode 100644 index 0000000..de97413 --- /dev/null +++ b/src/common/string.c @@ -0,0 +1,164 @@ +/*------------------------------------------------------------------------- + * + * string.c + * string handling helpers + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/string.c + * + *------------------------------------------------------------------------- + */ + + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/string.h" +#include "lib/stringinfo.h" + + +/* + * Returns whether the string `str' has the postfix `end'. + */ +bool +pg_str_endswith(const char *str, const char *end) +{ + size_t slen = strlen(str); + size_t elen = strlen(end); + + /* can't be a postfix if longer */ + if (elen > slen) + return false; + + /* compare the end of the strings */ + str += slen - elen; + return strcmp(str, end) == 0; +} + + +/* + * strtoint --- just like strtol, but returns int not long + */ +int +strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base) +{ + long val; + + val = strtol(str, endptr, base); + if (val != (int) val) + errno = ERANGE; + return (int) val; +} + + +/* + * pg_clean_ascii -- Replace any non-ASCII chars with a "\xXX" string + * + * Makes a newly allocated copy of the string passed in, which must be + * '\0'-terminated. In the backend, additional alloc_flags may be provided and + * will be passed as-is to palloc_extended(); in the frontend, alloc_flags is + * ignored and the copy is malloc'd. + * + * This function exists specifically to deal with filtering out + * non-ASCII characters in a few places where the client can provide an almost + * arbitrary string (and it isn't checked to ensure it's a valid username or + * database name or similar) and we don't want to have control characters or other + * things ending up in the log file where server admins might end up with a + * messed up terminal when looking at them. + * + * In general, this function should NOT be used- instead, consider how to handle + * the string without needing to filter out the non-ASCII characters. + * + * Ultimately, we'd like to improve the situation to not require replacing all + * non-ASCII but perform more intelligent filtering which would allow UTF or + * similar, but it's unclear exactly what we should allow, so stick to ASCII only + * for now. + */ +char * +pg_clean_ascii(const char *str, int alloc_flags) +{ + size_t dstlen; + char *dst; + const char *p; + size_t i = 0; + + /* Worst case, each byte can become four bytes, plus a null terminator. */ + dstlen = strlen(str) * 4 + 1; + +#ifdef FRONTEND + dst = malloc(dstlen); +#else + dst = palloc_extended(dstlen, alloc_flags); +#endif + + if (!dst) + return NULL; + + for (p = str; *p != '\0'; p++) + { + + /* Only allow clean ASCII chars in the string */ + if (*p < 32 || *p > 126) + { + Assert(i < (dstlen - 3)); + snprintf(&dst[i], dstlen - i, "\\x%02x", (unsigned char) *p); + i += 4; + } + else + { + Assert(i < dstlen); + dst[i] = *p; + i++; + } + } + + Assert(i < dstlen); + dst[i] = '\0'; + return dst; +} + + +/* + * pg_is_ascii -- Check if string is made only of ASCII characters + */ +bool +pg_is_ascii(const char *str) +{ + while (*str) + { + if (IS_HIGHBIT_SET(*str)) + return false; + str++; + } + return true; +} + + +/* + * pg_strip_crlf -- Remove any trailing newline and carriage return + * + * Removes any trailing newline and carriage return characters (\r on + * Windows) in the input string, zero-terminating it. + * + * The passed in string must be zero-terminated. This function returns + * the new length of the string. + */ +int +pg_strip_crlf(char *str) +{ + int len = strlen(str); + + while (len > 0 && (str[len - 1] == '\n' || + str[len - 1] == '\r')) + str[--len] = '\0'; + + return len; +} diff --git a/src/common/stringinfo.c b/src/common/stringinfo.c new file mode 100644 index 0000000..05b22b5 --- /dev/null +++ b/src/common/stringinfo.c @@ -0,0 +1,343 @@ +/*------------------------------------------------------------------------- + * + * stringinfo.c + * + * StringInfo provides an extensible string data type (currently limited to a + * length of 1GB). It can be used to buffer either ordinary C strings + * (null-terminated text) or arbitrary binary data. All storage is allocated + * with palloc() (falling back to malloc in frontend code). + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/common/stringinfo.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND + +#include "postgres.h" +#include "utils/memutils.h" + +#else + +#include "postgres_fe.h" + +/* It's possible we could use a different value for this in frontend code */ +#define MaxAllocSize ((Size) 0x3fffffff) /* 1 gigabyte - 1 */ + +#endif + +#include "lib/stringinfo.h" + + +/* + * makeStringInfo + * + * Create an empty 'StringInfoData' & return a pointer to it. + */ +StringInfo +makeStringInfo(void) +{ + StringInfo res; + + res = (StringInfo) palloc(sizeof(StringInfoData)); + + initStringInfo(res); + + return res; +} + +/* + * initStringInfo + * + * Initialize a StringInfoData struct (with previously undefined contents) + * to describe an empty string. + */ +void +initStringInfo(StringInfo str) +{ + int size = 1024; /* initial default buffer size */ + + str->data = (char *) palloc(size); + str->maxlen = size; + resetStringInfo(str); +} + +/* + * resetStringInfo + * + * Reset the StringInfo: the data buffer remains valid, but its + * previous content, if any, is cleared. + */ +void +resetStringInfo(StringInfo str) +{ + str->data[0] = '\0'; + str->len = 0; + str->cursor = 0; +} + +/* + * appendStringInfo + * + * Format text data under the control of fmt (an sprintf-style format string) + * and append it to whatever is already in str. More space is allocated + * to str if necessary. This is sort of like a combination of sprintf and + * strcat. + */ +void +appendStringInfo(StringInfo str, const char *fmt,...) +{ + int save_errno = errno; + + for (;;) + { + va_list args; + int needed; + + /* Try to format the data. */ + errno = save_errno; + va_start(args, fmt); + needed = appendStringInfoVA(str, fmt, args); + va_end(args); + + if (needed == 0) + break; /* success */ + + /* Increase the buffer size and try again. */ + enlargeStringInfo(str, needed); + } +} + +/* + * appendStringInfoVA + * + * Attempt to format text data under the control of fmt (an sprintf-style + * format string) and append it to whatever is already in str. If successful + * return zero; if not (because there's not enough space), return an estimate + * of the space needed, without modifying str. Typically the caller should + * pass the return value to enlargeStringInfo() before trying again; see + * appendStringInfo for standard usage pattern. + * + * Caution: callers must be sure to preserve their entry-time errno + * when looping, in case the fmt contains "%m". + * + * XXX This API is ugly, but there seems no alternative given the C spec's + * restrictions on what can portably be done with va_list arguments: you have + * to redo va_start before you can rescan the argument list, and we can't do + * that from here. + */ +int +appendStringInfoVA(StringInfo str, const char *fmt, va_list args) +{ + int avail; + size_t nprinted; + + Assert(str != NULL); + + /* + * If there's hardly any space, don't bother trying, just fail to make the + * caller enlarge the buffer first. We have to guess at how much to + * enlarge, since we're skipping the formatting work. + */ + avail = str->maxlen - str->len; + if (avail < 16) + return 32; + + nprinted = pvsnprintf(str->data + str->len, (size_t) avail, fmt, args); + + if (nprinted < (size_t) avail) + { + /* Success. Note nprinted does not include trailing null. */ + str->len += (int) nprinted; + return 0; + } + + /* Restore the trailing null so that str is unmodified. */ + str->data[str->len] = '\0'; + + /* + * Return pvsnprintf's estimate of the space needed. (Although this is + * given as a size_t, we know it will fit in int because it's not more + * than MaxAllocSize.) + */ + return (int) nprinted; +} + +/* + * appendStringInfoString + * + * Append a null-terminated string to str. + * Like appendStringInfo(str, "%s", s) but faster. + */ +void +appendStringInfoString(StringInfo str, const char *s) +{ + appendBinaryStringInfo(str, s, strlen(s)); +} + +/* + * appendStringInfoChar + * + * Append a single byte to str. + * Like appendStringInfo(str, "%c", ch) but much faster. + */ +void +appendStringInfoChar(StringInfo str, char ch) +{ + /* Make more room if needed */ + if (str->len + 1 >= str->maxlen) + enlargeStringInfo(str, 1); + + /* OK, append the character */ + str->data[str->len] = ch; + str->len++; + str->data[str->len] = '\0'; +} + +/* + * appendStringInfoSpaces + * + * Append the specified number of spaces to a buffer. + */ +void +appendStringInfoSpaces(StringInfo str, int count) +{ + if (count > 0) + { + /* Make more room if needed */ + enlargeStringInfo(str, count); + + /* OK, append the spaces */ + memset(&str->data[str->len], ' ', count); + str->len += count; + str->data[str->len] = '\0'; + } +} + +/* + * appendBinaryStringInfo + * + * Append arbitrary binary data to a StringInfo, allocating more space + * if necessary. Ensures that a trailing null byte is present. + */ +void +appendBinaryStringInfo(StringInfo str, const void *data, int datalen) +{ + Assert(str != NULL); + + /* Make more room if needed */ + enlargeStringInfo(str, datalen); + + /* OK, append the data */ + memcpy(str->data + str->len, data, datalen); + str->len += datalen; + + /* + * Keep a trailing null in place, even though it's probably useless for + * binary data. (Some callers are dealing with text but call this because + * their input isn't null-terminated.) + */ + str->data[str->len] = '\0'; +} + +/* + * appendBinaryStringInfoNT + * + * Append arbitrary binary data to a StringInfo, allocating more space + * if necessary. Does not ensure a trailing null-byte exists. + */ +void +appendBinaryStringInfoNT(StringInfo str, const void *data, int datalen) +{ + Assert(str != NULL); + + /* Make more room if needed */ + enlargeStringInfo(str, datalen); + + /* OK, append the data */ + memcpy(str->data + str->len, data, datalen); + str->len += datalen; +} + +/* + * enlargeStringInfo + * + * Make sure there is enough space for 'needed' more bytes + * ('needed' does not include the terminating null). + * + * External callers usually need not concern themselves with this, since + * all stringinfo.c routines do it automatically. However, if a caller + * knows that a StringInfo will eventually become X bytes large, it + * can save some palloc overhead by enlarging the buffer before starting + * to store data in it. + * + * NB: In the backend, because we use repalloc() to enlarge the buffer, the + * string buffer will remain allocated in the same memory context that was + * current when initStringInfo was called, even if another context is now + * current. This is the desired and indeed critical behavior! + */ +void +enlargeStringInfo(StringInfo str, int needed) +{ + int newlen; + + /* + * Guard against out-of-range "needed" values. Without this, we can get + * an overflow or infinite loop in the following. + */ + if (needed < 0) /* should not happen */ + { +#ifndef FRONTEND + elog(ERROR, "invalid string enlargement request size: %d", needed); +#else + fprintf(stderr, "invalid string enlargement request size: %d\n", needed); + exit(EXIT_FAILURE); +#endif + } + if (((Size) needed) >= (MaxAllocSize - (Size) str->len)) + { +#ifndef FRONTEND + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("out of memory"), + errdetail("Cannot enlarge string buffer containing %d bytes by %d more bytes.", + str->len, needed))); +#else + fprintf(stderr, + _("out of memory\n\nCannot enlarge string buffer containing %d bytes by %d more bytes.\n"), + str->len, needed); + exit(EXIT_FAILURE); +#endif + } + + needed += str->len + 1; /* total space required now */ + + /* Because of the above test, we now have needed <= MaxAllocSize */ + + if (needed <= str->maxlen) + return; /* got enough space already */ + + /* + * We don't want to allocate just a little more space with each append; + * for efficiency, double the buffer size each time it overflows. + * Actually, we might need to more than double it if 'needed' is big... + */ + newlen = 2 * str->maxlen; + while (needed > newlen) + newlen = 2 * newlen; + + /* + * Clamp to MaxAllocSize in case we went past it. Note we are assuming + * here that MaxAllocSize <= INT_MAX/2, else the above loop could + * overflow. We will still have newlen >= needed. + */ + if (newlen > (int) MaxAllocSize) + newlen = (int) MaxAllocSize; + + str->data = (char *) repalloc(str->data, newlen); + + str->maxlen = newlen; +} diff --git a/src/common/unicode/.gitignore b/src/common/unicode/.gitignore new file mode 100644 index 0000000..46243f7 --- /dev/null +++ b/src/common/unicode/.gitignore @@ -0,0 +1,9 @@ +/norm_test +/norm_test_table.h + +# Downloaded files +/CompositionExclusions.txt +/DerivedNormalizationProps.txt +/EastAsianWidth.txt +/NormalizationTest.txt +/UnicodeData.txt diff --git a/src/common/unicode/Makefile b/src/common/unicode/Makefile new file mode 100644 index 0000000..382da47 --- /dev/null +++ b/src/common/unicode/Makefile @@ -0,0 +1,72 @@ +#------------------------------------------------------------------------- +# +# Makefile +# Makefile for src/common/unicode +# +# IDENTIFICATION +# src/common/unicode/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/common/unicode +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +override CPPFLAGS := -DFRONTEND -I. $(CPPFLAGS) +LIBS += $(PTHREAD_LIBS) + +# By default, do nothing. +all: + +update-unicode: unicode_norm_table.h unicode_nonspacing_table.h unicode_east_asian_fw_table.h unicode_normprops_table.h unicode_norm_hashfunc.h + mv $^ $(top_srcdir)/src/include/common/ + $(MAKE) normalization-check + +# These files are part of the Unicode Character Database. Download +# them on demand. The dependency on Makefile.global is for +# UNICODE_VERSION. +UnicodeData.txt EastAsianWidth.txt DerivedNormalizationProps.txt CompositionExclusions.txt NormalizationTest.txt: $(top_builddir)/src/Makefile.global + $(DOWNLOAD) https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/$(@F) + +# Generation of conversion tables used for string normalization with +# UTF-8 strings. +unicode_norm_hashfunc.h: unicode_norm_table.h + +unicode_norm_table.h: generate-unicode_norm_table.pl UnicodeData.txt CompositionExclusions.txt + $(PERL) $< + +unicode_nonspacing_table.h: generate-unicode_nonspacing_table.pl UnicodeData.txt + $(PERL) $^ >$@ + +unicode_east_asian_fw_table.h: generate-unicode_east_asian_fw_table.pl EastAsianWidth.txt + $(PERL) $^ >$@ + +unicode_normprops_table.h: generate-unicode_normprops_table.pl DerivedNormalizationProps.txt + $(PERL) $^ >$@ + +# Test suite +normalization-check: norm_test + ./norm_test + +norm_test: norm_test.o ../unicode_norm.o | submake-common + +norm_test.o: norm_test_table.h + +.PHONY: submake-common + +submake-common: + $(MAKE) -C .. all + +norm_test_table.h: generate-norm_test_table.pl NormalizationTest.txt + perl $^ $@ + +.PHONY: normalization-check + + +clean: + rm -f $(OBJS) norm_test norm_test.o + +distclean: clean + rm -f UnicodeData.txt EastAsianWidth.txt CompositionExclusions.txt NormalizationTest.txt norm_test_table.h unicode_norm_table.h + +maintainer-clean: distclean diff --git a/src/common/unicode/README b/src/common/unicode/README new file mode 100644 index 0000000..56956f6 --- /dev/null +++ b/src/common/unicode/README @@ -0,0 +1,28 @@ +This directory contains tools to generate the tables in +src/include/common/unicode_norm.h, used for Unicode normalization. The +generated .h file is included in the source tree, so these are normally not +needed to build PostgreSQL, only if you need to re-generate the .h file +from the Unicode data files for some reason, e.g. to update to a new version +of Unicode. + +Generating unicode_norm_table.h +------------------------------- + +Run + + make update-unicode + +from the top level of the source tree and commit the result. + +Tests +----- + +The Unicode consortium publishes a comprehensive test suite for the +normalization algorithm, in a file called NormalizationTest.txt. This +directory also contains a perl script and some C code, to run our +normalization code with all the test strings in NormalizationTest.txt. +To download NormalizationTest.txt and run the tests: + + make normalization-check + +This is also run as part of the update-unicode target. diff --git a/src/common/unicode/generate-norm_test_table.pl b/src/common/unicode/generate-norm_test_table.pl new file mode 100644 index 0000000..3434f7e --- /dev/null +++ b/src/common/unicode/generate-norm_test_table.pl @@ -0,0 +1,106 @@ +#!/usr/bin/perl +# +# Read Unicode consortium's normalization test suite, NormalizationTest.txt, +# and generate a C array from it, for norm_test.c. +# +# NormalizationTest.txt is part of the Unicode Character Database. +# +# Copyright (c) 2000-2023, PostgreSQL Global Development Group + +use strict; +use warnings; + +use File::Basename; + +die "Usage: $0 INPUT_FILE OUTPUT_FILE\n" if @ARGV != 2; +my $input_file = $ARGV[0]; +my $output_file = $ARGV[1]; +my $output_base = basename($output_file); + +# Open the input and output files +open my $INPUT, '<', $input_file + or die "Could not open input file $input_file: $!"; +open my $OUTPUT, '>', $output_file + or die "Could not open output file $output_file: $!\n"; + +# Print header of output file. +print $OUTPUT <<HEADER; +/*------------------------------------------------------------------------- + * + * norm_test_table.h + * Test strings for Unicode normalization. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/common/unicode/norm_test_table.h + * + *------------------------------------------------------------------------- + */ + +/* + * File auto-generated by src/common/unicode/generate-norm_test_table.pl, do + * not edit. There is deliberately not an #ifndef PG_NORM_TEST_TABLE_H + * here. + */ + +typedef struct +{ + int linenum; + pg_wchar input[50]; + pg_wchar output[4][50]; +} pg_unicode_test; + +/* test table */ +HEADER +print $OUTPUT + "static const pg_unicode_test UnicodeNormalizationTests[] =\n{\n"; + +# Helper routine to convert a space-separated list of Unicode characters to +# hexadecimal list format, suitable for outputting in a C array. +sub codepoint_string_to_hex +{ + my $codepoint_string = shift; + + my $result; + + foreach (split(' ', $codepoint_string)) + { + my $cp = $_; + my $utf8 = "0x$cp, "; + $result .= $utf8; + } + $result .= '0'; # null-terminated the array + return $result; +} + +# Process the input file line by line +my $linenum = 0; +while (my $line = <$INPUT>) +{ + $linenum = $linenum + 1; + if ($line =~ /^\s*#/) { next; } # ignore comments + + if ($line =~ /^@/) { next; } # ignore @Part0 like headers + + # Split the line wanted and get the fields needed: + # + # source; NFC; NFD; NFKC; NFKD + my ($source, $nfc, $nfd, $nfkc, $nfkd) = split(';', $line); + + my $source_utf8 = codepoint_string_to_hex($source); + my $nfc_utf8 = codepoint_string_to_hex($nfc); + my $nfd_utf8 = codepoint_string_to_hex($nfd); + my $nfkc_utf8 = codepoint_string_to_hex($nfkc); + my $nfkd_utf8 = codepoint_string_to_hex($nfkd); + + print $OUTPUT + "\t{ $linenum, { $source_utf8 }, { { $nfc_utf8 }, { $nfd_utf8 }, { $nfkc_utf8 }, { $nfkd_utf8 } } },\n"; +} + +# Output terminator entry +print $OUTPUT "\t{ 0, { 0 }, { { 0 }, { 0 }, { 0 }, { 0 } } }"; +print $OUTPUT "\n};\n"; + +close $OUTPUT; +close $INPUT; diff --git a/src/common/unicode/generate-unicode_east_asian_fw_table.pl b/src/common/unicode/generate-unicode_east_asian_fw_table.pl new file mode 100644 index 0000000..2b2df37 --- /dev/null +++ b/src/common/unicode/generate-unicode_east_asian_fw_table.pl @@ -0,0 +1,76 @@ +#!/usr/bin/perl +# +# Generate a sorted list of non-overlapping intervals of East Asian Wide (W) +# and East Asian Fullwidth (F) characters, using Unicode data files as input. +# Pass EastAsianWidth.txt as argument. The output is on stdout. +# +# Copyright (c) 2019-2023, PostgreSQL Global Development Group + +use strict; +use warnings; + +my $range_start = undef; +my ($first, $last); +my $prev_last; + +print + "/* generated by src/common/unicode/generate-unicode_east_asian_fw_table.pl, do not edit */\n\n"; + +print "static const struct mbinterval east_asian_fw[] = {\n"; + +foreach my $line (<ARGV>) +{ + chomp $line; + $line =~ s/\s*#.*$//; + next if $line eq ''; + my ($codepoint, $width) = split ';', $line; + + if ($codepoint =~ /\.\./) + { + ($first, $last) = split /\.\./, $codepoint; + } + else + { + $first = $last = $codepoint; + } + + ($first, $last) = map(hex, ($first, $last)); + + if ($width eq 'F' || $width eq 'W') + { + # fullwidth/wide characters + if (!defined($range_start)) + { + # save for start of range if one hasn't been started yet + $range_start = $first; + } + elsif ($first != $prev_last + 1) + { + # ranges aren't contiguous; emit the last and start a new one + printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_last; + $range_start = $first; + } + } + else + { + # not wide characters, print out previous range if any + if (defined($range_start)) + { + printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_last; + $range_start = undef; + } + } +} +continue +{ + $prev_last = $last; +} + +# don't forget any ranges at the very end of the database (though there are none +# as of Unicode 13.0) +if (defined($range_start)) +{ + printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_last; +} + +print "};\n"; diff --git a/src/common/unicode/generate-unicode_nonspacing_table.pl b/src/common/unicode/generate-unicode_nonspacing_table.pl new file mode 100644 index 0000000..ae86e82 --- /dev/null +++ b/src/common/unicode/generate-unicode_nonspacing_table.pl @@ -0,0 +1,53 @@ +#!/usr/bin/perl +# +# Generate sorted list of non-overlapping intervals of non-spacing +# characters, using Unicode data files as input. Pass UnicodeData.txt +# as argument. The output is on stdout. +# +# Copyright (c) 2019-2023, PostgreSQL Global Development Group + +use strict; +use warnings; + +my $range_start = undef; +my $codepoint; +my $prev_codepoint; +my $count = 0; + +print + "/* generated by src/common/unicode/generate-unicode_nonspacing_table.pl, do not edit */\n\n"; + +print "static const struct mbinterval nonspacing[] = {\n"; + +foreach my $line (<ARGV>) +{ + chomp $line; + my @fields = split ';', $line; + $codepoint = hex $fields[0]; + + # Me and Mn refer to combining characters + # Cf refers to format characters + if ($fields[2] eq 'Me' || $fields[2] eq 'Mn' || $fields[2] eq 'Cf') + { + # non-spacing character, save for start of range + if (!defined($range_start)) + { + $range_start = $codepoint; + } + } + else + { + # not a non-spacing character, print out previous range if any + if (defined($range_start)) + { + printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint; + $range_start = undef; + } + } +} +continue +{ + $prev_codepoint = $codepoint; +} + +print "};\n"; diff --git a/src/common/unicode/generate-unicode_norm_table.pl b/src/common/unicode/generate-unicode_norm_table.pl new file mode 100644 index 0000000..d591411 --- /dev/null +++ b/src/common/unicode/generate-unicode_norm_table.pl @@ -0,0 +1,412 @@ +#!/usr/bin/perl +# +# Generate a composition table and its lookup utilities, using Unicode data +# files as input. +# +# Input: UnicodeData.txt and CompositionExclusions.txt +# Output: unicode_norm_table.h and unicode_norm_hashfunc.h +# +# Copyright (c) 2000-2023, PostgreSQL Global Development Group + +use strict; +use warnings; +use Getopt::Long; + +use FindBin; +use lib "$FindBin::RealBin/../../tools/"; +use PerfectHash; + +my $output_path = '.'; + +GetOptions('outdir:s' => \$output_path); + +my $output_table_file = "$output_path/unicode_norm_table.h"; +my $output_func_file = "$output_path/unicode_norm_hashfunc.h"; + + +my $FH; + +# Read list of codes that should be excluded from re-composition. +my @composition_exclusion_codes = (); +open($FH, '<', "$output_path/CompositionExclusions.txt") + or die "Could not open $output_path/CompositionExclusions.txt: $!."; +while (my $line = <$FH>) +{ + if ($line =~ /^([[:xdigit:]]+)/) + { + push @composition_exclusion_codes, $1; + } +} +close $FH; + +# Read entries from UnicodeData.txt into a list, and a hash table. We need +# three fields from each row: the codepoint, canonical combining class, +# and character decomposition mapping +my @characters = (); +my %character_hash = (); +open($FH, '<', "$output_path/UnicodeData.txt") + or die "Could not open $output_path/UnicodeData.txt: $!."; +while (my $line = <$FH>) +{ + + # Split the line wanted and get the fields needed: + # - Unicode code value + # - Canonical Combining Class + # - Character Decomposition Mapping + my @elts = split(';', $line); + my $code = $elts[0]; + my $class = $elts[3]; + my $decomp = $elts[5]; + + # Skip codepoints above U+10FFFF. They cannot be represented in 4 bytes + # in UTF-8, and PostgreSQL doesn't support UTF-8 characters longer than + # 4 bytes. (This is just pro forma, as there aren't any such entries in + # the data file, currently.) + next if hex($code) > 0x10FFFF; + + # Skip characters with no decompositions and a class of 0, to reduce the + # table size. + next if $class eq '0' && $decomp eq ''; + + my %char_entry = (code => $code, class => $class, decomp => $decomp); + push(@characters, \%char_entry); + $character_hash{$code} = \%char_entry; +} +close $FH; + +my $num_characters = scalar @characters; + +# Start writing out the output files +open my $OT, '>', $output_table_file + or die "Could not open output file $output_table_file: $!\n"; +open my $OF, '>', $output_func_file + or die "Could not open output file $output_func_file: $!\n"; + +print $OT <<HEADER; +/*------------------------------------------------------------------------- + * + * unicode_norm_table.h + * Composition table used for Unicode normalization + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/common/unicode_norm_table.h + * + *------------------------------------------------------------------------- + */ + +/* + * File auto-generated by src/common/unicode/generate-unicode_norm_table.pl, + * do not edit. There is deliberately not an #ifndef PG_UNICODE_NORM_TABLE_H + * here. + */ +typedef struct +{ + uint32 codepoint; /* Unicode codepoint */ + uint8 comb_class; /* combining class of character */ + uint8 dec_size_flags; /* size and flags of decomposition code list */ + uint16 dec_index; /* index into UnicodeDecomp_codepoints, or the + * decomposition itself if DECOMP_INLINE */ +} pg_unicode_decomposition; + +#define DECOMP_NO_COMPOSE 0x80 /* don't use for re-composition */ +#define DECOMP_INLINE 0x40 /* decomposition is stored inline in + * dec_index */ +#define DECOMP_COMPAT 0x20 /* compatibility mapping */ + +#define DECOMPOSITION_SIZE(x) ((x)->dec_size_flags & 0x1F) +#define DECOMPOSITION_NO_COMPOSE(x) (((x)->dec_size_flags & (DECOMP_NO_COMPOSE | DECOMP_COMPAT)) != 0) +#define DECOMPOSITION_IS_INLINE(x) (((x)->dec_size_flags & DECOMP_INLINE) != 0) +#define DECOMPOSITION_IS_COMPAT(x) (((x)->dec_size_flags & DECOMP_COMPAT) != 0) + +/* Table of Unicode codepoints and their decompositions */ +static const pg_unicode_decomposition UnicodeDecompMain[$num_characters] = +{ +HEADER + +print $OF <<HEADER; +/*------------------------------------------------------------------------- + * + * unicode_norm_hashfunc.h + * Perfect hash functions used for Unicode normalization + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/common/unicode_norm_hashfunc.h + * + *------------------------------------------------------------------------- + */ + +/* + * File auto-generated by src/common/unicode/generate-unicode_norm_table.pl, + * do not edit. There is deliberately not an #ifndef PG_UNICODE_NORM_HASHFUNC_H + * here. + */ + +#include "common/unicode_norm_table.h" + +/* Typedef for perfect hash functions */ +typedef int (*cp_hash_func) (const void *key); + +/* Information for lookups with perfect hash functions */ +typedef struct +{ + const pg_unicode_decomposition *decomps; + cp_hash_func hash; + int num_decomps; +} pg_unicode_decompinfo; + +typedef struct +{ + const uint16 *inverse_lookup; + cp_hash_func hash; + int num_recomps; +} pg_unicode_recompinfo; + +HEADER + +my $decomp_index = 0; +my $decomp_string = ""; +my @dec_cp_packed; +my $main_index = 0; +my @rec_info; + +my $last_code = $characters[-1]->{code}; +foreach my $char (@characters) +{ + my $code = $char->{code}; + my $class = $char->{class}; + my $decomp = $char->{decomp}; + + # Save the code point bytes as a string in network order. + push @dec_cp_packed, pack('N', hex($char->{code})); + + # The character decomposition mapping field in UnicodeData.txt is a list + # of unicode codepoints, separated by space. But it can be prefixed with + # so-called compatibility formatting tag, like "<compat>", or "<font>". + # The entries with compatibility formatting tags should not be used for + # re-composing characters during normalization, so flag them in the table. + # (The tag doesn't matter, only whether there is a tag or not) + my $compat = 0; + if ($decomp =~ /\<.*\>/) + { + $compat = 1; + $decomp =~ s/\<[^][]*\>//g; + } + my @decomp_elts = split(" ", $decomp); + + # Decomposition size + # Print size of decomposition + my $decomp_size = scalar(@decomp_elts); + die if $decomp_size > 0x1F; # to not overrun bitmask + + my $first_decomp = shift @decomp_elts; + + my $flags = ""; + my $comment = ""; + + if ($compat) + { + $flags .= " | DECOMP_COMPAT"; + } + + if ($decomp_size == 2) + { + # Should this be used for recomposition? + if ( $character_hash{$first_decomp} + && $character_hash{$first_decomp}->{class} != 0) + { + $flags .= " | DECOMP_NO_COMPOSE"; + $comment = "non-starter decomposition"; + } + else + { + foreach my $lcode (@composition_exclusion_codes) + { + if ($lcode eq $code) + { + $flags .= " | DECOMP_NO_COMPOSE"; + $comment = "in exclusion list"; + last; + } + } + } + + # Save info for recomposeable codepoints. + # Note that this MUST match the macro DECOMPOSITION_NO_COMPOSE in C + # above! See also the inverse lookup in recompose_code() found in + # src/common/unicode_norm.c. + if (!($flags =~ /DECOMP_COMPAT/ || $flags =~ /DECOMP_NO_COMPOSE/)) + { + push @rec_info, + { + code => $code, + main_index => $main_index, + first => $first_decomp, + second => $decomp_elts[0] + }; + } + } + + if ($decomp_size == 0) + { + print $OT "\t{0x$code, $class, 0$flags, 0}"; + } + elsif ($decomp_size == 1 && length($first_decomp) <= 4) + { + + # The decomposition consists of a single codepoint, and it fits + # in a uint16, so we can store it "inline" in the main table. + $flags .= " | DECOMP_INLINE"; + print $OT "\t{0x$code, $class, 1$flags, 0x$first_decomp}"; + } + else + { + print $OT "\t{0x$code, $class, $decomp_size$flags, $decomp_index}"; + + # Now save the decompositions into a dedicated area that will + # be written afterwards. First build the entry dedicated to + # a sub-table with the code and decomposition. + $decomp_string .= ",\n" if ($decomp_string ne ""); + + $decomp_string .= "\t /* $decomp_index */ 0x$first_decomp"; + foreach (@decomp_elts) + { + $decomp_string .= ", 0x$_"; + } + + $decomp_index = $decomp_index + $decomp_size; + } + + # Print a comma after all items except the last one. + print $OT "," unless ($code eq $last_code); + + print $OT "\t/* $comment */" if ($comment ne ""); + print $OT "\n"; + + $main_index++; +} +print $OT "\n};\n\n"; + +# Print the array of decomposed codes. +print $OT <<HEADER; +/* codepoints array */ +static const uint32 UnicodeDecomp_codepoints[$decomp_index] = +{ +$decomp_string +}; +HEADER + +# Emit the definition of the decomp hash function. +my $dec_funcname = 'Decomp_hash_func'; +my $dec_func = PerfectHash::generate_hash_function(\@dec_cp_packed, + $dec_funcname, fixed_key_length => 4); +print $OF "/* Perfect hash function for decomposition */\n"; +print $OF "static $dec_func\n"; + +# Emit the structure that wraps the hash lookup information into +# one variable. +print $OF <<HEADER; +/* Hash lookup information for decomposition */ +static const pg_unicode_decompinfo UnicodeDecompInfo = +{ + UnicodeDecompMain, + $dec_funcname, + $num_characters +}; + +HEADER + +# Find the lowest codepoint that decomposes to each recomposeable +# code pair and create a mapping to it. +my $recomp_string = ""; +my @rec_cp_packed; +my %seenit; +my $firstentry = 1; +foreach my $rec (sort recomp_sort @rec_info) +{ + # The hash key is formed by concatenating the bytes of the two + # codepoints. See also recompose_code() in common/unicode_norm.c. + my $hashkey = (hex($rec->{first}) << 32) | hex($rec->{second}); + + # We are only interested in the lowest code point that decomposes + # to the given code pair. + next if $seenit{$hashkey}; + + # Save the hash key bytes in network order + push @rec_cp_packed, pack('Q>', $hashkey); + + # Append inverse lookup element + $recomp_string .= ",\n" if !$firstentry; + $recomp_string .= sprintf "\t/* U+%s+%s -> U+%s */ %s", + $rec->{first}, + $rec->{second}, + $rec->{code}, + $rec->{main_index}; + + $seenit{$hashkey} = 1; + $firstentry = 0; +} + +# Emit the inverse lookup array containing indexes into UnicodeDecompMain. +my $num_recomps = scalar @rec_cp_packed; +print $OF <<HEADER; +/* Inverse lookup array -- contains indexes into UnicodeDecompMain[] */ +static const uint16 RecompInverseLookup[$num_recomps] = +{ +$recomp_string +}; + +HEADER + +# Emit the definition of the recomposition hash function. +my $rec_funcname = 'Recomp_hash_func'; +my $rec_func = + PerfectHash::generate_hash_function(\@rec_cp_packed, $rec_funcname, + fixed_key_length => 8); +print $OF "/* Perfect hash function for recomposition */\n"; +print $OF "static $rec_func\n"; + +# Emit the structure that wraps the hash lookup information into +# one variable. +print $OF <<HEADER; +/* Hash lookup information for recomposition */ +static const pg_unicode_recompinfo UnicodeRecompInfo = +{ + RecompInverseLookup, + $rec_funcname, + $num_recomps +}; +HEADER + +close $OT; +close $OF; + +sub recomp_sort +{ + my $a1 = hex($a->{first}); + my $b1 = hex($b->{first}); + + my $a2 = hex($a->{second}); + my $b2 = hex($b->{second}); + + # First sort by the first code point + return -1 if $a1 < $b1; + return 1 if $a1 > $b1; + + # Then sort by the second code point + return -1 if $a2 < $b2; + return 1 if $a2 > $b2; + + # Finally sort by the code point that decomposes into first and + # second ones. + my $acode = hex($a->{code}); + my $bcode = hex($b->{code}); + + return -1 if $acode < $bcode; + return 1 if $acode > $bcode; + + die "found duplicate entries of recomposeable code pairs"; +} diff --git a/src/common/unicode/generate-unicode_normprops_table.pl b/src/common/unicode/generate-unicode_normprops_table.pl new file mode 100644 index 0000000..1b74731 --- /dev/null +++ b/src/common/unicode/generate-unicode_normprops_table.pl @@ -0,0 +1,125 @@ +#!/usr/bin/perl +# +# Generate table of Unicode normalization "quick check" properties +# (see UAX #15). Pass DerivedNormalizationProps.txt as argument. The +# output is on stdout. +# +# Copyright (c) 2020-2023, PostgreSQL Global Development Group + +use strict; +use warnings; + +use FindBin; +use lib "$FindBin::RealBin/../../tools/"; +use PerfectHash; + +my %data; + +print + "/* generated by src/common/unicode/generate-unicode_normprops_table.pl, do not edit */\n\n"; + +print <<EOS; +#include "common/unicode_norm.h" + +/* + * Normalization quick check entry for codepoint. We use a bit field + * here to save space. + */ +typedef struct +{ + unsigned int codepoint:21; + signed int quickcheck:4; /* really UnicodeNormalizationQC */ +} pg_unicode_normprops; + +/* Typedef for hash function on quick check table */ +typedef int (*qc_hash_func) (const void *key); + +/* Information for quick check lookup with perfect hash function */ +typedef struct +{ + const pg_unicode_normprops *normprops; + qc_hash_func hash; + int num_normprops; +} pg_unicode_norminfo; +EOS + +foreach my $line (<ARGV>) +{ + chomp $line; + $line =~ s/\s*#.*$//; + next if $line eq ''; + my ($codepoint, $prop, $value) = split /\s*;\s*/, $line; + next if $prop !~ /_QC/; + + my ($first, $last); + if ($codepoint =~ /\.\./) + { + ($first, $last) = split /\.\./, $codepoint; + } + else + { + $first = $last = $codepoint; + } + + foreach my $cp (hex($first) .. hex($last)) + { + $data{$prop}{$cp} = $value; + } +} + +# We create a separate array for each normalization form rather than, +# say, a two-dimensional array, because that array would be very +# sparse and would create unnecessary overhead especially for the NFC +# lookup. +foreach my $prop (sort keys %data) +{ + # Don't build the tables for the "D" forms because they are too + # big. See also unicode_is_normalized_quickcheck(). + next if $prop eq "NFD_QC" || $prop eq "NFKD_QC"; + + print "\n"; + print + "static const pg_unicode_normprops UnicodeNormProps_${prop}[] = {\n"; + + my %subdata = %{ $data{$prop} }; + my @cp_packed; + foreach my $cp (sort { $a <=> $b } keys %subdata) + { + my $qc; + if ($subdata{$cp} eq 'N') + { + $qc = 'UNICODE_NORM_QC_NO'; + } + elsif ($subdata{$cp} eq 'M') + { + $qc = 'UNICODE_NORM_QC_MAYBE'; + } + else + { + die; + } + printf "\t{0x%04X, %s},\n", $cp, $qc; + + # Save the bytes as a string in network order. + push @cp_packed, pack('N', $cp); + } + + print "};\n"; + + # Emit the definition of the perfect hash function. + my $funcname = $prop . '_hash_func'; + my $f = PerfectHash::generate_hash_function(\@cp_packed, $funcname, + fixed_key_length => 4); + printf "\n/* Perfect hash function for %s */", $prop; + print "\nstatic $f\n"; + + # Emit the structure that wraps the hash lookup information into + # one variable. + printf "/* Hash lookup information for %s */", $prop; + printf "\nstatic const pg_unicode_norminfo "; + printf "UnicodeNormInfo_%s = {\n", $prop; + printf "\tUnicodeNormProps_%s,\n", $prop; + printf "\t%s,\n", $funcname; + printf "\t%d\n", scalar @cp_packed; + printf "};\n"; +} diff --git a/src/common/unicode/meson.build b/src/common/unicode/meson.build new file mode 100644 index 0000000..9033c4a --- /dev/null +++ b/src/common/unicode/meson.build @@ -0,0 +1,111 @@ +# Copyright (c) 2022-2023, PostgreSQL Global Development Group + +UNICODE_VERSION = '15.0.0' + +unicode_data = {} +unicode_baseurl = 'https://www.unicode.org/Public/@0@/ucd/@1@' + +if not wget.found() or not cp.found() + subdir_done() +endif + +# These files are part of the Unicode Character Database. Download them on +# demand. +foreach f : ['UnicodeData.txt', 'EastAsianWidth.txt', 'DerivedNormalizationProps.txt', 'CompositionExclusions.txt', 'NormalizationTest.txt'] + url = unicode_baseurl.format(UNICODE_VERSION, f) + target = custom_target(f, + output: f, + command: [wget, wget_flags, url], + build_by_default: false, + ) + unicode_data += {f: target} +endforeach + + +update_unicode_targets = [] + +update_unicode_targets += \ + custom_target('unicode_norm_table.h', + input: [unicode_data['UnicodeData.txt'], unicode_data['CompositionExclusions.txt']], + output: ['unicode_norm_table.h', 'unicode_norm_hashfunc.h'], + depend_files: perfect_hash_pm, + command: [ + perl, files('generate-unicode_norm_table.pl'), + '--outdir', '@OUTDIR@', '@INPUT@'], + build_by_default: false, + ) + +update_unicode_targets += \ + custom_target('unicode_nonspacing_table.h', + input: [unicode_data['UnicodeData.txt']], + output: ['unicode_nonspacing_table.h'], + depend_files: perfect_hash_pm, + command: [perl, files('generate-unicode_nonspacing_table.pl'), '@INPUT@'], + build_by_default: false, + capture: true, + ) + +update_unicode_targets += \ + custom_target('unicode_east_asian_fw_table.h', + input: [unicode_data['EastAsianWidth.txt']], + output: ['unicode_east_asian_fw_table.h'], + command: [perl, files('generate-unicode_east_asian_fw_table.pl'), '@INPUT@'], + build_by_default: false, + capture: true, + ) + +update_unicode_targets += \ + custom_target('unicode_normprops_table.h', + input: [unicode_data['DerivedNormalizationProps.txt']], + output: ['unicode_normprops_table.h'], + depend_files: perfect_hash_pm, + command: [perl, files('generate-unicode_normprops_table.pl'), '@INPUT@'], + build_by_default: false, + capture: true, + ) + +norm_test_table = custom_target('norm_test_table.h', + input: [unicode_data['NormalizationTest.txt']], + output: ['norm_test_table.h'], + command: [perl, files('generate-norm_test_table.pl'), '@INPUT@', '@OUTPUT@'], + build_by_default: false, + ) + +inc = include_directories('.') + +norm_test = executable('norm_test', + ['norm_test.c', norm_test_table], + dependencies: [frontend_port_code], + include_directories: inc, + link_with: [common_static, pgport_static], + build_by_default: false, + kwargs: default_bin_args + { + 'install': false, + } +) + +update_unicode_dep = [] + +if not meson.is_cross_build() + update_unicode_dep += custom_target('norm_test.run', + output: 'norm_test.run', + input: update_unicode_targets, + command: [norm_test], + build_by_default: false, + build_always_stale: true, + ) +endif + + +# Use a custom target, as run targets serialize the output, making this harder +# to debug, and don't deal well with targets with multiple outputs. +update_unicode = custom_target('update-unicode', + depends: update_unicode_dep, + output: ['dont-exist'], + input: update_unicode_targets, + command: [cp, '@INPUT@', '@SOURCE_ROOT@/src/include/common/'], + build_by_default: false, + build_always_stale: true, +) + +alias_target('update-unicode', update_unicode) diff --git a/src/common/unicode/norm_test.c b/src/common/unicode/norm_test.c new file mode 100644 index 0000000..809a6de --- /dev/null +++ b/src/common/unicode/norm_test.c @@ -0,0 +1,86 @@ +/*------------------------------------------------------------------------- + * norm_test.c + * Program to test Unicode normalization functions. + * + * Portions Copyright (c) 2017-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/unicode/norm_test.c + * + *------------------------------------------------------------------------- + */ +#include "postgres_fe.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "common/unicode_norm.h" + +#include "norm_test_table.h" + +static char * +print_wchar_str(const pg_wchar *s) +{ +#define BUF_DIGITS 50 + static char buf[BUF_DIGITS * 11 + 1]; + int i; + char *p; + + i = 0; + p = buf; + while (*s && i < BUF_DIGITS) + { + p += sprintf(p, "U+%04X ", *s); + i++; + s++; + } + *p = '\0'; + + return buf; +} + +static int +pg_wcscmp(const pg_wchar *s1, const pg_wchar *s2) +{ + for (;;) + { + if (*s1 < *s2) + return -1; + if (*s1 > *s2) + return 1; + if (*s1 == 0) + return 0; + s1++; + s2++; + } +} + +int +main(int argc, char **argv) +{ + const pg_unicode_test *test; + + for (test = UnicodeNormalizationTests; test->input[0] != 0; test++) + { + for (int form = 0; form < 4; form++) + { + pg_wchar *result; + + result = unicode_normalize(form, test->input); + + if (pg_wcscmp(test->output[form], result) != 0) + { + printf("FAILURE (NormalizationTest.txt line %d form %d):\n", test->linenum, form); + printf("input: %s\n", print_wchar_str(test->input)); + printf("expected: %s\n", print_wchar_str(test->output[form])); + printf("got: %s\n", print_wchar_str(result)); + printf("\n"); + exit(1); + } + } + } + + printf("All tests successful!\n"); + exit(0); +} diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c new file mode 100644 index 0000000..8e17cf0 --- /dev/null +++ b/src/common/unicode_norm.c @@ -0,0 +1,634 @@ +/*------------------------------------------------------------------------- + * unicode_norm.c + * Normalize a Unicode string + * + * This implements Unicode normalization, per the documentation at + * https://www.unicode.org/reports/tr15/. + * + * Portions Copyright (c) 2017-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/unicode_norm.c + * + *------------------------------------------------------------------------- + */ +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/unicode_norm.h" +#ifndef FRONTEND +#include "common/unicode_norm_hashfunc.h" +#include "common/unicode_normprops_table.h" +#include "port/pg_bswap.h" +#else +#include "common/unicode_norm_table.h" +#endif + +#ifndef FRONTEND +#define ALLOC(size) palloc(size) +#define FREE(size) pfree(size) +#else +#define ALLOC(size) malloc(size) +#define FREE(size) free(size) +#endif + +/* Constants for calculations with Hangul characters */ +#define SBASE 0xAC00 /* U+AC00 */ +#define LBASE 0x1100 /* U+1100 */ +#define VBASE 0x1161 /* U+1161 */ +#define TBASE 0x11A7 /* U+11A7 */ +#define LCOUNT 19 +#define VCOUNT 21 +#define TCOUNT 28 +#define NCOUNT VCOUNT * TCOUNT +#define SCOUNT LCOUNT * NCOUNT + +#ifdef FRONTEND +/* comparison routine for bsearch() of decomposition lookup table. */ +static int +conv_compare(const void *p1, const void *p2) +{ + uint32 v1, + v2; + + v1 = *(const uint32 *) p1; + v2 = ((const pg_unicode_decomposition *) p2)->codepoint; + return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1); +} + +#endif + +/* + * get_code_entry + * + * Get the entry corresponding to code in the decomposition lookup table. + * The backend version of this code uses a perfect hash function for the + * lookup, while the frontend version uses a binary search. + */ +static const pg_unicode_decomposition * +get_code_entry(pg_wchar code) +{ +#ifndef FRONTEND + int h; + uint32 hashkey; + pg_unicode_decompinfo decompinfo = UnicodeDecompInfo; + + /* + * Compute the hash function. The hash key is the codepoint with the bytes + * in network order. + */ + hashkey = pg_hton32(code); + h = decompinfo.hash(&hashkey); + + /* An out-of-range result implies no match */ + if (h < 0 || h >= decompinfo.num_decomps) + return NULL; + + /* + * Since it's a perfect hash, we need only match to the specific codepoint + * it identifies. + */ + if (code != decompinfo.decomps[h].codepoint) + return NULL; + + /* Success! */ + return &decompinfo.decomps[h]; +#else + return bsearch(&(code), + UnicodeDecompMain, + lengthof(UnicodeDecompMain), + sizeof(pg_unicode_decomposition), + conv_compare); +#endif +} + +/* + * Get the combining class of the given codepoint. + */ +static uint8 +get_canonical_class(pg_wchar code) +{ + const pg_unicode_decomposition *entry = get_code_entry(code); + + /* + * If no entries are found, the character used is either an Hangul + * character or a character with a class of 0 and no decompositions. + */ + if (!entry) + return 0; + else + return entry->comb_class; +} + +/* + * Given a decomposition entry looked up earlier, get the decomposed + * characters. + * + * Note: the returned pointer can point to statically allocated buffer, and + * is only valid until next call to this function! + */ +static const pg_wchar * +get_code_decomposition(const pg_unicode_decomposition *entry, int *dec_size) +{ + static pg_wchar x; + + if (DECOMPOSITION_IS_INLINE(entry)) + { + Assert(DECOMPOSITION_SIZE(entry) == 1); + x = (pg_wchar) entry->dec_index; + *dec_size = 1; + return &x; + } + else + { + *dec_size = DECOMPOSITION_SIZE(entry); + return &UnicodeDecomp_codepoints[entry->dec_index]; + } +} + +/* + * Calculate how many characters a given character will decompose to. + * + * This needs to recurse, if the character decomposes into characters that + * are, in turn, decomposable. + */ +static int +get_decomposed_size(pg_wchar code, bool compat) +{ + const pg_unicode_decomposition *entry; + int size = 0; + int i; + const uint32 *decomp; + int dec_size; + + /* + * Fast path for Hangul characters not stored in tables to save memory as + * decomposition is algorithmic. See + * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details + * on the matter. + */ + if (code >= SBASE && code < SBASE + SCOUNT) + { + uint32 tindex, + sindex; + + sindex = code - SBASE; + tindex = sindex % TCOUNT; + + if (tindex != 0) + return 3; + return 2; + } + + entry = get_code_entry(code); + + /* + * Just count current code if no other decompositions. A NULL entry is + * equivalent to a character with class 0 and no decompositions. + */ + if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 || + (!compat && DECOMPOSITION_IS_COMPAT(entry))) + return 1; + + /* + * If this entry has other decomposition codes look at them as well. First + * get its decomposition in the list of tables available. + */ + decomp = get_code_decomposition(entry, &dec_size); + for (i = 0; i < dec_size; i++) + { + uint32 lcode = decomp[i]; + + size += get_decomposed_size(lcode, compat); + } + + return size; +} + +/* + * Recompose a set of characters. For hangul characters, the calculation + * is algorithmic. For others, an inverse lookup at the decomposition + * table is necessary. Returns true if a recomposition can be done, and + * false otherwise. + */ +static bool +recompose_code(uint32 start, uint32 code, uint32 *result) +{ + /* + * Handle Hangul characters algorithmically, per the Unicode spec. + * + * Check if two current characters are L and V. + */ + if (start >= LBASE && start < LBASE + LCOUNT && + code >= VBASE && code < VBASE + VCOUNT) + { + /* make syllable of form LV */ + uint32 lindex = start - LBASE; + uint32 vindex = code - VBASE; + + *result = SBASE + (lindex * VCOUNT + vindex) * TCOUNT; + return true; + } + /* Check if two current characters are LV and T */ + else if (start >= SBASE && start < (SBASE + SCOUNT) && + ((start - SBASE) % TCOUNT) == 0 && + code >= TBASE && code < (TBASE + TCOUNT)) + { + /* make syllable of form LVT */ + uint32 tindex = code - TBASE; + + *result = start + tindex; + return true; + } + else + { + const pg_unicode_decomposition *entry; + + /* + * Do an inverse lookup of the decomposition tables to see if anything + * matches. The comparison just needs to be a perfect match on the + * sub-table of size two, because the start character has already been + * recomposed partially. This lookup uses a perfect hash function for + * the backend code. + */ +#ifndef FRONTEND + + int h, + inv_lookup_index; + uint64 hashkey; + pg_unicode_recompinfo recompinfo = UnicodeRecompInfo; + + /* + * Compute the hash function. The hash key is formed by concatenating + * bytes of the two codepoints in network order. See also + * src/common/unicode/generate-unicode_norm_table.pl. + */ + hashkey = pg_hton64(((uint64) start << 32) | (uint64) code); + h = recompinfo.hash(&hashkey); + + /* An out-of-range result implies no match */ + if (h < 0 || h >= recompinfo.num_recomps) + return false; + + inv_lookup_index = recompinfo.inverse_lookup[h]; + entry = &UnicodeDecompMain[inv_lookup_index]; + + if (start == UnicodeDecomp_codepoints[entry->dec_index] && + code == UnicodeDecomp_codepoints[entry->dec_index + 1]) + { + *result = entry->codepoint; + return true; + } + +#else + + int i; + + for (i = 0; i < lengthof(UnicodeDecompMain); i++) + { + entry = &UnicodeDecompMain[i]; + + if (DECOMPOSITION_SIZE(entry) != 2) + continue; + + if (DECOMPOSITION_NO_COMPOSE(entry)) + continue; + + if (start == UnicodeDecomp_codepoints[entry->dec_index] && + code == UnicodeDecomp_codepoints[entry->dec_index + 1]) + { + *result = entry->codepoint; + return true; + } + } +#endif /* !FRONTEND */ + } + + return false; +} + +/* + * Decompose the given code into the array given by caller. The + * decomposition begins at the position given by caller, saving one + * lookup on the decomposition table. The current position needs to be + * updated here to let the caller know from where to continue filling + * in the array result. + */ +static void +decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current) +{ + const pg_unicode_decomposition *entry; + int i; + const uint32 *decomp; + int dec_size; + + /* + * Fast path for Hangul characters not stored in tables to save memory as + * decomposition is algorithmic. See + * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details + * on the matter. + */ + if (code >= SBASE && code < SBASE + SCOUNT) + { + uint32 l, + v, + tindex, + sindex; + pg_wchar *res = *result; + + sindex = code - SBASE; + l = LBASE + sindex / (VCOUNT * TCOUNT); + v = VBASE + (sindex % (VCOUNT * TCOUNT)) / TCOUNT; + tindex = sindex % TCOUNT; + + res[*current] = l; + (*current)++; + res[*current] = v; + (*current)++; + + if (tindex != 0) + { + res[*current] = TBASE + tindex; + (*current)++; + } + + return; + } + + entry = get_code_entry(code); + + /* + * Just fill in with the current decomposition if there are no + * decomposition codes to recurse to. A NULL entry is equivalent to a + * character with class 0 and no decompositions, so just leave also in + * this case. + */ + if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 || + (!compat && DECOMPOSITION_IS_COMPAT(entry))) + { + pg_wchar *res = *result; + + res[*current] = code; + (*current)++; + return; + } + + /* + * If this entry has other decomposition codes look at them as well. + */ + decomp = get_code_decomposition(entry, &dec_size); + for (i = 0; i < dec_size; i++) + { + pg_wchar lcode = (pg_wchar) decomp[i]; + + /* Leave if no more decompositions */ + decompose_code(lcode, compat, result, current); + } +} + +/* + * unicode_normalize - Normalize a Unicode string to the specified form. + * + * The input is a 0-terminated array of codepoints. + * + * In frontend, returns a 0-terminated array of codepoints, allocated with + * malloc. Or NULL if we run out of memory. In backend, the returned + * string is palloc'd instead, and OOM is reported with ereport(). + */ +pg_wchar * +unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input) +{ + bool compat = (form == UNICODE_NFKC || form == UNICODE_NFKD); + bool recompose = (form == UNICODE_NFC || form == UNICODE_NFKC); + pg_wchar *decomp_chars; + pg_wchar *recomp_chars; + int decomp_size, + current_size; + int count; + const pg_wchar *p; + + /* variables for recomposition */ + int last_class; + int starter_pos; + int target_pos; + uint32 starter_ch; + + /* First, do character decomposition */ + + /* + * Calculate how many characters long the decomposed version will be. + */ + decomp_size = 0; + for (p = input; *p; p++) + decomp_size += get_decomposed_size(*p, compat); + + decomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar)); + if (decomp_chars == NULL) + return NULL; + + /* + * Now fill in each entry recursively. This needs a second pass on the + * decomposition table. + */ + current_size = 0; + for (p = input; *p; p++) + decompose_code(*p, compat, &decomp_chars, ¤t_size); + decomp_chars[decomp_size] = '\0'; + Assert(decomp_size == current_size); + + /* Leave if there is nothing to decompose */ + if (decomp_size == 0) + return decomp_chars; + + /* + * Now apply canonical ordering. + */ + for (count = 1; count < decomp_size; count++) + { + pg_wchar prev = decomp_chars[count - 1]; + pg_wchar next = decomp_chars[count]; + pg_wchar tmp; + const uint8 prevClass = get_canonical_class(prev); + const uint8 nextClass = get_canonical_class(next); + + /* + * Per Unicode (https://www.unicode.org/reports/tr15/tr15-18.html) + * annex 4, a sequence of two adjacent characters in a string is an + * exchangeable pair if the combining class (from the Unicode + * Character Database) for the first character is greater than the + * combining class for the second, and the second is not a starter. A + * character is a starter if its combining class is 0. + */ + if (prevClass == 0 || nextClass == 0) + continue; + + if (prevClass <= nextClass) + continue; + + /* exchange can happen */ + tmp = decomp_chars[count - 1]; + decomp_chars[count - 1] = decomp_chars[count]; + decomp_chars[count] = tmp; + + /* backtrack to check again */ + if (count > 1) + count -= 2; + } + + if (!recompose) + return decomp_chars; + + /* + * The last phase of NFC and NFKC is the recomposition of the reordered + * Unicode string using combining classes. The recomposed string cannot be + * longer than the decomposed one, so make the allocation of the output + * string based on that assumption. + */ + recomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar)); + if (!recomp_chars) + { + FREE(decomp_chars); + return NULL; + } + + last_class = -1; /* this eliminates a special check */ + starter_pos = 0; + target_pos = 1; + starter_ch = recomp_chars[0] = decomp_chars[0]; + + for (count = 1; count < decomp_size; count++) + { + pg_wchar ch = decomp_chars[count]; + int ch_class = get_canonical_class(ch); + pg_wchar composite; + + if (last_class < ch_class && + recompose_code(starter_ch, ch, &composite)) + { + recomp_chars[starter_pos] = composite; + starter_ch = composite; + } + else if (ch_class == 0) + { + starter_pos = target_pos; + starter_ch = ch; + last_class = -1; + recomp_chars[target_pos++] = ch; + } + else + { + last_class = ch_class; + recomp_chars[target_pos++] = ch; + } + } + recomp_chars[target_pos] = (pg_wchar) '\0'; + + FREE(decomp_chars); + + return recomp_chars; +} + +/* + * Normalization "quick check" algorithm; see + * <http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms> + */ + +/* We only need this in the backend. */ +#ifndef FRONTEND + +static const pg_unicode_normprops * +qc_hash_lookup(pg_wchar ch, const pg_unicode_norminfo *norminfo) +{ + int h; + uint32 hashkey; + + /* + * Compute the hash function. The hash key is the codepoint with the bytes + * in network order. + */ + hashkey = pg_hton32(ch); + h = norminfo->hash(&hashkey); + + /* An out-of-range result implies no match */ + if (h < 0 || h >= norminfo->num_normprops) + return NULL; + + /* + * Since it's a perfect hash, we need only match to the specific codepoint + * it identifies. + */ + if (ch != norminfo->normprops[h].codepoint) + return NULL; + + /* Success! */ + return &norminfo->normprops[h]; +} + +/* + * Look up the normalization quick check character property + */ +static UnicodeNormalizationQC +qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch) +{ + const pg_unicode_normprops *found = NULL; + + switch (form) + { + case UNICODE_NFC: + found = qc_hash_lookup(ch, &UnicodeNormInfo_NFC_QC); + break; + case UNICODE_NFKC: + found = qc_hash_lookup(ch, &UnicodeNormInfo_NFKC_QC); + break; + default: + Assert(false); + break; + } + + if (found) + return found->quickcheck; + else + return UNICODE_NORM_QC_YES; +} + +UnicodeNormalizationQC +unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input) +{ + uint8 lastCanonicalClass = 0; + UnicodeNormalizationQC result = UNICODE_NORM_QC_YES; + + /* + * For the "D" forms, we don't run the quickcheck. We don't include the + * lookup tables for those because they are huge, checking for these + * particular forms is less common, and running the slow path is faster + * for the "D" forms than the "C" forms because you don't need to + * recompose, which is slow. + */ + if (form == UNICODE_NFD || form == UNICODE_NFKD) + return UNICODE_NORM_QC_MAYBE; + + for (const pg_wchar *p = input; *p; p++) + { + pg_wchar ch = *p; + uint8 canonicalClass; + UnicodeNormalizationQC check; + + canonicalClass = get_canonical_class(ch); + if (lastCanonicalClass > canonicalClass && canonicalClass != 0) + return UNICODE_NORM_QC_NO; + + check = qc_is_allowed(form, ch); + if (check == UNICODE_NORM_QC_NO) + return UNICODE_NORM_QC_NO; + else if (check == UNICODE_NORM_QC_MAYBE) + result = UNICODE_NORM_QC_MAYBE; + + lastCanonicalClass = canonicalClass; + } + return result; +} + +#endif /* !FRONTEND */ diff --git a/src/common/username.c b/src/common/username.c new file mode 100644 index 0000000..e8ac4c4 --- /dev/null +++ b/src/common/username.c @@ -0,0 +1,87 @@ +/*------------------------------------------------------------------------- + * + * username.c + * get user name + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/username.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <pwd.h> +#include <unistd.h> + +#include "common/username.h" + +/* + * Returns the current user name in a static buffer + * On error, returns NULL and sets *errstr to point to a palloc'd message + */ +const char * +get_user_name(char **errstr) +{ +#ifndef WIN32 + struct passwd *pw; + uid_t user_id = geteuid(); + + *errstr = NULL; + + errno = 0; /* clear errno before call */ + pw = getpwuid(user_id); + if (!pw) + { + *errstr = psprintf(_("could not look up effective user ID %ld: %s"), + (long) user_id, + errno ? strerror(errno) : _("user does not exist")); + return NULL; + } + + return pw->pw_name; +#else + /* Microsoft recommends buffer size of UNLEN+1, where UNLEN = 256 */ + /* "static" variable remains after function exit */ + static char username[256 + 1]; + DWORD len = sizeof(username); + + *errstr = NULL; + + if (!GetUserName(username, &len)) + { + *errstr = psprintf(_("user name lookup failure: error code %lu"), + GetLastError()); + return NULL; + } + + return username; +#endif +} + + +/* + * Returns the current user name in a static buffer or exits + */ +const char * +get_user_name_or_exit(const char *progname) +{ + const char *user_name; + char *errstr; + + user_name = get_user_name(&errstr); + + if (!user_name) + { + fprintf(stderr, "%s: %s\n", progname, errstr); + exit(1); + } + return user_name; +} diff --git a/src/common/wait_error.c b/src/common/wait_error.c new file mode 100644 index 0000000..a90b745 --- /dev/null +++ b/src/common/wait_error.c @@ -0,0 +1,148 @@ +/*------------------------------------------------------------------------- + * + * wait_error.c + * Convert a wait/waitpid(2) result code to a human-readable string + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/wait_error.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <signal.h> +#include <sys/wait.h> + +/* + * Return a human-readable string explaining the reason a child process + * terminated. The argument is a return code returned by wait(2) or + * waitpid(2), which also applies to pclose(3) and system(3). The result is a + * translated, palloc'd or malloc'd string. + */ +char * +wait_result_to_str(int exitstatus) +{ + char str[512]; + + /* + * To simplify using this after pclose() and system(), handle status -1 + * first. In that case, there is no wait result but some error indicated + * by errno. + */ + if (exitstatus == -1) + { + snprintf(str, sizeof(str), "%m"); + } + else if (WIFEXITED(exitstatus)) + { + /* + * Give more specific error message for some common exit codes that + * have a special meaning in shells. + */ + switch (WEXITSTATUS(exitstatus)) + { + case 126: + snprintf(str, sizeof(str), _("command not executable")); + break; + + case 127: + snprintf(str, sizeof(str), _("command not found")); + break; + + default: + snprintf(str, sizeof(str), + _("child process exited with exit code %d"), + WEXITSTATUS(exitstatus)); + } + } + else if (WIFSIGNALED(exitstatus)) + { +#if defined(WIN32) + snprintf(str, sizeof(str), + _("child process was terminated by exception 0x%X"), + WTERMSIG(exitstatus)); +#else + snprintf(str, sizeof(str), + _("child process was terminated by signal %d: %s"), + WTERMSIG(exitstatus), pg_strsignal(WTERMSIG(exitstatus))); +#endif + } + else + snprintf(str, sizeof(str), + _("child process exited with unrecognized status %d"), + exitstatus); + + return pstrdup(str); +} + +/* + * Return true if a wait(2) result indicates that the child process + * died due to the specified signal. + * + * The reason this is worth having a wrapper function for is that + * there are two cases: the signal might have been received by our + * immediate child process, or there might've been a shell process + * between us and the child that died. The shell will, per POSIX, + * report the child death using exit code 128 + signal number. + * + * If there is no possibility of an intermediate shell, this function + * need not (and probably should not) be used. + */ +bool +wait_result_is_signal(int exit_status, int signum) +{ + if (WIFSIGNALED(exit_status) && WTERMSIG(exit_status) == signum) + return true; + if (WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == 128 + signum) + return true; + return false; +} + +/* + * Return true if a wait(2) result indicates that the child process + * died due to any signal. We consider either direct child death + * or a shell report of child process death as matching the condition. + * + * If include_command_not_found is true, also return true for shell + * exit codes indicating "command not found" and the like + * (specifically, exit codes 126 and 127; see above). + */ +bool +wait_result_is_any_signal(int exit_status, bool include_command_not_found) +{ + if (WIFSIGNALED(exit_status)) + return true; + if (WIFEXITED(exit_status) && + WEXITSTATUS(exit_status) > (include_command_not_found ? 125 : 128)) + return true; + return false; +} + +/* + * Return the shell exit code (normally 0 to 255) that corresponds to the + * given wait status. The argument is a wait status as returned by wait(2) + * or waitpid(2), which also applies to pclose(3) and system(3). To support + * the latter two cases, we pass through "-1" unchanged. + */ +int +wait_result_to_exit_code(int exit_status) +{ + if (exit_status == -1) + return -1; /* failure of pclose() or system() */ + if (WIFEXITED(exit_status)) + return WEXITSTATUS(exit_status); + if (WIFSIGNALED(exit_status)) + return 128 + WTERMSIG(exit_status); + /* On many systems, this is unreachable */ + return -1; +} diff --git a/src/common/wchar.c b/src/common/wchar.c new file mode 100644 index 0000000..fbac11d --- /dev/null +++ b/src/common/wchar.c @@ -0,0 +1,2194 @@ +/*------------------------------------------------------------------------- + * + * wchar.c + * Functions for working with multibyte characters in various encodings. + * + * Portions Copyright (c) 1998-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/wchar.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "mb/pg_wchar.h" +#include "utils/ascii.h" + + +/* + * Operations on multi-byte encodings are driven by a table of helper + * functions. + * + * To add an encoding support, define mblen(), dsplen(), verifychar() and + * verifystr() for the encoding. For server-encodings, also define mb2wchar() + * and wchar2mb() conversion functions. + * + * These functions generally assume that their input is validly formed. + * The "verifier" functions, further down in the file, have to be more + * paranoid. + * + * We expect that mblen() does not need to examine more than the first byte + * of the character to discover the correct length. GB18030 is an exception + * to that rule, though, as it also looks at second byte. But even that + * behaves in a predictable way, if you only pass the first byte: it will + * treat 4-byte encoded characters as two 2-byte encoded characters, which is + * good enough for all current uses. + * + * Note: for the display output of psql to work properly, the return values + * of the dsplen functions must conform to the Unicode standard. In particular + * the NUL character is zero width and control characters are generally + * width -1. It is recommended that non-ASCII encodings refer their ASCII + * subset to the ASCII routines to ensure consistency. + */ + +/* + * SQL/ASCII + */ +static int +pg_ascii2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ + int cnt = 0; + + while (len > 0 && *from) + { + *to++ = *from++; + len--; + cnt++; + } + *to = 0; + return cnt; +} + +static int +pg_ascii_mblen(const unsigned char *s) +{ + return 1; +} + +static int +pg_ascii_dsplen(const unsigned char *s) +{ + if (*s == '\0') + return 0; + if (*s < 0x20 || *s == 0x7f) + return -1; + + return 1; +} + +/* + * EUC + */ +static int +pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ + int cnt = 0; + + while (len > 0 && *from) + { + if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte + * KANA") */ + { + from++; + *to = (SS2 << 8) | *from++; + len -= 2; + } + else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */ + { + from++; + *to = (SS3 << 16) | (*from++ << 8); + *to |= *from++; + len -= 3; + } + else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */ + { + *to = *from++ << 8; + *to |= *from++; + len -= 2; + } + else /* must be ASCII */ + { + *to = *from++; + len--; + } + to++; + cnt++; + } + *to = 0; + return cnt; +} + +static inline int +pg_euc_mblen(const unsigned char *s) +{ + int len; + + if (*s == SS2) + len = 2; + else if (*s == SS3) + len = 3; + else if (IS_HIGHBIT_SET(*s)) + len = 2; + else + len = 1; + return len; +} + +static inline int +pg_euc_dsplen(const unsigned char *s) +{ + int len; + + if (*s == SS2) + len = 2; + else if (*s == SS3) + len = 2; + else if (IS_HIGHBIT_SET(*s)) + len = 2; + else + len = pg_ascii_dsplen(s); + return len; +} + +/* + * EUC_JP + */ +static int +pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ + return pg_euc2wchar_with_len(from, to, len); +} + +static int +pg_eucjp_mblen(const unsigned char *s) +{ + return pg_euc_mblen(s); +} + +static int +pg_eucjp_dsplen(const unsigned char *s) +{ + int len; + + if (*s == SS2) + len = 1; + else if (*s == SS3) + len = 2; + else if (IS_HIGHBIT_SET(*s)) + len = 2; + else + len = pg_ascii_dsplen(s); + return len; +} + +/* + * EUC_KR + */ +static int +pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ + return pg_euc2wchar_with_len(from, to, len); +} + +static int +pg_euckr_mblen(const unsigned char *s) +{ + return pg_euc_mblen(s); +} + +static int +pg_euckr_dsplen(const unsigned char *s) +{ + return pg_euc_dsplen(s); +} + +/* + * EUC_CN + * + */ +static int +pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ + int cnt = 0; + + while (len > 0 && *from) + { + if (*from == SS2 && len >= 3) /* code set 2 (unused?) */ + { + from++; + *to = (SS2 << 16) | (*from++ << 8); + *to |= *from++; + len -= 3; + } + else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */ + { + from++; + *to = (SS3 << 16) | (*from++ << 8); + *to |= *from++; + len -= 3; + } + else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */ + { + *to = *from++ << 8; + *to |= *from++; + len -= 2; + } + else + { + *to = *from++; + len--; + } + to++; + cnt++; + } + *to = 0; + return cnt; +} + +static int +pg_euccn_mblen(const unsigned char *s) +{ + int len; + + if (IS_HIGHBIT_SET(*s)) + len = 2; + else + len = 1; + return len; +} + +static int +pg_euccn_dsplen(const unsigned char *s) +{ + int len; + + if (IS_HIGHBIT_SET(*s)) + len = 2; + else + len = pg_ascii_dsplen(s); + return len; +} + +/* + * EUC_TW + * + */ +static int +pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ + int cnt = 0; + + while (len > 0 && *from) + { + if (*from == SS2 && len >= 4) /* code set 2 */ + { + from++; + *to = (((uint32) SS2) << 24) | (*from++ << 16); + *to |= *from++ << 8; + *to |= *from++; + len -= 4; + } + else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */ + { + from++; + *to = (SS3 << 16) | (*from++ << 8); + *to |= *from++; + len -= 3; + } + else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */ + { + *to = *from++ << 8; + *to |= *from++; + len -= 2; + } + else + { + *to = *from++; + len--; + } + to++; + cnt++; + } + *to = 0; + return cnt; +} + +static int +pg_euctw_mblen(const unsigned char *s) +{ + int len; + + if (*s == SS2) + len = 4; + else if (*s == SS3) + len = 3; + else if (IS_HIGHBIT_SET(*s)) + len = 2; + else + len = 1; + return len; +} + +static int +pg_euctw_dsplen(const unsigned char *s) +{ + int len; + + if (*s == SS2) + len = 2; + else if (*s == SS3) + len = 2; + else if (IS_HIGHBIT_SET(*s)) + len = 2; + else + len = pg_ascii_dsplen(s); + return len; +} + +/* + * Convert pg_wchar to EUC_* encoding. + * caller must allocate enough space for "to", including a trailing zero! + * len: length of from. + * "from" not necessarily null terminated. + */ +static int +pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len) +{ + int cnt = 0; + + while (len > 0 && *from) + { + unsigned char c; + + if ((c = (*from >> 24))) + { + *to++ = c; + *to++ = (*from >> 16) & 0xff; + *to++ = (*from >> 8) & 0xff; + *to++ = *from & 0xff; + cnt += 4; + } + else if ((c = (*from >> 16))) + { + *to++ = c; + *to++ = (*from >> 8) & 0xff; + *to++ = *from & 0xff; + cnt += 3; + } + else if ((c = (*from >> 8))) + { + *to++ = c; + *to++ = *from & 0xff; + cnt += 2; + } + else + { + *to++ = *from; + cnt++; + } + from++; + len--; + } + *to = 0; + return cnt; +} + + +/* + * JOHAB + */ +static int +pg_johab_mblen(const unsigned char *s) +{ + return pg_euc_mblen(s); +} + +static int +pg_johab_dsplen(const unsigned char *s) +{ + return pg_euc_dsplen(s); +} + +/* + * convert UTF8 string to pg_wchar (UCS-4) + * caller must allocate enough space for "to", including a trailing zero! + * len: length of from. + * "from" not necessarily null terminated. + */ +static int +pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ + int cnt = 0; + uint32 c1, + c2, + c3, + c4; + + while (len > 0 && *from) + { + if ((*from & 0x80) == 0) + { + *to = *from++; + len--; + } + else if ((*from & 0xe0) == 0xc0) + { + if (len < 2) + break; /* drop trailing incomplete char */ + c1 = *from++ & 0x1f; + c2 = *from++ & 0x3f; + *to = (c1 << 6) | c2; + len -= 2; + } + else if ((*from & 0xf0) == 0xe0) + { + if (len < 3) + break; /* drop trailing incomplete char */ + c1 = *from++ & 0x0f; + c2 = *from++ & 0x3f; + c3 = *from++ & 0x3f; + *to = (c1 << 12) | (c2 << 6) | c3; + len -= 3; + } + else if ((*from & 0xf8) == 0xf0) + { + if (len < 4) + break; /* drop trailing incomplete char */ + c1 = *from++ & 0x07; + c2 = *from++ & 0x3f; + c3 = *from++ & 0x3f; + c4 = *from++ & 0x3f; + *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4; + len -= 4; + } + else + { + /* treat a bogus char as length 1; not ours to raise error */ + *to = *from++; + len--; + } + to++; + cnt++; + } + *to = 0; + return cnt; +} + + +/* + * Map a Unicode code point to UTF-8. utf8string must have 4 bytes of + * space allocated. + */ +unsigned char * +unicode_to_utf8(pg_wchar c, unsigned char *utf8string) +{ + if (c <= 0x7F) + { + utf8string[0] = c; + } + else if (c <= 0x7FF) + { + utf8string[0] = 0xC0 | ((c >> 6) & 0x1F); + utf8string[1] = 0x80 | (c & 0x3F); + } + else if (c <= 0xFFFF) + { + utf8string[0] = 0xE0 | ((c >> 12) & 0x0F); + utf8string[1] = 0x80 | ((c >> 6) & 0x3F); + utf8string[2] = 0x80 | (c & 0x3F); + } + else + { + utf8string[0] = 0xF0 | ((c >> 18) & 0x07); + utf8string[1] = 0x80 | ((c >> 12) & 0x3F); + utf8string[2] = 0x80 | ((c >> 6) & 0x3F); + utf8string[3] = 0x80 | (c & 0x3F); + } + + return utf8string; +} + +/* + * Trivial conversion from pg_wchar to UTF-8. + * caller should allocate enough space for "to" + * len: length of from. + * "from" not necessarily null terminated. + */ +static int +pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len) +{ + int cnt = 0; + + while (len > 0 && *from) + { + int char_len; + + unicode_to_utf8(*from, to); + char_len = pg_utf_mblen(to); + cnt += char_len; + to += char_len; + from++; + len--; + } + *to = 0; + return cnt; +} + +/* + * Return the byte length of a UTF8 character pointed to by s + * + * Note: in the current implementation we do not support UTF8 sequences + * of more than 4 bytes; hence do NOT return a value larger than 4. + * We return "1" for any leading byte that is either flat-out illegal or + * indicates a length larger than we support. + * + * pg_utf2wchar_with_len(), utf8_to_unicode(), pg_utf8_islegal(), and perhaps + * other places would need to be fixed to change this. + */ +int +pg_utf_mblen(const unsigned char *s) +{ + int len; + + if ((*s & 0x80) == 0) + len = 1; + else if ((*s & 0xe0) == 0xc0) + len = 2; + else if ((*s & 0xf0) == 0xe0) + len = 3; + else if ((*s & 0xf8) == 0xf0) + len = 4; +#ifdef NOT_USED + else if ((*s & 0xfc) == 0xf8) + len = 5; + else if ((*s & 0xfe) == 0xfc) + len = 6; +#endif + else + len = 1; + return len; +} + +/* + * This is an implementation of wcwidth() and wcswidth() as defined in + * "The Single UNIX Specification, Version 2, The Open Group, 1997" + * <http://www.unix.org/online.html> + * + * Markus Kuhn -- 2001-09-08 -- public domain + * + * customised for PostgreSQL + * + * original available at : http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + */ + +struct mbinterval +{ + unsigned int first; + unsigned int last; +}; + +/* auxiliary function for binary search in interval table */ +static int +mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max) +{ + int min = 0; + int mid; + + if (ucs < table[0].first || ucs > table[max].last) + return 0; + while (max >= min) + { + mid = (min + max) / 2; + if (ucs > table[mid].last) + min = mid + 1; + else if (ucs < table[mid].first) + max = mid - 1; + else + return 1; + } + + return 0; +} + + +/* The following functions define the column width of an ISO 10646 + * character as follows: + * + * - The null character (U+0000) has a column width of 0. + * + * - Other C0/C1 control characters and DEL will lead to a return + * value of -1. + * + * - Non-spacing and enclosing combining characters (general + * category code Mn, Me or Cf in the Unicode database) have a + * column width of 0. + * + * - Spacing characters in the East Asian Wide (W) or East Asian + * FullWidth (F) category as defined in Unicode Technical + * Report #11 have a column width of 2. + * + * - All remaining characters (including all printable + * ISO 8859-1 and WGL4 characters, Unicode control characters, + * etc.) have a column width of 1. + * + * This implementation assumes that wchar_t characters are encoded + * in ISO 10646. + */ + +static int +ucs_wcwidth(pg_wchar ucs) +{ +#include "common/unicode_nonspacing_table.h" +#include "common/unicode_east_asian_fw_table.h" + + /* test for 8-bit control characters */ + if (ucs == 0) + return 0; + + if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff) + return -1; + + /* + * binary search in table of non-spacing characters + * + * XXX: In the official Unicode sources, it is possible for a character to + * be described as both non-spacing and wide at the same time. As of + * Unicode 13.0, treating the non-spacing property as the determining + * factor for display width leads to the correct behavior, so do that + * search first. + */ + if (mbbisearch(ucs, nonspacing, + sizeof(nonspacing) / sizeof(struct mbinterval) - 1)) + return 0; + + /* binary search in table of wide characters */ + if (mbbisearch(ucs, east_asian_fw, + sizeof(east_asian_fw) / sizeof(struct mbinterval) - 1)) + return 2; + + return 1; +} + +/* + * Convert a UTF-8 character to a Unicode code point. + * This is a one-character version of pg_utf2wchar_with_len. + * + * No error checks here, c must point to a long-enough string. + */ +pg_wchar +utf8_to_unicode(const unsigned char *c) +{ + if ((*c & 0x80) == 0) + return (pg_wchar) c[0]; + else if ((*c & 0xe0) == 0xc0) + return (pg_wchar) (((c[0] & 0x1f) << 6) | + (c[1] & 0x3f)); + else if ((*c & 0xf0) == 0xe0) + return (pg_wchar) (((c[0] & 0x0f) << 12) | + ((c[1] & 0x3f) << 6) | + (c[2] & 0x3f)); + else if ((*c & 0xf8) == 0xf0) + return (pg_wchar) (((c[0] & 0x07) << 18) | + ((c[1] & 0x3f) << 12) | + ((c[2] & 0x3f) << 6) | + (c[3] & 0x3f)); + else + /* that is an invalid code on purpose */ + return 0xffffffff; +} + +static int +pg_utf_dsplen(const unsigned char *s) +{ + return ucs_wcwidth(utf8_to_unicode(s)); +} + +/* + * convert mule internal code to pg_wchar + * caller should allocate enough space for "to" + * len: length of from. + * "from" not necessarily null terminated. + */ +static int +pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ + int cnt = 0; + + while (len > 0 && *from) + { + if (IS_LC1(*from) && len >= 2) + { + *to = *from++ << 16; + *to |= *from++; + len -= 2; + } + else if (IS_LCPRV1(*from) && len >= 3) + { + from++; + *to = *from++ << 16; + *to |= *from++; + len -= 3; + } + else if (IS_LC2(*from) && len >= 3) + { + *to = *from++ << 16; + *to |= *from++ << 8; + *to |= *from++; + len -= 3; + } + else if (IS_LCPRV2(*from) && len >= 4) + { + from++; + *to = *from++ << 16; + *to |= *from++ << 8; + *to |= *from++; + len -= 4; + } + else + { /* assume ASCII */ + *to = (unsigned char) *from++; + len--; + } + to++; + cnt++; + } + *to = 0; + return cnt; +} + +/* + * convert pg_wchar to mule internal code + * caller should allocate enough space for "to" + * len: length of from. + * "from" not necessarily null terminated. + */ +static int +pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len) +{ + int cnt = 0; + + while (len > 0 && *from) + { + unsigned char lb; + + lb = (*from >> 16) & 0xff; + if (IS_LC1(lb)) + { + *to++ = lb; + *to++ = *from & 0xff; + cnt += 2; + } + else if (IS_LC2(lb)) + { + *to++ = lb; + *to++ = (*from >> 8) & 0xff; + *to++ = *from & 0xff; + cnt += 3; + } + else if (IS_LCPRV1_A_RANGE(lb)) + { + *to++ = LCPRV1_A; + *to++ = lb; + *to++ = *from & 0xff; + cnt += 3; + } + else if (IS_LCPRV1_B_RANGE(lb)) + { + *to++ = LCPRV1_B; + *to++ = lb; + *to++ = *from & 0xff; + cnt += 3; + } + else if (IS_LCPRV2_A_RANGE(lb)) + { + *to++ = LCPRV2_A; + *to++ = lb; + *to++ = (*from >> 8) & 0xff; + *to++ = *from & 0xff; + cnt += 4; + } + else if (IS_LCPRV2_B_RANGE(lb)) + { + *to++ = LCPRV2_B; + *to++ = lb; + *to++ = (*from >> 8) & 0xff; + *to++ = *from & 0xff; + cnt += 4; + } + else + { + *to++ = *from & 0xff; + cnt += 1; + } + from++; + len--; + } + *to = 0; + return cnt; +} + +/* exported for direct use by conv.c */ +int +pg_mule_mblen(const unsigned char *s) +{ + int len; + + if (IS_LC1(*s)) + len = 2; + else if (IS_LCPRV1(*s)) + len = 3; + else if (IS_LC2(*s)) + len = 3; + else if (IS_LCPRV2(*s)) + len = 4; + else + len = 1; /* assume ASCII */ + return len; +} + +static int +pg_mule_dsplen(const unsigned char *s) +{ + int len; + + /* + * Note: it's not really appropriate to assume that all multibyte charsets + * are double-wide on screen. But this seems an okay approximation for + * the MULE charsets we currently support. + */ + + if (IS_LC1(*s)) + len = 1; + else if (IS_LCPRV1(*s)) + len = 1; + else if (IS_LC2(*s)) + len = 2; + else if (IS_LCPRV2(*s)) + len = 2; + else + len = 1; /* assume ASCII */ + + return len; +} + +/* + * ISO8859-1 + */ +static int +pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ + int cnt = 0; + + while (len > 0 && *from) + { + *to++ = *from++; + len--; + cnt++; + } + *to = 0; + return cnt; +} + +/* + * Trivial conversion from pg_wchar to single byte encoding. Just ignores + * high bits. + * caller should allocate enough space for "to" + * len: length of from. + * "from" not necessarily null terminated. + */ +static int +pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len) +{ + int cnt = 0; + + while (len > 0 && *from) + { + *to++ = *from++; + len--; + cnt++; + } + *to = 0; + return cnt; +} + +static int +pg_latin1_mblen(const unsigned char *s) +{ + return 1; +} + +static int +pg_latin1_dsplen(const unsigned char *s) +{ + return pg_ascii_dsplen(s); +} + +/* + * SJIS + */ +static int +pg_sjis_mblen(const unsigned char *s) +{ + int len; + + if (*s >= 0xa1 && *s <= 0xdf) + len = 1; /* 1 byte kana? */ + else if (IS_HIGHBIT_SET(*s)) + len = 2; /* kanji? */ + else + len = 1; /* should be ASCII */ + return len; +} + +static int +pg_sjis_dsplen(const unsigned char *s) +{ + int len; + + if (*s >= 0xa1 && *s <= 0xdf) + len = 1; /* 1 byte kana? */ + else if (IS_HIGHBIT_SET(*s)) + len = 2; /* kanji? */ + else + len = pg_ascii_dsplen(s); /* should be ASCII */ + return len; +} + +/* + * Big5 + */ +static int +pg_big5_mblen(const unsigned char *s) +{ + int len; + + if (IS_HIGHBIT_SET(*s)) + len = 2; /* kanji? */ + else + len = 1; /* should be ASCII */ + return len; +} + +static int +pg_big5_dsplen(const unsigned char *s) +{ + int len; + + if (IS_HIGHBIT_SET(*s)) + len = 2; /* kanji? */ + else + len = pg_ascii_dsplen(s); /* should be ASCII */ + return len; +} + +/* + * GBK + */ +static int +pg_gbk_mblen(const unsigned char *s) +{ + int len; + + if (IS_HIGHBIT_SET(*s)) + len = 2; /* kanji? */ + else + len = 1; /* should be ASCII */ + return len; +} + +static int +pg_gbk_dsplen(const unsigned char *s) +{ + int len; + + if (IS_HIGHBIT_SET(*s)) + len = 2; /* kanji? */ + else + len = pg_ascii_dsplen(s); /* should be ASCII */ + return len; +} + +/* + * UHC + */ +static int +pg_uhc_mblen(const unsigned char *s) +{ + int len; + + if (IS_HIGHBIT_SET(*s)) + len = 2; /* 2byte? */ + else + len = 1; /* should be ASCII */ + return len; +} + +static int +pg_uhc_dsplen(const unsigned char *s) +{ + int len; + + if (IS_HIGHBIT_SET(*s)) + len = 2; /* 2byte? */ + else + len = pg_ascii_dsplen(s); /* should be ASCII */ + return len; +} + +/* + * GB18030 + * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp> + */ + +/* + * Unlike all other mblen() functions, this also looks at the second byte of + * the input. However, if you only pass the first byte of a multi-byte + * string, and \0 as the second byte, this still works in a predictable way: + * a 4-byte character will be reported as two 2-byte characters. That's + * enough for all current uses, as a client-only encoding. It works that + * way, because in any valid 4-byte GB18030-encoded character, the third and + * fourth byte look like a 2-byte encoded character, when looked at + * separately. + */ +static int +pg_gb18030_mblen(const unsigned char *s) +{ + int len; + + if (!IS_HIGHBIT_SET(*s)) + len = 1; /* ASCII */ + else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39) + len = 4; + else + len = 2; + return len; +} + +static int +pg_gb18030_dsplen(const unsigned char *s) +{ + int len; + + if (IS_HIGHBIT_SET(*s)) + len = 2; + else + len = pg_ascii_dsplen(s); /* ASCII */ + return len; +} + +/* + *------------------------------------------------------------------- + * multibyte sequence validators + * + * The verifychar functions accept "s", a pointer to the first byte of a + * string, and "len", the remaining length of the string. If there is a + * validly encoded character beginning at *s, return its length in bytes; + * else return -1. + * + * The verifystr functions also accept "s", a pointer to a string and "len", + * the length of the string. They verify the whole string, and return the + * number of input bytes (<= len) that are valid. In other words, if the + * whole string is valid, verifystr returns "len", otherwise it returns the + * byte offset of the first invalid character. The verifystr functions must + * test for and reject zeroes in the input. + * + * The verifychar functions can assume that len > 0 and that *s != '\0', but + * they must test for and reject zeroes in any additional bytes of a + * multibyte character. Note that this definition allows the function for a + * single-byte encoding to be just "return 1". + *------------------------------------------------------------------- + */ +static int +pg_ascii_verifychar(const unsigned char *s, int len) +{ + return 1; +} + +static int +pg_ascii_verifystr(const unsigned char *s, int len) +{ + const unsigned char *nullpos = memchr(s, 0, len); + + if (nullpos == NULL) + return len; + else + return nullpos - s; +} + +#define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe) + +static int +pg_eucjp_verifychar(const unsigned char *s, int len) +{ + int l; + unsigned char c1, + c2; + + c1 = *s++; + + switch (c1) + { + case SS2: /* JIS X 0201 */ + l = 2; + if (l > len) + return -1; + c2 = *s++; + if (c2 < 0xa1 || c2 > 0xdf) + return -1; + break; + + case SS3: /* JIS X 0212 */ + l = 3; + if (l > len) + return -1; + c2 = *s++; + if (!IS_EUC_RANGE_VALID(c2)) + return -1; + c2 = *s++; + if (!IS_EUC_RANGE_VALID(c2)) + return -1; + break; + + default: + if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */ + { + l = 2; + if (l > len) + return -1; + if (!IS_EUC_RANGE_VALID(c1)) + return -1; + c2 = *s++; + if (!IS_EUC_RANGE_VALID(c2)) + return -1; + } + else + /* must be ASCII */ + { + l = 1; + } + break; + } + + return l; +} + +static int +pg_eucjp_verifystr(const unsigned char *s, int len) +{ + const unsigned char *start = s; + + while (len > 0) + { + int l; + + /* fast path for ASCII-subset characters */ + if (!IS_HIGHBIT_SET(*s)) + { + if (*s == '\0') + break; + l = 1; + } + else + { + l = pg_eucjp_verifychar(s, len); + if (l == -1) + break; + } + s += l; + len -= l; + } + + return s - start; +} + +static int +pg_euckr_verifychar(const unsigned char *s, int len) +{ + int l; + unsigned char c1, + c2; + + c1 = *s++; + + if (IS_HIGHBIT_SET(c1)) + { + l = 2; + if (l > len) + return -1; + if (!IS_EUC_RANGE_VALID(c1)) + return -1; + c2 = *s++; + if (!IS_EUC_RANGE_VALID(c2)) + return -1; + } + else + /* must be ASCII */ + { + l = 1; + } + + return l; +} + +static int +pg_euckr_verifystr(const unsigned char *s, int len) +{ + const unsigned char *start = s; + + while (len > 0) + { + int l; + + /* fast path for ASCII-subset characters */ + if (!IS_HIGHBIT_SET(*s)) + { + if (*s == '\0') + break; + l = 1; + } + else + { + l = pg_euckr_verifychar(s, len); + if (l == -1) + break; + } + s += l; + len -= l; + } + + return s - start; +} + +/* EUC-CN byte sequences are exactly same as EUC-KR */ +#define pg_euccn_verifychar pg_euckr_verifychar +#define pg_euccn_verifystr pg_euckr_verifystr + +static int +pg_euctw_verifychar(const unsigned char *s, int len) +{ + int l; + unsigned char c1, + c2; + + c1 = *s++; + + switch (c1) + { + case SS2: /* CNS 11643 Plane 1-7 */ + l = 4; + if (l > len) + return -1; + c2 = *s++; + if (c2 < 0xa1 || c2 > 0xa7) + return -1; + c2 = *s++; + if (!IS_EUC_RANGE_VALID(c2)) + return -1; + c2 = *s++; + if (!IS_EUC_RANGE_VALID(c2)) + return -1; + break; + + case SS3: /* unused */ + return -1; + + default: + if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */ + { + l = 2; + if (l > len) + return -1; + /* no further range check on c1? */ + c2 = *s++; + if (!IS_EUC_RANGE_VALID(c2)) + return -1; + } + else + /* must be ASCII */ + { + l = 1; + } + break; + } + return l; +} + +static int +pg_euctw_verifystr(const unsigned char *s, int len) +{ + const unsigned char *start = s; + + while (len > 0) + { + int l; + + /* fast path for ASCII-subset characters */ + if (!IS_HIGHBIT_SET(*s)) + { + if (*s == '\0') + break; + l = 1; + } + else + { + l = pg_euctw_verifychar(s, len); + if (l == -1) + break; + } + s += l; + len -= l; + } + + return s - start; +} + +static int +pg_johab_verifychar(const unsigned char *s, int len) +{ + int l, + mbl; + unsigned char c; + + l = mbl = pg_johab_mblen(s); + + if (len < l) + return -1; + + if (!IS_HIGHBIT_SET(*s)) + return mbl; + + while (--l > 0) + { + c = *++s; + if (!IS_EUC_RANGE_VALID(c)) + return -1; + } + return mbl; +} + +static int +pg_johab_verifystr(const unsigned char *s, int len) +{ + const unsigned char *start = s; + + while (len > 0) + { + int l; + + /* fast path for ASCII-subset characters */ + if (!IS_HIGHBIT_SET(*s)) + { + if (*s == '\0') + break; + l = 1; + } + else + { + l = pg_johab_verifychar(s, len); + if (l == -1) + break; + } + s += l; + len -= l; + } + + return s - start; +} + +static int +pg_mule_verifychar(const unsigned char *s, int len) +{ + int l, + mbl; + unsigned char c; + + l = mbl = pg_mule_mblen(s); + + if (len < l) + return -1; + + while (--l > 0) + { + c = *++s; + if (!IS_HIGHBIT_SET(c)) + return -1; + } + return mbl; +} + +static int +pg_mule_verifystr(const unsigned char *s, int len) +{ + const unsigned char *start = s; + + while (len > 0) + { + int l; + + /* fast path for ASCII-subset characters */ + if (!IS_HIGHBIT_SET(*s)) + { + if (*s == '\0') + break; + l = 1; + } + else + { + l = pg_mule_verifychar(s, len); + if (l == -1) + break; + } + s += l; + len -= l; + } + + return s - start; +} + +static int +pg_latin1_verifychar(const unsigned char *s, int len) +{ + return 1; +} + +static int +pg_latin1_verifystr(const unsigned char *s, int len) +{ + const unsigned char *nullpos = memchr(s, 0, len); + + if (nullpos == NULL) + return len; + else + return nullpos - s; +} + +static int +pg_sjis_verifychar(const unsigned char *s, int len) +{ + int l, + mbl; + unsigned char c1, + c2; + + l = mbl = pg_sjis_mblen(s); + + if (len < l) + return -1; + + if (l == 1) /* pg_sjis_mblen already verified it */ + return mbl; + + c1 = *s++; + c2 = *s; + if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2)) + return -1; + return mbl; +} + +static int +pg_sjis_verifystr(const unsigned char *s, int len) +{ + const unsigned char *start = s; + + while (len > 0) + { + int l; + + /* fast path for ASCII-subset characters */ + if (!IS_HIGHBIT_SET(*s)) + { + if (*s == '\0') + break; + l = 1; + } + else + { + l = pg_sjis_verifychar(s, len); + if (l == -1) + break; + } + s += l; + len -= l; + } + + return s - start; +} + +static int +pg_big5_verifychar(const unsigned char *s, int len) +{ + int l, + mbl; + + l = mbl = pg_big5_mblen(s); + + if (len < l) + return -1; + + while (--l > 0) + { + if (*++s == '\0') + return -1; + } + + return mbl; +} + +static int +pg_big5_verifystr(const unsigned char *s, int len) +{ + const unsigned char *start = s; + + while (len > 0) + { + int l; + + /* fast path for ASCII-subset characters */ + if (!IS_HIGHBIT_SET(*s)) + { + if (*s == '\0') + break; + l = 1; + } + else + { + l = pg_big5_verifychar(s, len); + if (l == -1) + break; + } + s += l; + len -= l; + } + + return s - start; +} + +static int +pg_gbk_verifychar(const unsigned char *s, int len) +{ + int l, + mbl; + + l = mbl = pg_gbk_mblen(s); + + if (len < l) + return -1; + + while (--l > 0) + { + if (*++s == '\0') + return -1; + } + + return mbl; +} + +static int +pg_gbk_verifystr(const unsigned char *s, int len) +{ + const unsigned char *start = s; + + while (len > 0) + { + int l; + + /* fast path for ASCII-subset characters */ + if (!IS_HIGHBIT_SET(*s)) + { + if (*s == '\0') + break; + l = 1; + } + else + { + l = pg_gbk_verifychar(s, len); + if (l == -1) + break; + } + s += l; + len -= l; + } + + return s - start; +} + +static int +pg_uhc_verifychar(const unsigned char *s, int len) +{ + int l, + mbl; + + l = mbl = pg_uhc_mblen(s); + + if (len < l) + return -1; + + while (--l > 0) + { + if (*++s == '\0') + return -1; + } + + return mbl; +} + +static int +pg_uhc_verifystr(const unsigned char *s, int len) +{ + const unsigned char *start = s; + + while (len > 0) + { + int l; + + /* fast path for ASCII-subset characters */ + if (!IS_HIGHBIT_SET(*s)) + { + if (*s == '\0') + break; + l = 1; + } + else + { + l = pg_uhc_verifychar(s, len); + if (l == -1) + break; + } + s += l; + len -= l; + } + + return s - start; +} + +static int +pg_gb18030_verifychar(const unsigned char *s, int len) +{ + int l; + + if (!IS_HIGHBIT_SET(*s)) + l = 1; /* ASCII */ + else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39) + { + /* Should be 4-byte, validate remaining bytes */ + if (*s >= 0x81 && *s <= 0xfe && + *(s + 2) >= 0x81 && *(s + 2) <= 0xfe && + *(s + 3) >= 0x30 && *(s + 3) <= 0x39) + l = 4; + else + l = -1; + } + else if (len >= 2 && *s >= 0x81 && *s <= 0xfe) + { + /* Should be 2-byte, validate */ + if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) || + (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe)) + l = 2; + else + l = -1; + } + else + l = -1; + return l; +} + +static int +pg_gb18030_verifystr(const unsigned char *s, int len) +{ + const unsigned char *start = s; + + while (len > 0) + { + int l; + + /* fast path for ASCII-subset characters */ + if (!IS_HIGHBIT_SET(*s)) + { + if (*s == '\0') + break; + l = 1; + } + else + { + l = pg_gb18030_verifychar(s, len); + if (l == -1) + break; + } + s += l; + len -= l; + } + + return s - start; +} + +static int +pg_utf8_verifychar(const unsigned char *s, int len) +{ + int l; + + if ((*s & 0x80) == 0) + { + if (*s == '\0') + return -1; + return 1; + } + else if ((*s & 0xe0) == 0xc0) + l = 2; + else if ((*s & 0xf0) == 0xe0) + l = 3; + else if ((*s & 0xf8) == 0xf0) + l = 4; + else + l = 1; + + if (l > len) + return -1; + + if (!pg_utf8_islegal(s, l)) + return -1; + + return l; +} + +/* + * The fast path of the UTF-8 verifier uses a deterministic finite automaton + * (DFA) for multibyte characters. In a traditional table-driven DFA, the + * input byte and current state are used to compute an index into an array of + * state transitions. Since the address of the next transition is dependent + * on this computation, there is latency in executing the load instruction, + * and the CPU is not kept busy. + * + * Instead, we use a "shift-based" DFA as described by Per Vognsen: + * + * https://gist.github.com/pervognsen/218ea17743e1442e59bb60d29b1aa725 + * + * In a shift-based DFA, the input byte is an index into array of integers + * whose bit pattern encodes the state transitions. To compute the next + * state, we simply right-shift the integer by the current state and apply a + * mask. In this scheme, the address of the transition only depends on the + * input byte, so there is better pipelining. + * + * The naming convention for states and transitions was adopted from a UTF-8 + * to UTF-16/32 transcoder, whose table is reproduced below: + * + * https://github.com/BobSteagall/utf_utils/blob/6b7a465265de2f5fa6133d653df0c9bdd73bbcf8/src/utf_utils.cpp + * + * ILL ASC CR1 CR2 CR3 L2A L3A L3B L3C L4A L4B L4C CLASS / STATE + * ========================================================================== + * err, END, err, err, err, CS1, P3A, CS2, P3B, P4A, CS3, P4B, | BGN/END + * err, err, err, err, err, err, err, err, err, err, err, err, | ERR + * | + * err, err, END, END, END, err, err, err, err, err, err, err, | CS1 + * err, err, CS1, CS1, CS1, err, err, err, err, err, err, err, | CS2 + * err, err, CS2, CS2, CS2, err, err, err, err, err, err, err, | CS3 + * | + * err, err, err, err, CS1, err, err, err, err, err, err, err, | P3A + * err, err, CS1, CS1, err, err, err, err, err, err, err, err, | P3B + * | + * err, err, err, CS2, CS2, err, err, err, err, err, err, err, | P4A + * err, err, CS2, err, err, err, err, err, err, err, err, err, | P4B + * + * In the most straightforward implementation, a shift-based DFA for UTF-8 + * requires 64-bit integers to encode the transitions, but with an SMT solver + * it's possible to find state numbers such that the transitions fit within + * 32-bit integers, as Dougall Johnson demonstrated: + * + * https://gist.github.com/dougallj/166e326de6ad4cf2c94be97a204c025f + * + * This packed representation is the reason for the seemingly odd choice of + * state values below. + */ + +/* Error */ +#define ERR 0 +/* Begin */ +#define BGN 11 +/* Continuation states, expect 1/2/3 continuation bytes */ +#define CS1 16 +#define CS2 1 +#define CS3 5 +/* Partial states, where the first continuation byte has a restricted range */ +#define P3A 6 /* Lead was E0, check for 3-byte overlong */ +#define P3B 20 /* Lead was ED, check for surrogate */ +#define P4A 25 /* Lead was F0, check for 4-byte overlong */ +#define P4B 30 /* Lead was F4, check for too-large */ +/* Begin and End are the same state */ +#define END BGN + +/* the encoded state transitions for the lookup table */ + +/* ASCII */ +#define ASC (END << BGN) +/* 2-byte lead */ +#define L2A (CS1 << BGN) +/* 3-byte lead */ +#define L3A (P3A << BGN) +#define L3B (CS2 << BGN) +#define L3C (P3B << BGN) +/* 4-byte lead */ +#define L4A (P4A << BGN) +#define L4B (CS3 << BGN) +#define L4C (P4B << BGN) +/* continuation byte */ +#define CR1 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B) +#define CR2 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A) +#define CR3 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A) +/* invalid byte */ +#define ILL ERR + +static const uint32 Utf8Transition[256] = +{ + /* ASCII */ + + ILL, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + + /* continuation bytes */ + + /* 80..8F */ + CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1, + CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1, + + /* 90..9F */ + CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2, + CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2, + + /* A0..BF */ + CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, + CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, + CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, + CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, + + /* leading bytes */ + + /* C0..DF */ + ILL, ILL, L2A, L2A, L2A, L2A, L2A, L2A, + L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, + L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, + L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, + + /* E0..EF */ + L3A, L3B, L3B, L3B, L3B, L3B, L3B, L3B, + L3B, L3B, L3B, L3B, L3B, L3C, L3B, L3B, + + /* F0..FF */ + L4A, L4B, L4B, L4B, L4C, ILL, ILL, ILL, + ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL +}; + +static void +utf8_advance(const unsigned char *s, uint32 *state, int len) +{ + /* Note: We deliberately don't check the state's value here. */ + while (len > 0) + { + /* + * It's important that the mask value is 31: In most instruction sets, + * a shift by a 32-bit operand is understood to be a shift by its mod + * 32, so the compiler should elide the mask operation. + */ + *state = Utf8Transition[*s++] >> (*state & 31); + len--; + } + + *state &= 31; +} + +static int +pg_utf8_verifystr(const unsigned char *s, int len) +{ + const unsigned char *start = s; + const int orig_len = len; + uint32 state = BGN; + +/* + * With a stride of two vector widths, gcc will unroll the loop. Even if + * the compiler can unroll a longer loop, it's not worth it because we + * must fall back to the byte-wise algorithm if we find any non-ASCII. + */ +#define STRIDE_LENGTH (2 * sizeof(Vector8)) + + if (len >= STRIDE_LENGTH) + { + while (len >= STRIDE_LENGTH) + { + /* + * If the chunk is all ASCII, we can skip the full UTF-8 check, + * but we must first check for a non-END state, which means the + * previous chunk ended in the middle of a multibyte sequence. + */ + if (state != END || !is_valid_ascii(s, STRIDE_LENGTH)) + utf8_advance(s, &state, STRIDE_LENGTH); + + s += STRIDE_LENGTH; + len -= STRIDE_LENGTH; + } + + /* The error state persists, so we only need to check for it here. */ + if (state == ERR) + { + /* + * Start over from the beginning with the slow path so we can + * count the valid bytes. + */ + len = orig_len; + s = start; + } + else if (state != END) + { + /* + * The fast path exited in the middle of a multibyte sequence. + * Walk backwards to find the leading byte so that the slow path + * can resume checking from there. We must always backtrack at + * least one byte, since the current byte could be e.g. an ASCII + * byte after a 2-byte lead, which is invalid. + */ + do + { + Assert(s > start); + s--; + len++; + Assert(IS_HIGHBIT_SET(*s)); + } while (pg_utf_mblen(s) <= 1); + } + } + + /* check remaining bytes */ + while (len > 0) + { + int l; + + /* fast path for ASCII-subset characters */ + if (!IS_HIGHBIT_SET(*s)) + { + if (*s == '\0') + break; + l = 1; + } + else + { + l = pg_utf8_verifychar(s, len); + if (l == -1) + break; + } + s += l; + len -= l; + } + + return s - start; +} + +/* + * Check for validity of a single UTF-8 encoded character + * + * This directly implements the rules in RFC3629. The bizarre-looking + * restrictions on the second byte are meant to ensure that there isn't + * more than one encoding of a given Unicode character point; that is, + * you may not use a longer-than-necessary byte sequence with high order + * zero bits to represent a character that would fit in fewer bytes. + * To do otherwise is to create security hazards (eg, create an apparent + * non-ASCII character that decodes to plain ASCII). + * + * length is assumed to have been obtained by pg_utf_mblen(), and the + * caller must have checked that that many bytes are present in the buffer. + */ +bool +pg_utf8_islegal(const unsigned char *source, int length) +{ + unsigned char a; + + switch (length) + { + default: + /* reject lengths 5 and 6 for now */ + return false; + case 4: + a = source[3]; + if (a < 0x80 || a > 0xBF) + return false; + /* FALL THRU */ + case 3: + a = source[2]; + if (a < 0x80 || a > 0xBF) + return false; + /* FALL THRU */ + case 2: + a = source[1]; + switch (*source) + { + case 0xE0: + if (a < 0xA0 || a > 0xBF) + return false; + break; + case 0xED: + if (a < 0x80 || a > 0x9F) + return false; + break; + case 0xF0: + if (a < 0x90 || a > 0xBF) + return false; + break; + case 0xF4: + if (a < 0x80 || a > 0x8F) + return false; + break; + default: + if (a < 0x80 || a > 0xBF) + return false; + break; + } + /* FALL THRU */ + case 1: + a = *source; + if (a >= 0x80 && a < 0xC2) + return false; + if (a > 0xF4) + return false; + break; + } + return true; +} + + +/* + *------------------------------------------------------------------- + * encoding info table + * XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h) + *------------------------------------------------------------------- + */ +const pg_wchar_tbl pg_wchar_table[] = { + {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifychar, pg_ascii_verifystr, 1}, /* PG_SQL_ASCII */ + {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JP */ + {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 2}, /* PG_EUC_CN */ + {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifychar, pg_euckr_verifystr, 3}, /* PG_EUC_KR */ + {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4}, /* PG_EUC_TW */ + {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JIS_2004 */ + {pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifychar, pg_utf8_verifystr, 4}, /* PG_UTF8 */ + {pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifychar, pg_mule_verifystr, 4}, /* PG_MULE_INTERNAL */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN1 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN2 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN3 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN4 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN5 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN6 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN7 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN8 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN9 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_LATIN10 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1256 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1258 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN866 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN874 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_KOI8R */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1251 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1252 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-5 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-6 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-7 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* ISO-8859-8 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1250 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1253 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1254 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1255 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_WIN1257 */ + {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1}, /* PG_KOI8U */ + {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifychar, pg_sjis_verifystr, 2}, /* PG_SJIS */ + {0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifychar, pg_big5_verifystr, 2}, /* PG_BIG5 */ + {0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifychar, pg_gbk_verifystr, 2}, /* PG_GBK */ + {0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifychar, pg_uhc_verifystr, 2}, /* PG_UHC */ + {0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifychar, pg_gb18030_verifystr, 4}, /* PG_GB18030 */ + {0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifychar, pg_johab_verifystr, 3}, /* PG_JOHAB */ + {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifychar, pg_sjis_verifystr, 2} /* PG_SHIFT_JIS_2004 */ +}; + +/* + * Returns the byte length of a multibyte character. + * + * Caution: when dealing with text that is not certainly valid in the + * specified encoding, the result may exceed the actual remaining + * string length. Callers that are not prepared to deal with that + * should use pg_encoding_mblen_bounded() instead. + */ +int +pg_encoding_mblen(int encoding, const char *mbstr) +{ + return (PG_VALID_ENCODING(encoding) ? + pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) : + pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr)); +} + +/* + * Returns the byte length of a multibyte character; but not more than + * the distance to end of string. + */ +int +pg_encoding_mblen_bounded(int encoding, const char *mbstr) +{ + return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr)); +} + +/* + * Returns the display length of a multibyte character. + */ +int +pg_encoding_dsplen(int encoding, const char *mbstr) +{ + return (PG_VALID_ENCODING(encoding) ? + pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) : + pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr)); +} + +/* + * Verify the first multibyte character of the given string. + * Return its byte length if good, -1 if bad. (See comments above for + * full details of the mbverifychar API.) + */ +int +pg_encoding_verifymbchar(int encoding, const char *mbstr, int len) +{ + return (PG_VALID_ENCODING(encoding) ? + pg_wchar_table[encoding].mbverifychar((const unsigned char *) mbstr, len) : + pg_wchar_table[PG_SQL_ASCII].mbverifychar((const unsigned char *) mbstr, len)); +} + +/* + * Verify that a string is valid for the given encoding. + * Returns the number of input bytes (<= len) that form a valid string. + * (See comments above for full details of the mbverifystr API.) + */ +int +pg_encoding_verifymbstr(int encoding, const char *mbstr, int len) +{ + return (PG_VALID_ENCODING(encoding) ? + pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len) : + pg_wchar_table[PG_SQL_ASCII].mbverifystr((const unsigned char *) mbstr, len)); +} + +/* + * fetch maximum length of a given encoding + */ +int +pg_encoding_max_length(int encoding) +{ + Assert(PG_VALID_ENCODING(encoding)); + + return pg_wchar_table[encoding].maxmblen; +} |